diff --git a/.classpath b/.classpath index 4fde3ef69..80c2952c7 100644 --- a/.classpath +++ b/.classpath @@ -11,8 +11,10 @@ + + + - diff --git a/LICENSE.txt b/LICENSE.txt index 69409bed8..0721cdf14 100644 --- a/LICENSE.txt +++ b/LICENSE.txt @@ -1,4 +1,45 @@ /* * (c) Copyright 2010 Metaweb Technologies, Inc. All rights reserved. - * */ + +See the 'licenses' directory for a list of the licenses for the libraries we depend on, +ordered here by license: + +licenses/apache2.0.LICENSE.txt + jetty + jetty-util + servlet-api + commons-lang + commons-codec + log4j + xmlbeans + poi + poi-ooxml + poi-ooxml-schemas + jdatapath + +licenses/secondstring.LICENSE.txt (BSD family) + secondstring + +licenses/dom4j.LICENSE.txt (BSD family) + dom4j + +licenses/simile.LICENSE.txt (BSD family) + vicino + +licenses/arithcode.LICENSE.txt (BSD family) + arithcode + +licenses/icu4j.LICENSE.txt (MIT family) + icu4j + +licenses/slf4j.LICENSE.txt (MIT family) + slf4j-api + slf4j-log4j12 + jcl-over-slf4j + +licenses/json.LICENSE.txt (MIT family) + json + +licenses/cos.LICENSE.txt + cos \ No newline at end of file diff --git a/lib-src/arithcode-1.1-sources.jar b/lib-src/arithcode-1.1-sources.jar new file mode 100644 index 000000000..c4569b6e9 Binary files /dev/null and b/lib-src/arithcode-1.1-sources.jar differ diff --git a/lib-src/secondstring-20100303-sources.jar b/lib-src/secondstring-20100303-sources.jar new file mode 100644 index 000000000..0fa453d45 Binary files /dev/null and b/lib-src/secondstring-20100303-sources.jar differ diff --git a/lib/arithcode-1.1.jar b/lib/arithcode-1.1.jar new file mode 100644 index 000000000..8ab9142db Binary files /dev/null and b/lib/arithcode-1.1.jar differ diff --git a/lib/secondstring-20100303.jar b/lib/secondstring-20100303.jar new file mode 100644 index 000000000..427fcbbcc Binary files /dev/null and b/lib/secondstring-20100303.jar differ diff --git a/licenses/apache2.0.LICENSE.txt b/licenses/apache2.0.LICENSE.txt new file mode 100644 index 000000000..d64569567 --- /dev/null +++ b/licenses/apache2.0.LICENSE.txt @@ -0,0 +1,202 @@ + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/licenses/arithcode.LICENSE.txt b/licenses/arithcode.LICENSE.txt new file mode 100644 index 000000000..de5381ef9 --- /dev/null +++ b/licenses/arithcode.LICENSE.txt @@ -0,0 +1,30 @@ +Copyright (c) 2002, Bob Carpenter. +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + * Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following + disclaimer in the documentation and/or other materials provided + with the distribution. + + * Neither the name of colloquial.com nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. \ No newline at end of file diff --git a/licenses/cos.LICENSE.txt b/licenses/cos.LICENSE.txt new file mode 100644 index 000000000..e6b336ac8 --- /dev/null +++ b/licenses/cos.LICENSE.txt @@ -0,0 +1,82 @@ +Copyright (C) 2001-2009 by Jason Hunter, jhunter@servlets.com. +All rights reserved. + +The source code, object code, and documentation in the com.oreilly.servlet +package is copyright and owned by Jason Hunter. + + +ON-SITE USE RIGHTS + +Permission is granted to use the com.oreilly.servlet.* packages in the +development of any *non-commercial* project. For this use you are granted +a non-exclusive, non-transferable limited license at no cost. + +For a *commercial* project, permission is granted to use the +com.oreilly.servlet.* packages provided that every person on the development +team for that project owns a copy of the book Java Servlet Programming +(O'Reilly) in its most recent edition. The most recent edition is currently +the 2nd Edition, available in association with Amazon.com at +http://www.amazon.com/exec/obidos/ASIN/0596000405/jasonhunter. + +Other (sometimes cheaper) license terms are available upon request; please +write to jhunter@servlets.com for more information. + + +REDISTRIBUTION RIGHTS + +Commercial redistribution rights of the com.oreilly.servlet.* packages are +available by writing jhunter@servlets.com. + +Non-commercial redistribution is permitted provided that: + +1. You redistribute the package in object code form only (as Java .class files +or a .jar file containing the .class files) and only as part of a product that +uses the classes as part of its primary functionality. + +2. The product containing the package is non-commercial in nature. + +3. The public interface to the classes in the package, and the public +interface to any classes with similar functionality, is hidden from end users +when engaged in normal use of the product. + +4. The distribution is not part of a software development kit, operating +system, other library, or a development tool without written permission from +the copyright holder. + +5. The distribution includes copyright notice as follows: "The source code, +object code, and documentation in the com.oreilly.servlet package is copyright +and owned by Jason Hunter." in the documentation and/or other materials +provided with the distribution. + +6. You reproduce the above copyright notice, this list of conditions, and the +following disclaimer in the documentation and/or other materials provided with +the distribution. + +7. Licensor retains title to and ownership of the Software and all +enhancements, modifications, and updates to the Software. + +Note that the com.oreilly.servlet package is provided "as is" and the author +will not be liable for any damages suffered as a result of your use. +Furthermore, you understand the package comes without any guarantee of +technical support. + +You can always find the latest version of the com.oreilly.servlet package at +http://www.servlets.com. + + +THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS +OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +SUCH DAMAGE. + +Thanks, + +Jason Hunter +jhunter AT servlets.com diff --git a/licenses/dom4j.LICENSE.txt b/licenses/dom4j.LICENSE.txt new file mode 100644 index 000000000..c86228f19 --- /dev/null +++ b/licenses/dom4j.LICENSE.txt @@ -0,0 +1,27 @@ +Redistribution and use of this software and associated documentation ("Software"), +with or without modification, are permitted provided that the following conditions are met: + +1. Redistributions of source code must retain copyright statements and notices. +Redistributions must also contain a copy of this document. +2. Redistributions in binary form must reproduce the above copyright notice, this +list of conditions and the following disclaimer in the documentation and/or other +materials provided with the distribution. +3. The name "DOM4J" must not be used to endorse or promote products derived from this +Software without prior written permission of MetaStuff, Ltd. For written permission, +please contact dom4j-info@metastuff.com. +4. Products derived from this Software may not be called "DOM4J" nor may "DOM4J" +appear in their names without prior written permission of MetaStuff, Ltd. DOM4J is a +registered trademark of MetaStuff, Ltd. +5. Due credit should be given to the DOM4J Project - http://www.dom4j.org + +THIS SOFTWARE IS PROVIDED BY METASTUFF, LTD. AND CONTRIBUTORS ``AS IS'' AND ANY EXPRESSED +OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF +MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL +METASTUFF, LTD. OR ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT +OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR +TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, +EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +Copyright 2001-2005 (C) MetaStuff, Ltd. All Rights Reserved. \ No newline at end of file diff --git a/licenses/icu4j.LICENSE.txt b/licenses/icu4j.LICENSE.txt new file mode 100644 index 000000000..677f38878 --- /dev/null +++ b/licenses/icu4j.LICENSE.txt @@ -0,0 +1,28 @@ +ICU License - ICU 1.8.1 and later + +COPYRIGHT AND PERMISSION NOTICE + +Copyright (c) 1995-2009 International Business Machines Corporation and others + +All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy of this +software and associated documentation files (the "Software"), to deal in the Software +without restriction, including without limitation the rights to use, copy, modify, merge, +publish, distribute, and/or sell copies of the Software, and to permit persons to whom +the Software is furnished to do so, provided that the above copyright notice(s) and this +permission notice appear in all copies of the Software and that both the above copyright +notice(s) and this permission notice appear in supporting documentation. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, +INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR +PURPOSE AND NONINFRINGEMENT OF THIRD PARTY RIGHTS. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR HOLDERS INCLUDED IN THIS NOTICE BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL +INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF +USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS +ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + +Except as contained in this notice, the name of a copyright holder shall not be used +in advertising or otherwise to promote the sale, use or other dealings in this Software +without prior written authorization of the copyright holder. + diff --git a/licenses/json.LICENSE.txt b/licenses/json.LICENSE.txt new file mode 100644 index 000000000..493319796 --- /dev/null +++ b/licenses/json.LICENSE.txt @@ -0,0 +1,21 @@ +Copyright (c) 2002 JSON.org + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +The Software shall be used for Good, not Evil. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. \ No newline at end of file diff --git a/licenses/secondstring.LICENSE.txt b/licenses/secondstring.LICENSE.txt new file mode 100644 index 000000000..b65a2af97 --- /dev/null +++ b/licenses/secondstring.LICENSE.txt @@ -0,0 +1,41 @@ +Copyright (c) 2003 Carnegie Mellon University +All rights reserved. +Developed by: Center for Automated Learning and Discovery + Carnegie Mellon University + http://www.cald.cs.cmu.edu + + The design and implementation of this software was supported in + part by National Science Foundation Grant No. EIA-0131884 to the + National Institute of Statistical Sciences, and by a contract + from the Army Research Office to the Center for Computer and + Communications Security with Carnegie Mellon University. + + +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +"Software"), to deal with the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +Redistributions of source code must retain the above copyright notice, +this list of conditions and the following disclaimers. +Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimers in the +documentation and/or other materials provided with the distribution. +Neither the names of the Center for Automated Learning and Discovery, +or Carnegie Mellon University, nor the names of its contributors may +be used to endorse or promote products derived from this Software +without specific prior written permission. THE SOFTWARE IS PROVIDED +"AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING +BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A +PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR +OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, +ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +OTHER DEALINGS WITH THE SOFTWARE. + +[This is an instance of the University of Illinois/NCSA Open Source +agreement, obtained from http://www.opensource.org/licenses/UoI-NCSA.php] + diff --git a/licenses/simile.LICENSE.txt b/licenses/simile.LICENSE.txt new file mode 100644 index 000000000..69bf77c10 --- /dev/null +++ b/licenses/simile.LICENSE.txt @@ -0,0 +1,29 @@ +/* + * (c) Copyright Massachusetts Institute of Technology & Contributors. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. The name of the author may not be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + */ diff --git a/licenses/slf4j.LICENSE.txt b/licenses/slf4j.LICENSE.txt new file mode 100644 index 000000000..29182a587 --- /dev/null +++ b/licenses/slf4j.LICENSE.txt @@ -0,0 +1,22 @@ + Copyright (c) 2004-2008 QOS.ch + All rights reserved. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice shall be + included in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + \ No newline at end of file diff --git a/src/main/java/edu/mit/simile/vicino/Distance.java b/src/main/java/edu/mit/simile/vicino/Distance.java new file mode 100644 index 000000000..e9da35d46 --- /dev/null +++ b/src/main/java/edu/mit/simile/vicino/Distance.java @@ -0,0 +1,7 @@ +package edu.mit.simile.vicino; + +public interface Distance { + + public float d(String x, String y); + +} diff --git a/src/main/java/edu/mit/simile/vicino/Distributor.java b/src/main/java/edu/mit/simile/vicino/Distributor.java new file mode 100644 index 000000000..1ac104b7c --- /dev/null +++ b/src/main/java/edu/mit/simile/vicino/Distributor.java @@ -0,0 +1,61 @@ +package edu.mit.simile.vicino; + +import java.util.List; + +import edu.mit.simile.vicino.Distance; + +public class Distributor extends Operator { + + private static final int COLUMNS = 70; + private static final char CHAR = '='; + + public static void main(String[] args) throws Exception { + + Distance d = getDistance(args[0]); + + List strings = getStrings(args[1]); + + int buckets = Integer.parseInt(args[2]); + + long start = System.currentTimeMillis(); + int[] values = new int[buckets]; + + int size = strings.size(); + for (int i = 0; i < size; i++) { + String x = (String) strings.get((int) (Math.random() * size)); + String y = (String) strings.get((int) (Math.random() * size)); + int dist = (int) (buckets * d.d(x, y)); + values[dist]++; + System.out.print("."); + } + System.out.println(); + + long stop = System.currentTimeMillis(); + float m = ((float) (stop - start)) / (float) size; + + int maxValue = 0; + for (int i = 0; i < buckets; i++) { + if (values[i] > maxValue) { + maxValue = values[i]; + } + } + + System.out + .println("+-------------------------------------------------------------------"); + for (int i = 0; i < buckets; i++) { + System.out.println("|" + bar(COLUMNS * values[i] / maxValue)); + } + System.out + .println("+-------------------------------------------------------------------"); + + System.out.println("\n Each distance calculation took: " + m + " millis"); + } + + static private String bar(int value) { + StringBuffer b = new StringBuffer(value); + for (int i = 0; i < value; i++) { + b.append(CHAR); + } + return b.toString(); + } +} diff --git a/src/main/java/edu/mit/simile/vicino/Meter.java b/src/main/java/edu/mit/simile/vicino/Meter.java new file mode 100644 index 000000000..d83defb70 --- /dev/null +++ b/src/main/java/edu/mit/simile/vicino/Meter.java @@ -0,0 +1,10 @@ +package edu.mit.simile.vicino; + +public class Meter extends Operator { + + public static void main(String[] args) throws Exception { + Distance d = getDistance(args[0]); + System.out.println(args[1] + " <- " + d.d(args[1], args[2]) + " -> " + args[2]); + } + +} diff --git a/src/main/java/edu/mit/simile/vicino/Operator.java b/src/main/java/edu/mit/simile/vicino/Operator.java new file mode 100644 index 000000000..83eea561c --- /dev/null +++ b/src/main/java/edu/mit/simile/vicino/Operator.java @@ -0,0 +1,47 @@ +package edu.mit.simile.vicino; + +import java.io.BufferedReader; +import java.io.File; +import java.io.FileReader; +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; + +public class Operator { + + static void log(String msg) { + System.out.println(msg); + } + + static Distance getDistance(String distance) throws Exception { + return (Distance) Class.forName("edu.mit.simile.vicino.distances." + distance + "Distance").newInstance(); + } + + static List getStrings(String fileName) throws IOException { + ArrayList strings = new ArrayList(); + + File file = new File(fileName); + if (file.isDirectory()) { + File[] files = file.listFiles(); + for (int i = 0; i < files.length; i++) { + BufferedReader input = new BufferedReader(new FileReader(files[i])); + StringBuffer b = new StringBuffer(); + String line; + while ((line = input.readLine()) != null) { + b.append(line.trim()); + } + input.close(); + strings.add(b.toString()); + } + } else { + BufferedReader input = new BufferedReader(new FileReader(fileName)); + String line; + while ((line = input.readLine()) != null) { + strings.add(line.trim()); + } + input.close(); + } + + return strings; + } +} diff --git a/src/main/java/edu/mit/simile/vicino/Seeker.java b/src/main/java/edu/mit/simile/vicino/Seeker.java new file mode 100644 index 000000000..d52ea5bbf --- /dev/null +++ b/src/main/java/edu/mit/simile/vicino/Seeker.java @@ -0,0 +1,52 @@ +package edu.mit.simile.vicino; + +import java.io.BufferedReader; +import java.io.InputStreamReader; +import java.io.Serializable; +import java.util.Iterator; +import java.util.List; + +import edu.mit.simile.vicino.vptree.VPTree; +import edu.mit.simile.vicino.vptree.VPTreeBuilder; +import edu.mit.simile.vicino.vptree.VPTreeSeeker; + +public class Seeker extends Operator { + + public static void main(String[] args) throws Exception { + Distance d = getDistance(args[0]); + + log("Working with distance: " + d); + List strings = getStrings(args[1]); + log("Obtained " + strings.size() + " from " + args[1]); + + log("Building VPTree..."); + VPTreeBuilder builder = new VPTreeBuilder(d); + VPTree tree = builder.buildVPTree(strings); + log("..done"); + + VPTreeSeeker seeker = new VPTreeSeeker(d, tree); + + log("type a string|range then hit return:"); + BufferedReader input = new BufferedReader(new InputStreamReader( + System.in)); + String line = null; + while ((line = input.readLine()) != null) { + int index = line.indexOf('|'); + String query = line.substring(0, index); + float range = Float.parseFloat(line.substring(index + 1)); + long start = System.currentTimeMillis(); + List results = seeker.range(query, range); + long stop = System.currentTimeMillis(); + Iterator j = results.iterator(); + if (j.hasNext()) { + while (j.hasNext()) { + String r = (String) j.next(); + log(" " + r); + } + log(" [done in " + (stop - start) + "ms]"); + } else { + log(" [no results found in " + (stop - start) + "ms]"); + } + } + } +} diff --git a/src/main/java/edu/mit/simile/vicino/Tester.java b/src/main/java/edu/mit/simile/vicino/Tester.java new file mode 100644 index 000000000..b97255c6c --- /dev/null +++ b/src/main/java/edu/mit/simile/vicino/Tester.java @@ -0,0 +1,50 @@ +package edu.mit.simile.vicino; + +import java.util.List; + +public class Tester extends Operator { + + public static void main(String[] args) throws Exception { + Distance d = getDistance(args[0]); + + List strings = getStrings(args[1]); + + long start = System.currentTimeMillis(); + + int size = strings.size(); + for (int i = 0; i < size; i++) { + String x = (String) strings.get((int) (Math.random() * size)); + String y = (String) strings.get((int) (Math.random() * size)); + String z = (String) strings.get((int) (Math.random() * size)); + boolean metric = metric(x, y, z, d); + if (metric) { + System.out.println("metric"); + } else { + System.out.println("***** NOT METRIC *****"); + } + } + + long stop = System.currentTimeMillis(); + float m = ((float) (stop - start)) / (float) size; + + System.out.println("\n Each metric evaluation took: " + m + " millis"); + } + + static boolean metric(String x, String y, String z, Distance d) { + float dxx = d.d(x, x); + boolean identity = (dxx == 0.0f); + float dxy = d.d(x, y); + float dyx = d.d(y, x); + boolean simmetrical = (dxy == dyx); + float dxz = d.d(x, z); + float dyz = d.d(y, z); + boolean triangular = (dxz <= dxy + dyz); + return (identity && simmetrical && triangular); + } + + static Distance getDistance(String distance) throws Exception { + return (Distance) Class.forName( + "edu.mit.simile.vicino.distances." + distance + "Distance") + .newInstance(); + } +} diff --git a/src/main/java/edu/mit/simile/vicino/distances/BZip2Distance.java b/src/main/java/edu/mit/simile/vicino/distances/BZip2Distance.java new file mode 100644 index 000000000..dea474099 --- /dev/null +++ b/src/main/java/edu/mit/simile/vicino/distances/BZip2Distance.java @@ -0,0 +1,26 @@ +package edu.mit.simile.vicino.distances; + +import java.io.ByteArrayOutputStream; +import java.io.IOException; + +import org.apache.tools.bzip2.CBZip2OutputStream; + +public class BZip2Distance extends PseudoMetricDistance { + + public float d2(String x, String y) { + String str = x + y; + float result = 0.0f; + try { + ByteArrayOutputStream baos = new ByteArrayOutputStream(str.length()); + CBZip2OutputStream os = new CBZip2OutputStream(baos); + os.write(str.getBytes()); + os.close(); + baos.close(); + result = baos.toByteArray().length; + } catch (IOException e) { + e.printStackTrace(); + } + return result; + } + +} diff --git a/src/main/java/edu/mit/simile/vicino/distances/GZipDistance.java b/src/main/java/edu/mit/simile/vicino/distances/GZipDistance.java new file mode 100644 index 000000000..5bbfe36ff --- /dev/null +++ b/src/main/java/edu/mit/simile/vicino/distances/GZipDistance.java @@ -0,0 +1,26 @@ +package edu.mit.simile.vicino.distances; + +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.util.zip.GZIPOutputStream; + +public class GZipDistance extends PseudoMetricDistance { + + public float d2(String x, String y) { + String str = x + y; + float result = 0.0f; + try { + ByteArrayOutputStream baos = new ByteArrayOutputStream(str.length()); + GZIPOutputStream os = new GZIPOutputStream(baos); + os.write(str.getBytes()); + os.close(); + baos.close(); + result = baos.toByteArray().length; + } catch (IOException e) { + e.printStackTrace(); + } + return result; + + } + +} diff --git a/src/main/java/edu/mit/simile/vicino/distances/JaccardDistance.java b/src/main/java/edu/mit/simile/vicino/distances/JaccardDistance.java new file mode 100644 index 000000000..0e4f50670 --- /dev/null +++ b/src/main/java/edu/mit/simile/vicino/distances/JaccardDistance.java @@ -0,0 +1,18 @@ +package edu.mit.simile.vicino.distances; + +import com.wcohen.ss.Jaccard; +import com.wcohen.ss.api.StringDistance; + +public class JaccardDistance extends MetricDistance { + + StringDistance distance; + + public JaccardDistance() { + this.distance = new Jaccard(); + } + + protected float d2(String x, String y) { + return Math.abs((float) this.distance.score(x, y) - 1.0f); + } + +} diff --git a/src/main/java/edu/mit/simile/vicino/distances/JaroDistance.java b/src/main/java/edu/mit/simile/vicino/distances/JaroDistance.java new file mode 100644 index 000000000..d17166d51 --- /dev/null +++ b/src/main/java/edu/mit/simile/vicino/distances/JaroDistance.java @@ -0,0 +1,18 @@ +package edu.mit.simile.vicino.distances; + +import com.wcohen.ss.Jaro; +import com.wcohen.ss.api.StringDistance; + +public class JaroDistance extends MetricDistance { + + StringDistance distance; + + public JaroDistance() { + this.distance = new Jaro(); + } + + protected float d2(String x, String y) { + return Math.abs((float) this.distance.score(x, y) - 1.0f); + } + +} diff --git a/src/main/java/edu/mit/simile/vicino/distances/LevensteinDistance.java b/src/main/java/edu/mit/simile/vicino/distances/LevensteinDistance.java new file mode 100644 index 000000000..5eaff95a4 --- /dev/null +++ b/src/main/java/edu/mit/simile/vicino/distances/LevensteinDistance.java @@ -0,0 +1,22 @@ +package edu.mit.simile.vicino.distances; + +import com.wcohen.ss.Levenstein; +import com.wcohen.ss.api.StringDistance; + +import edu.mit.simile.vicino.Distance; + +public class LevensteinDistance implements Distance { + + StringDistance distance; + + public LevensteinDistance() { + this.distance = new Levenstein(); + } + + public float d(String x, String y) { + float d = Math.abs((float) this.distance.score(x, y)); + // System.out.println(this.distance.explainScore(x,y)); + return d / (x.length() + y.length()); + } + +} diff --git a/src/main/java/edu/mit/simile/vicino/distances/MetricDistance.java b/src/main/java/edu/mit/simile/vicino/distances/MetricDistance.java new file mode 100644 index 000000000..4084be1e4 --- /dev/null +++ b/src/main/java/edu/mit/simile/vicino/distances/MetricDistance.java @@ -0,0 +1,19 @@ +package edu.mit.simile.vicino.distances; + +import edu.mit.simile.vicino.Distance; + +public abstract class MetricDistance implements Distance { + + /* + * public float d(String x,String y) { float dxy = d2(x,y); float dx = + * d2(x,""); float dy = d2(y,""); float result = dxy / (dx + dy); return + * result; } + */ + + public float d(String x, String y) { + return d2(x, y); + } + + abstract float d2(String x, String y); + +} diff --git a/src/main/java/edu/mit/simile/vicino/distances/PPMDistance.java b/src/main/java/edu/mit/simile/vicino/distances/PPMDistance.java new file mode 100644 index 000000000..d4f31cb04 --- /dev/null +++ b/src/main/java/edu/mit/simile/vicino/distances/PPMDistance.java @@ -0,0 +1,27 @@ +package edu.mit.simile.vicino.distances; + +import java.io.ByteArrayOutputStream; +import java.io.IOException; + +import com.colloquial.arithcode.ArithCodeOutputStream; +import com.colloquial.arithcode.PPMModel; + +public class PPMDistance extends PseudoMetricDistance { + + public float d2(String x, String y) { + String str = x + y; + float result = 0.0f; + try { + ByteArrayOutputStream baos = new ByteArrayOutputStream(str.length()); + ArithCodeOutputStream os = new ArithCodeOutputStream(baos,new PPMModel(8)); + os.write(str.getBytes()); + os.close(); + baos.close(); + result = baos.toByteArray().length; + } catch (IOException e) { + e.printStackTrace(); + } + return result; + } + +} diff --git a/src/main/java/edu/mit/simile/vicino/distances/PseudoMetricDistance.java b/src/main/java/edu/mit/simile/vicino/distances/PseudoMetricDistance.java new file mode 100644 index 000000000..a14f825f1 --- /dev/null +++ b/src/main/java/edu/mit/simile/vicino/distances/PseudoMetricDistance.java @@ -0,0 +1,17 @@ +package edu.mit.simile.vicino.distances; + +import edu.mit.simile.vicino.Distance; + +public abstract class PseudoMetricDistance implements Distance { + + public float d(String x, String y) { + float cxx = d2(x, x); + float cyy = d2(y, y); + float cxy = d2(x, y); + float cyx = d2(y, x); + float result1 = (cxy + cyx) / (cxx + cyy) - 1.0f; + return result1; + } + + protected abstract float d2(String x, String y); +} diff --git a/src/main/java/edu/mit/simile/vicino/vptree/Node.java b/src/main/java/edu/mit/simile/vicino/vptree/Node.java new file mode 100755 index 000000000..4de3f2f34 --- /dev/null +++ b/src/main/java/edu/mit/simile/vicino/vptree/Node.java @@ -0,0 +1,42 @@ +package edu.mit.simile.vicino.vptree; + +import java.io.Serializable; + +/** + * This class represent a couple (Object, distance) value of that Object from + * the Vp in each step of the algorithm. + * + * @author Paolo Ciccarese + */ +public class Node implements Serializable { + + private static final long serialVersionUID = -2077473220894258550L; + + private final Serializable obj; + private float distance; + + public Node(Serializable obj, int i) { + this.obj = obj; + this.distance = i; + } + + public Node(Serializable obj) { + this.obj = obj; + } + + public Serializable get() { + return this.obj; + } + + public void setDistance(float distance) { + this.distance = distance; + } + + public float getDistance() { + return distance; + } + + public String toString() { + return obj.toString(); + } +} diff --git a/src/main/java/edu/mit/simile/vicino/vptree/NodeSorter.java b/src/main/java/edu/mit/simile/vicino/vptree/NodeSorter.java new file mode 100755 index 000000000..2c1d2c071 --- /dev/null +++ b/src/main/java/edu/mit/simile/vicino/vptree/NodeSorter.java @@ -0,0 +1,94 @@ +package edu.mit.simile.vicino.vptree; + +public class NodeSorter { + + /** + * Sort array of Objects using the QuickSort algorithm. + * + * @param s + * An Object[]. + * @param lo + * The current lower bound. + * @param hi + * The current upper bound. + */ + public static void sort(Node nodes[], int lo, int hi) { + if (lo >= hi) { + return; + } + + /* + * Use median-of-three(lo, mid, hi) to pick a partition. Also swap them + * into relative order while we are at it. + */ + int mid = (lo + hi) / 2; + + if (nodes[lo].getDistance() > nodes[mid].getDistance()) { + // Swap. + Node tmp = nodes[lo]; + nodes[lo] = nodes[mid]; + nodes[mid] = tmp; + } + + if (nodes[mid].getDistance() > nodes[hi].getDistance()) { + // Swap . + Node tmp = nodes[mid]; + nodes[mid] = nodes[hi]; + nodes[hi] = tmp; + + if (nodes[lo].getDistance() > nodes[mid].getDistance()) { + // Swap. + Node tmp2 = nodes[lo]; + nodes[lo] = nodes[mid]; + nodes[mid] = tmp2; + } + } + + // Start one past lo since already handled lo. + + int left = lo + 1; + + // Similarly, end one before hi since already handled hi. + + int right = hi - 1; + + // If there are three or fewer elements, we are done. + + if (left >= right) { + return; + } + + Node partition = nodes[mid]; + + while (true) { + while (nodes[right].getDistance() > partition.getDistance()) { + --right; + } + + while (left < right && nodes[left].getDistance() <= partition.getDistance()) { + ++left; + } + + if (left < right) { + // Swap. + Node tmp = nodes[left]; + nodes[left] = nodes[right]; + nodes[right] = tmp; + + --right; + } else { + break; + } + } + + sort(nodes, lo, left); + sort(nodes, left + 1, hi); + } + + /** + * Sorts and array of objects. + */ + public void sort(Node nodes[]) { + NodeSorter.sort(nodes, 0, nodes.length - 1); + } +} diff --git a/src/main/java/edu/mit/simile/vicino/vptree/TNode.java b/src/main/java/edu/mit/simile/vicino/vptree/TNode.java new file mode 100755 index 000000000..1f9917fc8 --- /dev/null +++ b/src/main/java/edu/mit/simile/vicino/vptree/TNode.java @@ -0,0 +1,52 @@ +package edu.mit.simile.vicino.vptree; + +import java.io.Serializable; + +/** + * @author Paolo Ciccarese + */ +public class TNode implements Serializable { + + private static final long serialVersionUID = -217604190976851241L; + + private final Serializable obj; + private float median; + private TNode left; + private TNode right; + + /** + * The Object will be fixed during the instantiation of the node, while the + * children will be defined in another iteration of the algorithm, + */ + public TNode(Serializable obj) { + this.obj = obj; + } + + public Serializable get() { + return this.obj; + } + + public void setMedian(float median) { + this.median = median; + } + + public float getMedian() { + return median; + } + + public void setLeft(TNode leftNode) { + this.left = leftNode; + } + + public TNode getLeft() { + return left; + } + + public void setRight(TNode rightNode) { + this.right = rightNode; + } + + public TNode getRight() { + return right; + } +} diff --git a/src/main/java/edu/mit/simile/vicino/vptree/VPTree.java b/src/main/java/edu/mit/simile/vicino/vptree/VPTree.java new file mode 100755 index 000000000..4ce8cb997 --- /dev/null +++ b/src/main/java/edu/mit/simile/vicino/vptree/VPTree.java @@ -0,0 +1,33 @@ +package edu.mit.simile.vicino.vptree; + +import java.io.Serializable; + +/** + * The VPTree class. + * + * @author Paolo Ciccarese + */ +public class VPTree implements Serializable { + + private static final long serialVersionUID = 1291056732155841123L; + + private TNode root; + + /** + * Sets the root of the VPTree. + * + * @param root The VPTree root. + */ + public void setRoot(TNode root) { + this.root = root; + } + + /** + * Get the root of the VPTree. + * + * @return The VPTree root. + */ + public TNode getRoot() { + return root; + } +} diff --git a/src/main/java/edu/mit/simile/vicino/vptree/VPTreeBuilder.java b/src/main/java/edu/mit/simile/vicino/vptree/VPTreeBuilder.java new file mode 100755 index 000000000..c7ea617e3 --- /dev/null +++ b/src/main/java/edu/mit/simile/vicino/vptree/VPTreeBuilder.java @@ -0,0 +1,101 @@ +package edu.mit.simile.vicino.vptree; + +import java.io.Serializable; +import java.util.Collection; +import java.util.Iterator; +import java.util.Random; + +import edu.mit.simile.vicino.Distance; + +/** + * @author Paolo Ciccarese + * @author Stefano Mazzocchi + */ +public class VPTreeBuilder { + + private static final boolean DEBUG = false; + + private Random generator = new Random(System.currentTimeMillis()); + + private VPTree tree; + private final Distance distance; + + /** + * Defines a VPTree Builder for a specific distance. + * + * @param distance The class implementing the distance. + */ + public VPTreeBuilder(Distance distance) { + this.distance = distance; + } + + public VPTree buildVPTree(Collection col) { + Node nodes[] = new Node[col.size()]; + Iterator i = col.iterator(); + int counter = 0; + while (i.hasNext()) { + Serializable s = (Serializable) i.next(); + nodes[counter++] = new Node(s); + } + + tree = new VPTree(); + tree.setRoot(addNode(nodes, 0, nodes.length - 1)); + return tree; + } + + private TNode addNode(Node nodes[], int begin, int end) { + + int delta = end - begin; + int middle = begin + delta / 2; + + TNode node = new TNode(nodes[begin + getRandomIndex(delta)].get()); + + if (DEBUG) System.out.println("\nnode: " + node.get().toString()); + + calculateDistances(node, nodes, begin, end); + orderDistances(nodes, begin, end); + + if (DEBUG) { + for (int i = begin; i <= end; i++) { + System.out.println(" +-- " + nodes[i].getDistance() + " --> " + nodes[i].get()); + } + } + + if (delta + 1 > 0) { + if (middle - (begin + 1) >= 1) { + node.setLeft(addNode(nodes, begin + 1, middle)); + if (DEBUG) System.out.println(" L --> " + node.getLeft().get()); + } else if (middle - (begin + 1) == 0) { + node.setLeft(new TNode(nodes[middle].get())); + if (DEBUG) System.out.println(" L --> " + node.getLeft().get()); + } + + if ((end - (middle + 1)) >= 1) { + node.setRight(addNode(nodes, middle + 1, end)); + if (DEBUG) System.out.println(" R --> " + node.getRight().get()); + } else if (end - (middle + 1) == 0) { + node.setRight(new TNode(nodes[middle + 1].get())); + if (DEBUG) System.out.println(" R --> " + node.getRight().get()); + } + } + + return node; + } + + private void calculateDistances(TNode pivot, Node nodes[], int begin, int end) { + for (int i = begin; i <= end; i++) { + Object x = pivot.get(); + Object y = nodes[i].get(); + float d = (x == y) ? 0.0f : distance.d(x.toString(), y.toString()); + nodes[i].setDistance(d); + } + } + + private void orderDistances(Node nodes[], int begin, int end) { + NodeSorter.sort(nodes, begin, end); + } + + private int getRandomIndex(int max) { + return generator.nextInt(max); + } +} diff --git a/src/main/java/edu/mit/simile/vicino/vptree/VPTreeSeeker.java b/src/main/java/edu/mit/simile/vicino/vptree/VPTreeSeeker.java new file mode 100755 index 000000000..389a75eaf --- /dev/null +++ b/src/main/java/edu/mit/simile/vicino/vptree/VPTreeSeeker.java @@ -0,0 +1,48 @@ +package edu.mit.simile.vicino.vptree; + +import java.io.Serializable; +import java.util.ArrayList; +import java.util.List; + +import edu.mit.simile.vicino.Distance; + +/** + * @author Paolo Ciccarese + */ +public class VPTreeSeeker { + + VPTree tree; + Distance distance; + + public VPTreeSeeker(Distance distance, VPTree tree) { + this.distance = distance; + this.tree = tree; + } + + public List range(Object query, float range) { + return rangeTraversal(query, range, tree.getRoot(), new ArrayList()); + } + + private List rangeTraversal(Object query, float range, TNode tNode, List results) { + + if (tNode != null) { + float distance = this.distance.d(query.toString(), tNode.toString()); + + if (distance < range) { + results.add(tNode.get()); + } + + if ((distance + range) < tNode.getMedian()) { + rangeTraversal(query, range, tNode.getLeft(), results); + } else if ((distance - range) > tNode.getMedian()) { + rangeTraversal(query, range, tNode.getRight(), results); + } else { + rangeTraversal(query, range, tNode.getLeft(), results); + rangeTraversal(query, range, tNode.getRight(), results); + } + } + + return results; + } + +} diff --git a/src/main/java/org/apache/tools/bzip2/BZip2Constants.java b/src/main/java/org/apache/tools/bzip2/BZip2Constants.java new file mode 100644 index 000000000..4f832d67d --- /dev/null +++ b/src/main/java/org/apache/tools/bzip2/BZip2Constants.java @@ -0,0 +1,136 @@ +/* + * The Apache Software License, Version 1.1 + * + * Copyright (c) 2001 The Apache Software Foundation. All rights + * reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * 3. The end-user documentation included with the redistribution, if + * any, must include the following acknowlegement: + * "This product includes software developed by the + * Apache Software Foundation (http://www.apache.org/)." + * Alternately, this acknowlegement may appear in the software itself, + * if and wherever such third-party acknowlegements normally appear. + * + * 4. The names "Ant" and "Apache Software + * Foundation" must not be used to endorse or promote products derived + * from this software without prior written permission. For written + * permission, please contact apache@apache.org. + * + * 5. Products derived from this software may not be called "Apache" + * nor may "Apache" appear in their names without prior written + * permission of the Apache Group. + * + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED + * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR + * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF + * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT + * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * ==================================================================== + * + * This software consists of voluntary contributions made by many + * individuals on behalf of the Apache Software Foundation. For more + * information on the Apache Software Foundation, please see + * . + */ + +/* + * This package is based on the work done by Keiron Liddle, Aftex Software + * to whom the Ant project is very grateful for his + * great code. + */ + +package org.apache.tools.bzip2; + +/** + * Base class for both the compress and decompress classes. + * Holds common arrays, and static data. + * + * @author Keiron Liddle + */ +public interface BZip2Constants { + + int baseBlockSize = 100000; + int MAX_ALPHA_SIZE = 258; + int MAX_CODE_LEN = 23; + int RUNA = 0; + int RUNB = 1; + int N_GROUPS = 6; + int G_SIZE = 50; + int N_ITERS = 4; + int MAX_SELECTORS = (2 + (900000 / G_SIZE)); + int NUM_OVERSHOOT_BYTES = 20; + + int[] rNums = { + 619, 720, 127, 481, 931, 816, 813, 233, 566, 247, + 985, 724, 205, 454, 863, 491, 741, 242, 949, 214, + 733, 859, 335, 708, 621, 574, 73, 654, 730, 472, + 419, 436, 278, 496, 867, 210, 399, 680, 480, 51, + 878, 465, 811, 169, 869, 675, 611, 697, 867, 561, + 862, 687, 507, 283, 482, 129, 807, 591, 733, 623, + 150, 238, 59, 379, 684, 877, 625, 169, 643, 105, + 170, 607, 520, 932, 727, 476, 693, 425, 174, 647, + 73, 122, 335, 530, 442, 853, 695, 249, 445, 515, + 909, 545, 703, 919, 874, 474, 882, 500, 594, 612, + 641, 801, 220, 162, 819, 984, 589, 513, 495, 799, + 161, 604, 958, 533, 221, 400, 386, 867, 600, 782, + 382, 596, 414, 171, 516, 375, 682, 485, 911, 276, + 98, 553, 163, 354, 666, 933, 424, 341, 533, 870, + 227, 730, 475, 186, 263, 647, 537, 686, 600, 224, + 469, 68, 770, 919, 190, 373, 294, 822, 808, 206, + 184, 943, 795, 384, 383, 461, 404, 758, 839, 887, + 715, 67, 618, 276, 204, 918, 873, 777, 604, 560, + 951, 160, 578, 722, 79, 804, 96, 409, 713, 940, + 652, 934, 970, 447, 318, 353, 859, 672, 112, 785, + 645, 863, 803, 350, 139, 93, 354, 99, 820, 908, + 609, 772, 154, 274, 580, 184, 79, 626, 630, 742, + 653, 282, 762, 623, 680, 81, 927, 626, 789, 125, + 411, 521, 938, 300, 821, 78, 343, 175, 128, 250, + 170, 774, 972, 275, 999, 639, 495, 78, 352, 126, + 857, 956, 358, 619, 580, 124, 737, 594, 701, 612, + 669, 112, 134, 694, 363, 992, 809, 743, 168, 974, + 944, 375, 748, 52, 600, 747, 642, 182, 862, 81, + 344, 805, 988, 739, 511, 655, 814, 334, 249, 515, + 897, 955, 664, 981, 649, 113, 974, 459, 893, 228, + 433, 837, 553, 268, 926, 240, 102, 654, 459, 51, + 686, 754, 806, 760, 493, 403, 415, 394, 687, 700, + 946, 670, 656, 610, 738, 392, 760, 799, 887, 653, + 978, 321, 576, 617, 626, 502, 894, 679, 243, 440, + 680, 879, 194, 572, 640, 724, 926, 56, 204, 700, + 707, 151, 457, 449, 797, 195, 791, 558, 945, 679, + 297, 59, 87, 824, 713, 663, 412, 693, 342, 606, + 134, 108, 571, 364, 631, 212, 174, 643, 304, 329, + 343, 97, 430, 751, 497, 314, 983, 374, 822, 928, + 140, 206, 73, 263, 980, 736, 876, 478, 430, 305, + 170, 514, 364, 692, 829, 82, 855, 953, 676, 246, + 369, 970, 294, 750, 807, 827, 150, 790, 288, 923, + 804, 378, 215, 828, 592, 281, 565, 555, 710, 82, + 896, 831, 547, 261, 524, 462, 293, 465, 502, 56, + 661, 821, 976, 991, 658, 869, 905, 758, 745, 193, + 768, 550, 608, 933, 378, 286, 215, 979, 792, 961, + 61, 688, 793, 644, 986, 403, 106, 366, 905, 644, + 372, 567, 466, 434, 645, 210, 389, 550, 919, 135, + 780, 773, 635, 389, 707, 100, 626, 958, 165, 504, + 920, 176, 193, 713, 857, 265, 203, 50, 668, 108, + 645, 990, 626, 197, 510, 357, 358, 850, 858, 364, + 936, 638 + }; +} diff --git a/src/main/java/org/apache/tools/bzip2/CBZip2InputStream.java b/src/main/java/org/apache/tools/bzip2/CBZip2InputStream.java new file mode 100644 index 000000000..da4c82017 --- /dev/null +++ b/src/main/java/org/apache/tools/bzip2/CBZip2InputStream.java @@ -0,0 +1,865 @@ +/* + * The Apache Software License, Version 1.1 + * + * Copyright (c) 2001-2003 The Apache Software Foundation. All rights + * reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * 3. The end-user documentation included with the redistribution, if + * any, must include the following acknowlegement: + * "This product includes software developed by the + * Apache Software Foundation (http://www.apache.org/)." + * Alternately, this acknowlegement may appear in the software itself, + * if and wherever such third-party acknowlegements normally appear. + * + * 4. The names "Ant" and "Apache Software + * Foundation" must not be used to endorse or promote products derived + * from this software without prior written permission. For written + * permission, please contact apache@apache.org. + * + * 5. Products derived from this software may not be called "Apache" + * nor may "Apache" appear in their names without prior written + * permission of the Apache Group. + * + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED + * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR + * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF + * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT + * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * ==================================================================== + * + * This software consists of voluntary contributions made by many + * individuals on behalf of the Apache Software Foundation. For more + * information on the Apache Software Foundation, please see + * . + */ + +/* + * This package is based on the work done by Keiron Liddle, Aftex Software + * to whom the Ant project is very grateful for his + * great code. + */ +package org.apache.tools.bzip2; + +import java.io.IOException; +import java.io.InputStream; + +/** + * An input stream that decompresses from the BZip2 format (without the file + * header chars) to be read as any other stream. + * + * @author Keiron Liddle + */ +public class CBZip2InputStream extends InputStream implements BZip2Constants { + private static void cadvise() { + System.out.println("CRC Error"); + //throw new CCoruptionError(); + } + + private static void compressedStreamEOF() { + cadvise(); + } + + private void makeMaps() { + int i; + nInUse = 0; + for (i = 0; i < 256; i++) { + if (inUse[i]) { + seqToUnseq[nInUse] = (char) i; + unseqToSeq[i] = (char) nInUse; + nInUse++; + } + } + } + + /* + index of the last char in the block, so + the block size == last + 1. + */ + private int last; + + /* + index in zptr[] of original string after sorting. + */ + private int origPtr; + + /* + always: in the range 0 .. 9. + The current block size is 100000 * this number. + */ + private int blockSize100k; + + private boolean blockRandomised; + + private int bsBuff; + private int bsLive; + private CRC mCrc = new CRC(); + + private boolean[] inUse = new boolean[256]; + private int nInUse; + + private char[] seqToUnseq = new char[256]; + private char[] unseqToSeq = new char[256]; + + private char[] selector = new char[MAX_SELECTORS]; + private char[] selectorMtf = new char[MAX_SELECTORS]; + + private int[] tt; + private char[] ll8; + + /* + freq table collected to save a pass over the data + during decompression. + */ + private int[] unzftab = new int[256]; + + private int[][] limit = new int[N_GROUPS][MAX_ALPHA_SIZE]; + private int[][] base = new int[N_GROUPS][MAX_ALPHA_SIZE]; + private int[][] perm = new int[N_GROUPS][MAX_ALPHA_SIZE]; + private int[] minLens = new int[N_GROUPS]; + + private InputStream bsStream; + + private boolean streamEnd = false; + + private int currentChar = -1; + + private static final int START_BLOCK_STATE = 1; + private static final int RAND_PART_A_STATE = 2; + private static final int RAND_PART_B_STATE = 3; + private static final int RAND_PART_C_STATE = 4; + private static final int NO_RAND_PART_A_STATE = 5; + private static final int NO_RAND_PART_B_STATE = 6; + private static final int NO_RAND_PART_C_STATE = 7; + + private int currentState = START_BLOCK_STATE; + + private int storedBlockCRC, storedCombinedCRC; + private int computedBlockCRC, computedCombinedCRC; + + int i2, count, chPrev, ch2; + int i, tPos; + int rNToGo = 0; + int rTPos = 0; + int j2; + char z; + + public CBZip2InputStream(InputStream zStream) { + ll8 = null; + tt = null; + bsSetStream(zStream); + initialize(); + initBlock(); + setupBlock(); + } + + public int read() { + if (streamEnd) { + return -1; + } else { + int retChar = currentChar; + switch(currentState) { + case START_BLOCK_STATE: + break; + case RAND_PART_A_STATE: + break; + case RAND_PART_B_STATE: + setupRandPartB(); + break; + case RAND_PART_C_STATE: + setupRandPartC(); + break; + case NO_RAND_PART_A_STATE: + break; + case NO_RAND_PART_B_STATE: + setupNoRandPartB(); + break; + case NO_RAND_PART_C_STATE: + setupNoRandPartC(); + break; + default: + break; + } + return retChar; + } + } + + private void initialize() { + char magic3, magic4; + magic3 = bsGetUChar(); + magic4 = bsGetUChar(); + if (magic3 != 'h' || magic4 < '1' || magic4 > '9') { + bsFinishedWithStream(); + streamEnd = true; + return; + } + + setDecompressStructureSizes(magic4 - '0'); + computedCombinedCRC = 0; + } + + private void initBlock() { + char magic1, magic2, magic3, magic4; + char magic5, magic6; + magic1 = bsGetUChar(); + magic2 = bsGetUChar(); + magic3 = bsGetUChar(); + magic4 = bsGetUChar(); + magic5 = bsGetUChar(); + magic6 = bsGetUChar(); + if (magic1 == 0x17 && magic2 == 0x72 && magic3 == 0x45 + && magic4 == 0x38 && magic5 == 0x50 && magic6 == 0x90) { + complete(); + return; + } + + if (magic1 != 0x31 || magic2 != 0x41 || magic3 != 0x59 + || magic4 != 0x26 || magic5 != 0x53 || magic6 != 0x59) { + badBlockHeader(); + streamEnd = true; + return; + } + + storedBlockCRC = bsGetInt32(); + + if (bsR(1) == 1) { + blockRandomised = true; + } else { + blockRandomised = false; + } + + // currBlockNo++; + getAndMoveToFrontDecode(); + + mCrc.initialiseCRC(); + currentState = START_BLOCK_STATE; + } + + private void endBlock() { + computedBlockCRC = mCrc.getFinalCRC(); + /* A bad CRC is considered a fatal error. */ + if (storedBlockCRC != computedBlockCRC) { + crcError(); + } + + computedCombinedCRC = (computedCombinedCRC << 1) + | (computedCombinedCRC >>> 31); + computedCombinedCRC ^= computedBlockCRC; + } + + private void complete() { + storedCombinedCRC = bsGetInt32(); + if (storedCombinedCRC != computedCombinedCRC) { + crcError(); + } + + bsFinishedWithStream(); + streamEnd = true; + } + + private static void blockOverrun() { + cadvise(); + } + + private static void badBlockHeader() { + cadvise(); + } + + private static void crcError() { + cadvise(); + } + + private void bsFinishedWithStream() { + try { + if (this.bsStream != null) { + if (this.bsStream != System.in) { + this.bsStream.close(); + this.bsStream = null; + } + } + } catch (IOException ioe) { + //ignore + } + } + + private void bsSetStream(InputStream f) { + bsStream = f; + bsLive = 0; + bsBuff = 0; + } + + private int bsR(int n) { + int v; + while (bsLive < n) { + int zzi; + char thech = 0; + try { + thech = (char) bsStream.read(); + } catch (IOException e) { + compressedStreamEOF(); + } + if (thech == -1) { + compressedStreamEOF(); + } + zzi = thech; + bsBuff = (bsBuff << 8) | (zzi & 0xff); + bsLive += 8; + } + + v = (bsBuff >> (bsLive - n)) & ((1 << n) - 1); + bsLive -= n; + return v; + } + + private char bsGetUChar() { + return (char) bsR(8); + } + + private int bsGetint() { + int u = 0; + u = (u << 8) | bsR(8); + u = (u << 8) | bsR(8); + u = (u << 8) | bsR(8); + u = (u << 8) | bsR(8); + return u; + } + + private int bsGetIntVS(int numBits) { + return (int) bsR(numBits); + } + + private int bsGetInt32() { + return (int) bsGetint(); + } + + private void hbCreateDecodeTables(int[] limit, int[] base, + int[] perm, char[] length, + int minLen, int maxLen, int alphaSize) { + int pp, i, j, vec; + + pp = 0; + for (i = minLen; i <= maxLen; i++) { + for (j = 0; j < alphaSize; j++) { + if (length[j] == i) { + perm[pp] = j; + pp++; + } + } + } + + for (i = 0; i < MAX_CODE_LEN; i++) { + base[i] = 0; + } + for (i = 0; i < alphaSize; i++) { + base[length[i] + 1]++; + } + + for (i = 1; i < MAX_CODE_LEN; i++) { + base[i] += base[i - 1]; + } + + for (i = 0; i < MAX_CODE_LEN; i++) { + limit[i] = 0; + } + vec = 0; + + for (i = minLen; i <= maxLen; i++) { + vec += (base[i + 1] - base[i]); + limit[i] = vec - 1; + vec <<= 1; + } + for (i = minLen + 1; i <= maxLen; i++) { + base[i] = ((limit[i - 1] + 1) << 1) - base[i]; + } + } + + private void recvDecodingTables() { + char len[][] = new char[N_GROUPS][MAX_ALPHA_SIZE]; + int i, j, t, nGroups, nSelectors, alphaSize; + int minLen, maxLen; + boolean[] inUse16 = new boolean[16]; + + /* Receive the mapping table */ + for (i = 0; i < 16; i++) { + if (bsR(1) == 1) { + inUse16[i] = true; + } else { + inUse16[i] = false; + } + } + + for (i = 0; i < 256; i++) { + inUse[i] = false; + } + + for (i = 0; i < 16; i++) { + if (inUse16[i]) { + for (j = 0; j < 16; j++) { + if (bsR(1) == 1) { + inUse[i * 16 + j] = true; + } + } + } + } + + makeMaps(); + alphaSize = nInUse + 2; + + /* Now the selectors */ + nGroups = bsR(3); + nSelectors = bsR(15); + for (i = 0; i < nSelectors; i++) { + j = 0; + while (bsR(1) == 1) { + j++; + } + selectorMtf[i] = (char) j; + } + + /* Undo the MTF values for the selectors. */ + { + char[] pos = new char[N_GROUPS]; + char tmp, v; + for (v = 0; v < nGroups; v++) { + pos[v] = v; + } + + for (i = 0; i < nSelectors; i++) { + v = selectorMtf[i]; + tmp = pos[v]; + while (v > 0) { + pos[v] = pos[v - 1]; + v--; + } + pos[0] = tmp; + selector[i] = tmp; + } + } + + /* Now the coding tables */ + for (t = 0; t < nGroups; t++) { + int curr = bsR(5); + for (i = 0; i < alphaSize; i++) { + while (bsR(1) == 1) { + if (bsR(1) == 0) { + curr++; + } else { + curr--; + } + } + len[t][i] = (char) curr; + } + } + + /* Create the Huffman decoding tables */ + for (t = 0; t < nGroups; t++) { + minLen = 32; + maxLen = 0; + for (i = 0; i < alphaSize; i++) { + if (len[t][i] > maxLen) { + maxLen = len[t][i]; + } + if (len[t][i] < minLen) { + minLen = len[t][i]; + } + } + hbCreateDecodeTables(limit[t], base[t], perm[t], len[t], minLen, + maxLen, alphaSize); + minLens[t] = minLen; + } + } + + private void getAndMoveToFrontDecode() { + char[] yy = new char[256]; + int i, j, nextSym, limitLast; + int EOB, groupNo, groupPos; + + limitLast = baseBlockSize * blockSize100k; + origPtr = bsGetIntVS(24); + + recvDecodingTables(); + EOB = nInUse + 1; + groupNo = -1; + groupPos = 0; + + /* + Setting up the unzftab entries here is not strictly + necessary, but it does save having to do it later + in a separate pass, and so saves a block's worth of + cache misses. + */ + for (i = 0; i <= 255; i++) { + unzftab[i] = 0; + } + + for (i = 0; i <= 255; i++) { + yy[i] = (char) i; + } + + last = -1; + + { + int zt, zn, zvec, zj; + if (groupPos == 0) { + groupNo++; + groupPos = G_SIZE; + } + groupPos--; + zt = selector[groupNo]; + zn = minLens[zt]; + zvec = bsR(zn); + while (zvec > limit[zt][zn]) { + zn++; + { + { + while (bsLive < 1) { + int zzi; + char thech = 0; + try { + thech = (char) bsStream.read(); + } catch (IOException e) { + compressedStreamEOF(); + } + if (thech == -1) { + compressedStreamEOF(); + } + zzi = thech; + bsBuff = (bsBuff << 8) | (zzi & 0xff); + bsLive += 8; + } + } + zj = (bsBuff >> (bsLive - 1)) & 1; + bsLive--; + } + zvec = (zvec << 1) | zj; + } + nextSym = perm[zt][zvec - base[zt][zn]]; + } + + while (true) { + + if (nextSym == EOB) { + break; + } + + if (nextSym == RUNA || nextSym == RUNB) { + char ch; + int s = -1; + int N = 1; + do { + if (nextSym == RUNA) { + s = s + (0 + 1) * N; + } else if (nextSym == RUNB) { + s = s + (1 + 1) * N; + } + N = N * 2; + { + int zt, zn, zvec, zj; + if (groupPos == 0) { + groupNo++; + groupPos = G_SIZE; + } + groupPos--; + zt = selector[groupNo]; + zn = minLens[zt]; + zvec = bsR(zn); + while (zvec > limit[zt][zn]) { + zn++; + { + { + while (bsLive < 1) { + int zzi; + char thech = 0; + try { + thech = (char) bsStream.read(); + } catch (IOException e) { + compressedStreamEOF(); + } + if (thech == -1) { + compressedStreamEOF(); + } + zzi = thech; + bsBuff = (bsBuff << 8) | (zzi & 0xff); + bsLive += 8; + } + } + zj = (bsBuff >> (bsLive - 1)) & 1; + bsLive--; + } + zvec = (zvec << 1) | zj; + } + nextSym = perm[zt][zvec - base[zt][zn]]; + } + } while (nextSym == RUNA || nextSym == RUNB); + + s++; + ch = seqToUnseq[yy[0]]; + unzftab[ch] += s; + + while (s > 0) { + last++; + ll8[last] = ch; + s--; + } + + if (last >= limitLast) { + blockOverrun(); + } + continue; + } else { + char tmp; + last++; + if (last >= limitLast) { + blockOverrun(); + } + + tmp = yy[nextSym - 1]; + unzftab[seqToUnseq[tmp]]++; + ll8[last] = seqToUnseq[tmp]; + + /* + This loop is hammered during decompression, + hence the unrolling. + + for (j = nextSym-1; j > 0; j--) yy[j] = yy[j-1]; + */ + + j = nextSym - 1; + for (; j > 3; j -= 4) { + yy[j] = yy[j - 1]; + yy[j - 1] = yy[j - 2]; + yy[j - 2] = yy[j - 3]; + yy[j - 3] = yy[j - 4]; + } + for (; j > 0; j--) { + yy[j] = yy[j - 1]; + } + + yy[0] = tmp; + { + int zt, zn, zvec, zj; + if (groupPos == 0) { + groupNo++; + groupPos = G_SIZE; + } + groupPos--; + zt = selector[groupNo]; + zn = minLens[zt]; + zvec = bsR(zn); + while (zvec > limit[zt][zn]) { + zn++; + { + { + while (bsLive < 1) { + int zzi; + char thech = 0; + try { + thech = (char) bsStream.read(); + } catch (IOException e) { + compressedStreamEOF(); + } + zzi = thech; + bsBuff = (bsBuff << 8) | (zzi & 0xff); + bsLive += 8; + } + } + zj = (bsBuff >> (bsLive - 1)) & 1; + bsLive--; + } + zvec = (zvec << 1) | zj; + } + nextSym = perm[zt][zvec - base[zt][zn]]; + } + continue; + } + } + } + + private void setupBlock() { + int[] cftab = new int[257]; + char ch; + + cftab[0] = 0; + for (i = 1; i <= 256; i++) { + cftab[i] = unzftab[i - 1]; + } + for (i = 1; i <= 256; i++) { + cftab[i] += cftab[i - 1]; + } + + for (i = 0; i <= last; i++) { + ch = (char) ll8[i]; + tt[cftab[ch]] = i; + cftab[ch]++; + } + cftab = null; + + tPos = tt[origPtr]; + + count = 0; + i2 = 0; + ch2 = 256; /* not a char and not EOF */ + + if (blockRandomised) { + rNToGo = 0; + rTPos = 0; + setupRandPartA(); + } else { + setupNoRandPartA(); + } + } + + private void setupRandPartA() { + if (i2 <= last) { + chPrev = ch2; + ch2 = ll8[tPos]; + tPos = tt[tPos]; + if (rNToGo == 0) { + rNToGo = rNums[rTPos]; + rTPos++; + if (rTPos == 512) { + rTPos = 0; + } + } + rNToGo--; + ch2 ^= (int) ((rNToGo == 1) ? 1 : 0); + i2++; + + currentChar = ch2; + currentState = RAND_PART_B_STATE; + mCrc.updateCRC(ch2); + } else { + endBlock(); + initBlock(); + setupBlock(); + } + } + + private void setupNoRandPartA() { + if (i2 <= last) { + chPrev = ch2; + ch2 = ll8[tPos]; + tPos = tt[tPos]; + i2++; + + currentChar = ch2; + currentState = NO_RAND_PART_B_STATE; + mCrc.updateCRC(ch2); + } else { + endBlock(); + initBlock(); + setupBlock(); + } + } + + private void setupRandPartB() { + if (ch2 != chPrev) { + currentState = RAND_PART_A_STATE; + count = 1; + setupRandPartA(); + } else { + count++; + if (count >= 4) { + z = ll8[tPos]; + tPos = tt[tPos]; + if (rNToGo == 0) { + rNToGo = rNums[rTPos]; + rTPos++; + if (rTPos == 512) { + rTPos = 0; + } + } + rNToGo--; + z ^= ((rNToGo == 1) ? 1 : 0); + j2 = 0; + currentState = RAND_PART_C_STATE; + setupRandPartC(); + } else { + currentState = RAND_PART_A_STATE; + setupRandPartA(); + } + } + } + + private void setupRandPartC() { + if (j2 < (int) z) { + currentChar = ch2; + mCrc.updateCRC(ch2); + j2++; + } else { + currentState = RAND_PART_A_STATE; + i2++; + count = 0; + setupRandPartA(); + } + } + + private void setupNoRandPartB() { + if (ch2 != chPrev) { + currentState = NO_RAND_PART_A_STATE; + count = 1; + setupNoRandPartA(); + } else { + count++; + if (count >= 4) { + z = ll8[tPos]; + tPos = tt[tPos]; + currentState = NO_RAND_PART_C_STATE; + j2 = 0; + setupNoRandPartC(); + } else { + currentState = NO_RAND_PART_A_STATE; + setupNoRandPartA(); + } + } + } + + private void setupNoRandPartC() { + if (j2 < (int) z) { + currentChar = ch2; + mCrc.updateCRC(ch2); + j2++; + } else { + currentState = NO_RAND_PART_A_STATE; + i2++; + count = 0; + setupNoRandPartA(); + } + } + + private void setDecompressStructureSizes(int newSize100k) { + if (!(0 <= newSize100k && newSize100k <= 9 && 0 <= blockSize100k + && blockSize100k <= 9)) { + // throw new IOException("Invalid block size"); + } + + blockSize100k = newSize100k; + + if (newSize100k == 0) { + return; + } + + int n = baseBlockSize * newSize100k; + ll8 = new char[n]; + tt = new int[n]; + } +} + diff --git a/src/main/java/org/apache/tools/bzip2/CBZip2OutputStream.java b/src/main/java/org/apache/tools/bzip2/CBZip2OutputStream.java new file mode 100644 index 000000000..521cfe70d --- /dev/null +++ b/src/main/java/org/apache/tools/bzip2/CBZip2OutputStream.java @@ -0,0 +1,1665 @@ +/* + * The Apache Software License, Version 1.1 + * + * Copyright (c) 2001-2003 The Apache Software Foundation. All rights + * reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * 3. The end-user documentation included with the redistribution, if + * any, must include the following acknowlegement: + * "This product includes software developed by the + * Apache Software Foundation (http://www.apache.org/)." + * Alternately, this acknowlegement may appear in the software itself, + * if and wherever such third-party acknowlegements normally appear. + * + * 4. The names "Ant" and "Apache Software + * Foundation" must not be used to endorse or promote products derived + * from this software without prior written permission. For written + * permission, please contact apache@apache.org. + * + * 5. Products derived from this software may not be called "Apache" + * nor may "Apache" appear in their names without prior written + * permission of the Apache Group. + * + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED + * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR + * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF + * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT + * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * ==================================================================== + * + * This software consists of voluntary contributions made by many + * individuals on behalf of the Apache Software Foundation. For more + * information on the Apache Software Foundation, please see + * . + */ + +/* + * This package is based on the work done by Keiron Liddle, Aftex Software + * to whom the Ant project is very grateful for his + * great code. + */ + +package org.apache.tools.bzip2; + +import java.io.IOException; +import java.io.OutputStream; + +/** + * An output stream that compresses into the BZip2 format (without the file + * header chars) into another stream. + * + * @author Keiron Liddle + * + * TODO: Update to BZip2 1.0.1 + */ +public class CBZip2OutputStream extends OutputStream implements BZip2Constants { + protected static final int SETMASK = (1 << 21); + protected static final int CLEARMASK = (~SETMASK); + protected static final int GREATER_ICOST = 15; + protected static final int LESSER_ICOST = 0; + protected static final int SMALL_THRESH = 20; + protected static final int DEPTH_THRESH = 10; + + /* + If you are ever unlucky/improbable enough + to get a stack overflow whilst sorting, + increase the following constant and try + again. In practice I have never seen the + stack go above 27 elems, so the following + limit seems very generous. + */ + protected static final int QSORT_STACK_SIZE = 1000; + + private static void panic() { + System.out.println("panic"); + //throw new CError(); + } + + private void makeMaps() { + int i; + nInUse = 0; + for (i = 0; i < 256; i++) { + if (inUse[i]) { + seqToUnseq[nInUse] = (char) i; + unseqToSeq[i] = (char) nInUse; + nInUse++; + } + } + } + + protected static void hbMakeCodeLengths(char[] len, int[] freq, + int alphaSize, int maxLen) { + /* + Nodes and heap entries run from 1. Entry 0 + for both the heap and nodes is a sentinel. + */ + int nNodes, nHeap, n1, n2, i, j, k; + boolean tooLong; + + int[] heap = new int[MAX_ALPHA_SIZE + 2]; + int[] weight = new int[MAX_ALPHA_SIZE * 2]; + int[] parent = new int[MAX_ALPHA_SIZE * 2]; + + for (i = 0; i < alphaSize; i++) { + weight[i + 1] = (freq[i] == 0 ? 1 : freq[i]) << 8; + } + + while (true) { + nNodes = alphaSize; + nHeap = 0; + + heap[0] = 0; + weight[0] = 0; + parent[0] = -2; + + for (i = 1; i <= alphaSize; i++) { + parent[i] = -1; + nHeap++; + heap[nHeap] = i; + { + int zz, tmp; + zz = nHeap; + tmp = heap[zz]; + while (weight[tmp] < weight[heap[zz >> 1]]) { + heap[zz] = heap[zz >> 1]; + zz >>= 1; + } + heap[zz] = tmp; + } + } + if (!(nHeap < (MAX_ALPHA_SIZE + 2))) { + panic(); + } + + while (nHeap > 1) { + n1 = heap[1]; + heap[1] = heap[nHeap]; + nHeap--; + { + int zz = 0, yy = 0, tmp = 0; + zz = 1; + tmp = heap[zz]; + while (true) { + yy = zz << 1; + if (yy > nHeap) { + break; + } + if (yy < nHeap + && weight[heap[yy + 1]] < weight[heap[yy]]) { + yy++; + } + if (weight[tmp] < weight[heap[yy]]) { + break; + } + heap[zz] = heap[yy]; + zz = yy; + } + heap[zz] = tmp; + } + n2 = heap[1]; + heap[1] = heap[nHeap]; + nHeap--; + { + int zz = 0, yy = 0, tmp = 0; + zz = 1; + tmp = heap[zz]; + while (true) { + yy = zz << 1; + if (yy > nHeap) { + break; + } + if (yy < nHeap + && weight[heap[yy + 1]] < weight[heap[yy]]) { + yy++; + } + if (weight[tmp] < weight[heap[yy]]) { + break; + } + heap[zz] = heap[yy]; + zz = yy; + } + heap[zz] = tmp; + } + nNodes++; + parent[n1] = parent[n2] = nNodes; + + weight[nNodes] = ((weight[n1] & 0xffffff00) + + (weight[n2] & 0xffffff00)) + | (1 + (((weight[n1] & 0x000000ff) > + (weight[n2] & 0x000000ff)) ? + (weight[n1] & 0x000000ff) : + (weight[n2] & 0x000000ff))); + + parent[nNodes] = -1; + nHeap++; + heap[nHeap] = nNodes; + { + int zz = 0, tmp = 0; + zz = nHeap; + tmp = heap[zz]; + while (weight[tmp] < weight[heap[zz >> 1]]) { + heap[zz] = heap[zz >> 1]; + zz >>= 1; + } + heap[zz] = tmp; + } + } + if (!(nNodes < (MAX_ALPHA_SIZE * 2))) { + panic(); + } + + tooLong = false; + for (i = 1; i <= alphaSize; i++) { + j = 0; + k = i; + while (parent[k] >= 0) { + k = parent[k]; + j++; + } + len[i - 1] = (char) j; + if (j > maxLen) { + tooLong = true; + } + } + + if (!tooLong) { + break; + } + + for (i = 1; i < alphaSize; i++) { + j = weight[i] >> 8; + j = 1 + (j / 2); + weight[i] = j << 8; + } + } + } + + /* + index of the last char in the block, so + the block size == last + 1. + */ + int last; + + /* + index in zptr[] of original string after sorting. + */ + int origPtr; + + /* + always: in the range 0 .. 9. + The current block size is 100000 * this number. + */ + int blockSize100k; + + boolean blockRandomised; + + int bytesOut; + int bsBuff; + int bsLive; + CRC mCrc = new CRC(); + + private boolean[] inUse = new boolean[256]; + private int nInUse; + + private char[] seqToUnseq = new char[256]; + private char[] unseqToSeq = new char[256]; + + private char[] selector = new char[MAX_SELECTORS]; + private char[] selectorMtf = new char[MAX_SELECTORS]; + + private char[] block; + private int[] quadrant; + private int[] zptr; + private short[] szptr; + private int[] ftab; + + private int nMTF; + + private int[] mtfFreq = new int[MAX_ALPHA_SIZE]; + + /* + * Used when sorting. If too many long comparisons + * happen, we stop sorting, randomise the block + * slightly, and try again. + */ + private int workFactor; + private int workDone; + private int workLimit; + private boolean firstAttempt; + private int nBlocksRandomised; + + private int currentChar = -1; + private int runLength = 0; + + public CBZip2OutputStream(OutputStream inStream) throws IOException { + this(inStream, 9); + } + + public CBZip2OutputStream(OutputStream inStream, int inBlockSize) + throws IOException { + block = null; + quadrant = null; + zptr = null; + ftab = null; + + bsSetStream(inStream); + + workFactor = 50; + if (inBlockSize > 9) { + inBlockSize = 9; + } + if (inBlockSize < 1) { + inBlockSize = 1; + } + blockSize100k = inBlockSize; + allocateCompressStructures(); + initialize(); + initBlock(); + } + + /** + * + * modified by Oliver Merkel, 010128 + * + */ + public void write(int bv) throws IOException { + int b = (256 + bv) % 256; + if (currentChar != -1) { + if (currentChar == b) { + runLength++; + if (runLength > 254) { + writeRun(); + currentChar = -1; + runLength = 0; + } + } else { + writeRun(); + runLength = 1; + currentChar = b; + } + } else { + currentChar = b; + runLength++; + } + } + + private void writeRun() throws IOException { + if (last < allowableBlockSize) { + inUse[currentChar] = true; + for (int i = 0; i < runLength; i++) { + mCrc.updateCRC((char) currentChar); + } + switch (runLength) { + case 1: + last++; + block[last + 1] = (char) currentChar; + break; + case 2: + last++; + block[last + 1] = (char) currentChar; + last++; + block[last + 1] = (char) currentChar; + break; + case 3: + last++; + block[last + 1] = (char) currentChar; + last++; + block[last + 1] = (char) currentChar; + last++; + block[last + 1] = (char) currentChar; + break; + default: + inUse[runLength - 4] = true; + last++; + block[last + 1] = (char) currentChar; + last++; + block[last + 1] = (char) currentChar; + last++; + block[last + 1] = (char) currentChar; + last++; + block[last + 1] = (char) currentChar; + last++; + block[last + 1] = (char) (runLength - 4); + break; + } + } else { + endBlock(); + initBlock(); + writeRun(); + } + } + + boolean closed = false; + + protected void finalize() throws Throwable { + close(); + super.finalize(); + } + + public void close() throws IOException { + if (closed) { + return; + } + + if (runLength > 0) { + writeRun(); + } + currentChar = -1; + endBlock(); + endCompression(); + closed = true; + super.close(); + bsStream.close(); + } + + public void flush() throws IOException { + super.flush(); + bsStream.flush(); + } + + private int blockCRC, combinedCRC; + + private void initialize() throws IOException { + bytesOut = 0; + nBlocksRandomised = 0; + + /* Write `magic' bytes h indicating file-format == huffmanised, + followed by a digit indicating blockSize100k. + */ + bsPutUChar('h'); + bsPutUChar('0' + blockSize100k); + + combinedCRC = 0; + } + + private int allowableBlockSize; + + private void initBlock() { + // blockNo++; + mCrc.initialiseCRC(); + last = -1; + // ch = 0; + + for (int i = 0; i < 256; i++) { + inUse[i] = false; + } + + /* 20 is just a paranoia constant */ + allowableBlockSize = baseBlockSize * blockSize100k - 20; + } + + private void endBlock() throws IOException { + blockCRC = mCrc.getFinalCRC(); + combinedCRC = (combinedCRC << 1) | (combinedCRC >>> 31); + combinedCRC ^= blockCRC; + + /* sort the block and establish posn of original string */ + doReversibleTransformation(); + + /* + A 6-byte block header, the value chosen arbitrarily + as 0x314159265359 :-). A 32 bit value does not really + give a strong enough guarantee that the value will not + appear by chance in the compressed datastream. Worst-case + probability of this event, for a 900k block, is about + 2.0e-3 for 32 bits, 1.0e-5 for 40 bits and 4.0e-8 for 48 bits. + For a compressed file of size 100Gb -- about 100000 blocks -- + only a 48-bit marker will do. NB: normal compression/ + decompression do *not* rely on these statistical properties. + They are only important when trying to recover blocks from + damaged files. + */ + bsPutUChar(0x31); + bsPutUChar(0x41); + bsPutUChar(0x59); + bsPutUChar(0x26); + bsPutUChar(0x53); + bsPutUChar(0x59); + + /* Now the block's CRC, so it is in a known place. */ + bsPutint(blockCRC); + + /* Now a single bit indicating randomisation. */ + if (blockRandomised) { + bsW(1, 1); + nBlocksRandomised++; + } else { + bsW(1, 0); + } + + /* Finally, block's contents proper. */ + moveToFrontCodeAndSend(); + } + + private void endCompression() throws IOException { + /* + Now another magic 48-bit number, 0x177245385090, to + indicate the end of the last block. (sqrt(pi), if + you want to know. I did want to use e, but it contains + too much repetition -- 27 18 28 18 28 46 -- for me + to feel statistically comfortable. Call me paranoid.) + */ + bsPutUChar(0x17); + bsPutUChar(0x72); + bsPutUChar(0x45); + bsPutUChar(0x38); + bsPutUChar(0x50); + bsPutUChar(0x90); + + bsPutint(combinedCRC); + + bsFinishedWithStream(); + } + + private void hbAssignCodes (int[] code, char[] length, int minLen, + int maxLen, int alphaSize) { + int n, vec, i; + + vec = 0; + for (n = minLen; n <= maxLen; n++) { + for (i = 0; i < alphaSize; i++) { + if (length[i] == n) { + code[i] = vec; + vec++; + } + }; + vec <<= 1; + } + } + + private void bsSetStream(OutputStream f) { + bsStream = f; + bsLive = 0; + bsBuff = 0; + bytesOut = 0; + } + + private void bsFinishedWithStream() throws IOException { + while (bsLive > 0) { + int ch = (bsBuff >> 24); + try { + bsStream.write(ch); // write 8-bit + } catch (IOException e) { + throw e; + } + bsBuff <<= 8; + bsLive -= 8; + bytesOut++; + } + } + + private void bsW(int n, int v) throws IOException { + while (bsLive >= 8) { + int ch = (bsBuff >> 24); + try { + bsStream.write(ch); // write 8-bit + } catch (IOException e) { + throw e; + } + bsBuff <<= 8; + bsLive -= 8; + bytesOut++; + } + bsBuff |= (v << (32 - bsLive - n)); + bsLive += n; + } + + private void bsPutUChar(int c) throws IOException { + bsW(8, c); + } + + private void bsPutint(int u) throws IOException { + bsW(8, (u >> 24) & 0xff); + bsW(8, (u >> 16) & 0xff); + bsW(8, (u >> 8) & 0xff); + bsW(8, u & 0xff); + } + + private void bsPutIntVS(int numBits, int c) throws IOException { + bsW(numBits, c); + } + + private void sendMTFValues() throws IOException { + char len[][] = new char[N_GROUPS][MAX_ALPHA_SIZE]; + + int v, t, i, j, gs, ge, totc, bt, bc, iter; + int nSelectors = 0, alphaSize, minLen, maxLen, selCtr; + int nGroups; + + alphaSize = nInUse + 2; + for (t = 0; t < N_GROUPS; t++) { + for (v = 0; v < alphaSize; v++) { + len[t][v] = (char) GREATER_ICOST; + } + } + + /* Decide how many coding tables to use */ + if (nMTF <= 0) { + panic(); + } + + if (nMTF < 200) { + nGroups = 2; + } else if (nMTF < 600) { + nGroups = 3; + } else if (nMTF < 1200) { + nGroups = 4; + } else if (nMTF < 2400) { + nGroups = 5; + } else { + nGroups = 6; + } + + /* Generate an initial set of coding tables */ { + int nPart, remF, tFreq, aFreq; + + nPart = nGroups; + remF = nMTF; + gs = 0; + while (nPart > 0) { + tFreq = remF / nPart; + ge = gs - 1; + aFreq = 0; + while (aFreq < tFreq && ge < alphaSize - 1) { + ge++; + aFreq += mtfFreq[ge]; + } + + if (ge > gs && nPart != nGroups && nPart != 1 + && ((nGroups - nPart) % 2 == 1)) { + aFreq -= mtfFreq[ge]; + ge--; + } + + for (v = 0; v < alphaSize; v++) { + if (v >= gs && v <= ge) { + len[nPart - 1][v] = (char) LESSER_ICOST; + } else { + len[nPart - 1][v] = (char) GREATER_ICOST; + } + } + + nPart--; + gs = ge + 1; + remF -= aFreq; + } + } + + int[][] rfreq = new int[N_GROUPS][MAX_ALPHA_SIZE]; + int[] fave = new int[N_GROUPS]; + short[] cost = new short[N_GROUPS]; + /* + Iterate up to N_ITERS times to improve the tables. + */ + for (iter = 0; iter < N_ITERS; iter++) { + for (t = 0; t < nGroups; t++) { + fave[t] = 0; + } + + for (t = 0; t < nGroups; t++) { + for (v = 0; v < alphaSize; v++) { + rfreq[t][v] = 0; + } + } + + nSelectors = 0; + totc = 0; + gs = 0; + while (true) { + + /* Set group start & end marks. */ + if (gs >= nMTF) { + break; + } + ge = gs + G_SIZE - 1; + if (ge >= nMTF) { + ge = nMTF - 1; + } + + /* + Calculate the cost of this group as coded + by each of the coding tables. + */ + for (t = 0; t < nGroups; t++) { + cost[t] = 0; + } + + if (nGroups == 6) { + short cost0, cost1, cost2, cost3, cost4, cost5; + cost0 = cost1 = cost2 = cost3 = cost4 = cost5 = 0; + for (i = gs; i <= ge; i++) { + short icv = szptr[i]; + cost0 += len[0][icv]; + cost1 += len[1][icv]; + cost2 += len[2][icv]; + cost3 += len[3][icv]; + cost4 += len[4][icv]; + cost5 += len[5][icv]; + } + cost[0] = cost0; + cost[1] = cost1; + cost[2] = cost2; + cost[3] = cost3; + cost[4] = cost4; + cost[5] = cost5; + } else { + for (i = gs; i <= ge; i++) { + short icv = szptr[i]; + for (t = 0; t < nGroups; t++) { + cost[t] += len[t][icv]; + } + } + } + + /* + Find the coding table which is best for this group, + and record its identity in the selector table. + */ + bc = 999999999; + bt = -1; + for (t = 0; t < nGroups; t++) { + if (cost[t] < bc) { + bc = cost[t]; + bt = t; + } + }; + totc += bc; + fave[bt]++; + selector[nSelectors] = (char) bt; + nSelectors++; + + /* + Increment the symbol frequencies for the selected table. + */ + for (i = gs; i <= ge; i++) { + rfreq[bt][szptr[i]]++; + } + + gs = ge + 1; + } + + /* + Recompute the tables based on the accumulated frequencies. + */ + for (t = 0; t < nGroups; t++) { + hbMakeCodeLengths(len[t], rfreq[t], alphaSize, 20); + } + } + + rfreq = null; + fave = null; + cost = null; + + if (!(nGroups < 8)) { + panic(); + } + if (!(nSelectors < 32768 && nSelectors <= (2 + (900000 / G_SIZE)))) { + panic(); + } + + + /* Compute MTF values for the selectors. */ + { + char[] pos = new char[N_GROUPS]; + char ll_i, tmp2, tmp; + for (i = 0; i < nGroups; i++) { + pos[i] = (char) i; + } + for (i = 0; i < nSelectors; i++) { + ll_i = selector[i]; + j = 0; + tmp = pos[j]; + while (ll_i != tmp) { + j++; + tmp2 = tmp; + tmp = pos[j]; + pos[j] = tmp2; + } + pos[0] = tmp; + selectorMtf[i] = (char) j; + } + } + + int[][] code = new int[N_GROUPS][MAX_ALPHA_SIZE]; + + /* Assign actual codes for the tables. */ + for (t = 0; t < nGroups; t++) { + minLen = 32; + maxLen = 0; + for (i = 0; i < alphaSize; i++) { + if (len[t][i] > maxLen) { + maxLen = len[t][i]; + } + if (len[t][i] < minLen) { + minLen = len[t][i]; + } + } + if (maxLen > 20) { + panic(); + } + if (minLen < 1) { + panic(); + } + hbAssignCodes(code[t], len[t], minLen, maxLen, alphaSize); + } + + /* Transmit the mapping table. */ + { + boolean[] inUse16 = new boolean[16]; + for (i = 0; i < 16; i++) { + inUse16[i] = false; + for (j = 0; j < 16; j++) { + if (inUse[i * 16 + j]) { + inUse16[i] = true; + } + } + } + + for (i = 0; i < 16; i++) { + if (inUse16[i]) { + bsW(1, 1); + } else { + bsW(1, 0); + } + } + + for (i = 0; i < 16; i++) { + if (inUse16[i]) { + for (j = 0; j < 16; j++) { + if (inUse[i * 16 + j]) { + bsW(1, 1); + } else { + bsW(1, 0); + } + } + } + } + + } + + /* Now the selectors. */ + bsW (3, nGroups); + bsW (15, nSelectors); + for (i = 0; i < nSelectors; i++) { + for (j = 0; j < selectorMtf[i]; j++) { + bsW(1, 1); + } + bsW(1, 0); + } + + /* Now the coding tables. */ + for (t = 0; t < nGroups; t++) { + int curr = len[t][0]; + bsW(5, curr); + for (i = 0; i < alphaSize; i++) { + while (curr < len[t][i]) { + bsW(2, 2); + curr++; /* 10 */ + } + while (curr > len[t][i]) { + bsW(2, 3); + curr--; /* 11 */ + } + bsW (1, 0); + } + } + + /* And finally, the block data proper */ + selCtr = 0; + gs = 0; + while (true) { + if (gs >= nMTF) { + break; + } + ge = gs + G_SIZE - 1; + if (ge >= nMTF) { + ge = nMTF - 1; + } + for (i = gs; i <= ge; i++) { + bsW(len[selector[selCtr]][szptr[i]], + code[selector[selCtr]][szptr[i]]); + } + + gs = ge + 1; + selCtr++; + } + if (!(selCtr == nSelectors)) { + panic(); + } + } + + private void moveToFrontCodeAndSend () throws IOException { + bsPutIntVS(24, origPtr); + generateMTFValues(); + sendMTFValues(); + } + + private OutputStream bsStream; + + private void simpleSort(int lo, int hi, int d) { + int i, j, h, bigN, hp; + int v; + + bigN = hi - lo + 1; + if (bigN < 2) { + return; + } + + hp = 0; + while (incs[hp] < bigN) { + hp++; + } + hp--; + + for (; hp >= 0; hp--) { + h = incs[hp]; + + i = lo + h; + while (true) { + /* copy 1 */ + if (i > hi) { + break; + } + v = zptr[i]; + j = i; + while (fullGtU(zptr[j - h] + d, v + d)) { + zptr[j] = zptr[j - h]; + j = j - h; + if (j <= (lo + h - 1)) { + break; + } + } + zptr[j] = v; + i++; + + /* copy 2 */ + if (i > hi) { + break; + } + v = zptr[i]; + j = i; + while (fullGtU(zptr[j - h] + d, v + d)) { + zptr[j] = zptr[j - h]; + j = j - h; + if (j <= (lo + h - 1)) { + break; + } + } + zptr[j] = v; + i++; + + /* copy 3 */ + if (i > hi) { + break; + } + v = zptr[i]; + j = i; + while (fullGtU(zptr[j - h] + d, v + d)) { + zptr[j] = zptr[j - h]; + j = j - h; + if (j <= (lo + h - 1)) { + break; + } + } + zptr[j] = v; + i++; + + if (workDone > workLimit && firstAttempt) { + return; + } + } + } + } + + private void vswap(int p1, int p2, int n) { + int temp = 0; + while (n > 0) { + temp = zptr[p1]; + zptr[p1] = zptr[p2]; + zptr[p2] = temp; + p1++; + p2++; + n--; + } + } + + private char med3(char a, char b, char c) { + char t; + if (a > b) { + t = a; + a = b; + b = t; + } + if (b > c) { + t = b; + b = c; + c = t; + } + if (a > b) { + b = a; + } + return b; + } + + private static class StackElem { + int ll; + int hh; + int dd; + } + + private void qSort3(int loSt, int hiSt, int dSt) { + int unLo, unHi, ltLo, gtHi, med, n, m; + int sp, lo, hi, d; + StackElem[] stack = new StackElem[QSORT_STACK_SIZE]; + for (int count = 0; count < QSORT_STACK_SIZE; count++) { + stack[count] = new StackElem(); + } + + sp = 0; + + stack[sp].ll = loSt; + stack[sp].hh = hiSt; + stack[sp].dd = dSt; + sp++; + + while (sp > 0) { + if (sp >= QSORT_STACK_SIZE) { + panic(); + } + + sp--; + lo = stack[sp].ll; + hi = stack[sp].hh; + d = stack[sp].dd; + + if (hi - lo < SMALL_THRESH || d > DEPTH_THRESH) { + simpleSort(lo, hi, d); + if (workDone > workLimit && firstAttempt) { + return; + } + continue; + } + + med = med3(block[zptr[lo] + d + 1], + block[zptr[hi ] + d + 1], + block[zptr[(lo + hi) >> 1] + d + 1]); + + unLo = ltLo = lo; + unHi = gtHi = hi; + + while (true) { + while (true) { + if (unLo > unHi) { + break; + } + n = ((int) block[zptr[unLo] + d + 1]) - med; + if (n == 0) { + int temp = 0; + temp = zptr[unLo]; + zptr[unLo] = zptr[ltLo]; + zptr[ltLo] = temp; + ltLo++; + unLo++; + continue; + }; + if (n > 0) { + break; + } + unLo++; + } + while (true) { + if (unLo > unHi) { + break; + } + n = ((int) block[zptr[unHi] + d + 1]) - med; + if (n == 0) { + int temp = 0; + temp = zptr[unHi]; + zptr[unHi] = zptr[gtHi]; + zptr[gtHi] = temp; + gtHi--; + unHi--; + continue; + }; + if (n < 0) { + break; + } + unHi--; + } + if (unLo > unHi) { + break; + } + int temp = 0; + temp = zptr[unLo]; + zptr[unLo] = zptr[unHi]; + zptr[unHi] = temp; + unLo++; + unHi--; + } + + if (gtHi < ltLo) { + stack[sp].ll = lo; + stack[sp].hh = hi; + stack[sp].dd = d + 1; + sp++; + continue; + } + + n = ((ltLo - lo) < (unLo - ltLo)) ? (ltLo - lo) : (unLo - ltLo); + vswap(lo, unLo - n, n); + m = ((hi - gtHi) < (gtHi - unHi)) ? (hi - gtHi) : (gtHi - unHi); + vswap(unLo, hi - m + 1, m); + + n = lo + unLo - ltLo - 1; + m = hi - (gtHi - unHi) + 1; + + stack[sp].ll = lo; + stack[sp].hh = n; + stack[sp].dd = d; + sp++; + + stack[sp].ll = n + 1; + stack[sp].hh = m - 1; + stack[sp].dd = d + 1; + sp++; + + stack[sp].ll = m; + stack[sp].hh = hi; + stack[sp].dd = d; + sp++; + } + } + + private void mainSort() { + int i, j, ss, sb; + int[] runningOrder = new int[256]; + int[] copy = new int[256]; + boolean[] bigDone = new boolean[256]; + int c1, c2; + int numQSorted; + + /* + In the various block-sized structures, live data runs + from 0 to last+NUM_OVERSHOOT_BYTES inclusive. First, + set up the overshoot area for block. + */ + + // if (verbosity >= 4) fprintf ( stderr, " sort initialise ...\n" ); + for (i = 0; i < NUM_OVERSHOOT_BYTES; i++) { + block[last + i + 2] = block[(i % (last + 1)) + 1]; + } + for (i = 0; i <= last + NUM_OVERSHOOT_BYTES; i++) { + quadrant[i] = 0; + } + + block[0] = (char) (block[last + 1]); + + if (last < 4000) { + /* + Use simpleSort(), since the full sorting mechanism + has quite a large constant overhead. + */ + for (i = 0; i <= last; i++) { + zptr[i] = i; + } + firstAttempt = false; + workDone = workLimit = 0; + simpleSort(0, last, 0); + } else { + numQSorted = 0; + for (i = 0; i <= 255; i++) { + bigDone[i] = false; + } + + for (i = 0; i <= 65536; i++) { + ftab[i] = 0; + } + + c1 = block[0]; + for (i = 0; i <= last; i++) { + c2 = block[i + 1]; + ftab[(c1 << 8) + c2]++; + c1 = c2; + } + + for (i = 1; i <= 65536; i++) { + ftab[i] += ftab[i - 1]; + } + + c1 = block[1]; + for (i = 0; i < last; i++) { + c2 = block[i + 2]; + j = (c1 << 8) + c2; + c1 = c2; + ftab[j]--; + zptr[ftab[j]] = i; + } + + j = ((block[last + 1]) << 8) + (block[1]); + ftab[j]--; + zptr[ftab[j]] = last; + + /* + Now ftab contains the first loc of every small bucket. + Calculate the running order, from smallest to largest + big bucket. + */ + + for (i = 0; i <= 255; i++) { + runningOrder[i] = i; + } + + { + int vv; + int h = 1; + do { + h = 3 * h + 1; + } + while (h <= 256); + do { + h = h / 3; + for (i = h; i <= 255; i++) { + vv = runningOrder[i]; + j = i; + while ((ftab[((runningOrder[j - h]) + 1) << 8] + - ftab[(runningOrder[j - h]) << 8]) > + (ftab[((vv) + 1) << 8] - ftab[(vv) << 8])) { + runningOrder[j] = runningOrder[j - h]; + j = j - h; + if (j <= (h - 1)) { + break; + } + } + runningOrder[j] = vv; + } + } while (h != 1); + } + + /* + The main sorting loop. + */ + for (i = 0; i <= 255; i++) { + + /* + Process big buckets, starting with the least full. + */ + ss = runningOrder[i]; + + /* + Complete the big bucket [ss] by quicksorting + any unsorted small buckets [ss, j]. Hopefully + previous pointer-scanning phases have already + completed many of the small buckets [ss, j], so + we don't have to sort them at all. + */ + for (j = 0; j <= 255; j++) { + sb = (ss << 8) + j; + if (!((ftab[sb] & SETMASK) == SETMASK)) { + int lo = ftab[sb] & CLEARMASK; + int hi = (ftab[sb + 1] & CLEARMASK) - 1; + if (hi > lo) { + qSort3(lo, hi, 2); + numQSorted += (hi - lo + 1); + if (workDone > workLimit && firstAttempt) { + return; + } + } + ftab[sb] |= SETMASK; + } + } + + /* + The ss big bucket is now done. Record this fact, + and update the quadrant descriptors. Remember to + update quadrants in the overshoot area too, if + necessary. The "if (i < 255)" test merely skips + this updating for the last bucket processed, since + updating for the last bucket is pointless. + */ + bigDone[ss] = true; + + if (i < 255) { + int bbStart = ftab[ss << 8] & CLEARMASK; + int bbSize = (ftab[(ss + 1) << 8] & CLEARMASK) - bbStart; + int shifts = 0; + + while ((bbSize >> shifts) > 65534) { + shifts++; + } + + for (j = 0; j < bbSize; j++) { + int a2update = zptr[bbStart + j]; + int qVal = (j >> shifts); + quadrant[a2update] = qVal; + if (a2update < NUM_OVERSHOOT_BYTES) { + quadrant[a2update + last + 1] = qVal; + } + } + + if (!(((bbSize - 1) >> shifts) <= 65535)) { + panic(); + } + } + + /* + Now scan this big bucket so as to synthesise the + sorted order for small buckets [t, ss] for all t != ss. + */ + for (j = 0; j <= 255; j++) { + copy[j] = ftab[(j << 8) + ss] & CLEARMASK; + } + + for (j = ftab[ss << 8] & CLEARMASK; + j < (ftab[(ss + 1) << 8] & CLEARMASK); j++) { + c1 = block[zptr[j]]; + if (!bigDone[c1]) { + zptr[copy[c1]] = zptr[j] == 0 ? last : zptr[j] - 1; + copy[c1]++; + } + } + + for (j = 0; j <= 255; j++) { + ftab[(j << 8) + ss] |= SETMASK; + } + } + } + } + + private void randomiseBlock() { + int i; + int rNToGo = 0; + int rTPos = 0; + for (i = 0; i < 256; i++) { + inUse[i] = false; + } + + for (i = 0; i <= last; i++) { + if (rNToGo == 0) { + rNToGo = (char) rNums[rTPos]; + rTPos++; + if (rTPos == 512) { + rTPos = 0; + } + } + rNToGo--; + block[i + 1] ^= ((rNToGo == 1) ? 1 : 0); + // handle 16 bit signed numbers + block[i + 1] &= 0xFF; + + inUse[block[i + 1]] = true; + } + } + + private void doReversibleTransformation() { + int i; + + workLimit = workFactor * last; + workDone = 0; + blockRandomised = false; + firstAttempt = true; + + mainSort(); + + if (workDone > workLimit && firstAttempt) { + randomiseBlock(); + workLimit = workDone = 0; + blockRandomised = true; + firstAttempt = false; + mainSort(); + } + + origPtr = -1; + for (i = 0; i <= last; i++) { + if (zptr[i] == 0) { + origPtr = i; + break; + } + }; + + if (origPtr == -1) { + panic(); + } + } + + private boolean fullGtU(int i1, int i2) { + int k; + char c1, c2; + int s1, s2; + + c1 = block[i1 + 1]; + c2 = block[i2 + 1]; + if (c1 != c2) { + return (c1 > c2); + } + i1++; + i2++; + + c1 = block[i1 + 1]; + c2 = block[i2 + 1]; + if (c1 != c2) { + return (c1 > c2); + } + i1++; + i2++; + + c1 = block[i1 + 1]; + c2 = block[i2 + 1]; + if (c1 != c2) { + return (c1 > c2); + } + i1++; + i2++; + + c1 = block[i1 + 1]; + c2 = block[i2 + 1]; + if (c1 != c2) { + return (c1 > c2); + } + i1++; + i2++; + + c1 = block[i1 + 1]; + c2 = block[i2 + 1]; + if (c1 != c2) { + return (c1 > c2); + } + i1++; + i2++; + + c1 = block[i1 + 1]; + c2 = block[i2 + 1]; + if (c1 != c2) { + return (c1 > c2); + } + i1++; + i2++; + + k = last + 1; + + do { + c1 = block[i1 + 1]; + c2 = block[i2 + 1]; + if (c1 != c2) { + return (c1 > c2); + } + s1 = quadrant[i1]; + s2 = quadrant[i2]; + if (s1 != s2) { + return (s1 > s2); + } + i1++; + i2++; + + c1 = block[i1 + 1]; + c2 = block[i2 + 1]; + if (c1 != c2) { + return (c1 > c2); + } + s1 = quadrant[i1]; + s2 = quadrant[i2]; + if (s1 != s2) { + return (s1 > s2); + } + i1++; + i2++; + + c1 = block[i1 + 1]; + c2 = block[i2 + 1]; + if (c1 != c2) { + return (c1 > c2); + } + s1 = quadrant[i1]; + s2 = quadrant[i2]; + if (s1 != s2) { + return (s1 > s2); + } + i1++; + i2++; + + c1 = block[i1 + 1]; + c2 = block[i2 + 1]; + if (c1 != c2) { + return (c1 > c2); + } + s1 = quadrant[i1]; + s2 = quadrant[i2]; + if (s1 != s2) { + return (s1 > s2); + } + i1++; + i2++; + + if (i1 > last) { + i1 -= last; + i1--; + }; + if (i2 > last) { + i2 -= last; + i2--; + }; + + k -= 4; + workDone++; + } while (k >= 0); + + return false; + } + + /* + Knuth's increments seem to work better + than Incerpi-Sedgewick here. Possibly + because the number of elems to sort is + usually small, typically <= 20. + */ + private int[] incs = { 1, 4, 13, 40, 121, 364, 1093, 3280, + 9841, 29524, 88573, 265720, + 797161, 2391484 }; + + private void allocateCompressStructures () { + int n = baseBlockSize * blockSize100k; + block = new char[(n + 1 + NUM_OVERSHOOT_BYTES)]; + quadrant = new int[(n + NUM_OVERSHOOT_BYTES)]; + zptr = new int[n]; + ftab = new int[65537]; + + if (block == null || quadrant == null || zptr == null + || ftab == null) { + //int totalDraw = (n + 1 + NUM_OVERSHOOT_BYTES) + (n + NUM_OVERSHOOT_BYTES) + n + 65537; + //compressOutOfMemory ( totalDraw, n ); + } + + /* + The back end needs a place to store the MTF values + whilst it calculates the coding tables. We could + put them in the zptr array. However, these values + will fit in a short, so we overlay szptr at the + start of zptr, in the hope of reducing the number + of cache misses induced by the multiple traversals + of the MTF values when calculating coding tables. + Seems to improve compression speed by about 1%. + */ + // szptr = zptr; + + + szptr = new short[2 * n]; + } + + private void generateMTFValues() { + char[] yy = new char[256]; + int i, j; + char tmp; + char tmp2; + int zPend; + int wr; + int EOB; + + makeMaps(); + EOB = nInUse + 1; + + for (i = 0; i <= EOB; i++) { + mtfFreq[i] = 0; + } + + wr = 0; + zPend = 0; + for (i = 0; i < nInUse; i++) { + yy[i] = (char) i; + } + + + for (i = 0; i <= last; i++) { + char ll_i; + + ll_i = unseqToSeq[block[zptr[i]]]; + + j = 0; + tmp = yy[j]; + while (ll_i != tmp) { + j++; + tmp2 = tmp; + tmp = yy[j]; + yy[j] = tmp2; + }; + yy[0] = tmp; + + if (j == 0) { + zPend++; + } else { + if (zPend > 0) { + zPend--; + while (true) { + switch (zPend % 2) { + case 0: + szptr[wr] = (short) RUNA; + wr++; + mtfFreq[RUNA]++; + break; + case 1: + szptr[wr] = (short) RUNB; + wr++; + mtfFreq[RUNB]++; + break; + }; + if (zPend < 2) { + break; + } + zPend = (zPend - 2) / 2; + }; + zPend = 0; + } + szptr[wr] = (short) (j + 1); + wr++; + mtfFreq[j + 1]++; + } + } + + if (zPend > 0) { + zPend--; + while (true) { + switch (zPend % 2) { + case 0: + szptr[wr] = (short) RUNA; + wr++; + mtfFreq[RUNA]++; + break; + case 1: + szptr[wr] = (short) RUNB; + wr++; + mtfFreq[RUNB]++; + break; + } + if (zPend < 2) { + break; + } + zPend = (zPend - 2) / 2; + } + } + + szptr[wr] = (short) EOB; + wr++; + mtfFreq[EOB]++; + + nMTF = wr; + } +} + + diff --git a/src/main/java/org/apache/tools/bzip2/CRC.java b/src/main/java/org/apache/tools/bzip2/CRC.java new file mode 100644 index 000000000..bc8bc644b --- /dev/null +++ b/src/main/java/org/apache/tools/bzip2/CRC.java @@ -0,0 +1,167 @@ +/* + * The Apache Software License, Version 1.1 + * + * Copyright (c) 2001-2002 The Apache Software Foundation. All rights + * reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * 3. The end-user documentation included with the redistribution, if + * any, must include the following acknowlegement: + * "This product includes software developed by the + * Apache Software Foundation (http://www.apache.org/)." + * Alternately, this acknowlegement may appear in the software itself, + * if and wherever such third-party acknowlegements normally appear. + * + * 4. The names "Ant" and "Apache Software + * Foundation" must not be used to endorse or promote products derived + * from this software without prior written permission. For written + * permission, please contact apache@apache.org. + * + * 5. Products derived from this software may not be called "Apache" + * nor may "Apache" appear in their names without prior written + * permission of the Apache Group. + * + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED + * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR + * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF + * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT + * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * ==================================================================== + * + * This software consists of voluntary contributions made by many + * individuals on behalf of the Apache Software Foundation. For more + * information on the Apache Software Foundation, please see + * . + */ + +/* + * This package is based on the work done by Keiron Liddle, Aftex Software + * to whom the Ant project is very grateful for his + * great code. + */ + +package org.apache.tools.bzip2; + +/** + * A simple class the hold and calculate the CRC for sanity checking + * of the data. + * + * @author Keiron Liddle + */ +class CRC { + public static int crc32Table[] = { + 0x00000000, 0x04c11db7, 0x09823b6e, 0x0d4326d9, + 0x130476dc, 0x17c56b6b, 0x1a864db2, 0x1e475005, + 0x2608edb8, 0x22c9f00f, 0x2f8ad6d6, 0x2b4bcb61, + 0x350c9b64, 0x31cd86d3, 0x3c8ea00a, 0x384fbdbd, + 0x4c11db70, 0x48d0c6c7, 0x4593e01e, 0x4152fda9, + 0x5f15adac, 0x5bd4b01b, 0x569796c2, 0x52568b75, + 0x6a1936c8, 0x6ed82b7f, 0x639b0da6, 0x675a1011, + 0x791d4014, 0x7ddc5da3, 0x709f7b7a, 0x745e66cd, + 0x9823b6e0, 0x9ce2ab57, 0x91a18d8e, 0x95609039, + 0x8b27c03c, 0x8fe6dd8b, 0x82a5fb52, 0x8664e6e5, + 0xbe2b5b58, 0xbaea46ef, 0xb7a96036, 0xb3687d81, + 0xad2f2d84, 0xa9ee3033, 0xa4ad16ea, 0xa06c0b5d, + 0xd4326d90, 0xd0f37027, 0xddb056fe, 0xd9714b49, + 0xc7361b4c, 0xc3f706fb, 0xceb42022, 0xca753d95, + 0xf23a8028, 0xf6fb9d9f, 0xfbb8bb46, 0xff79a6f1, + 0xe13ef6f4, 0xe5ffeb43, 0xe8bccd9a, 0xec7dd02d, + 0x34867077, 0x30476dc0, 0x3d044b19, 0x39c556ae, + 0x278206ab, 0x23431b1c, 0x2e003dc5, 0x2ac12072, + 0x128e9dcf, 0x164f8078, 0x1b0ca6a1, 0x1fcdbb16, + 0x018aeb13, 0x054bf6a4, 0x0808d07d, 0x0cc9cdca, + 0x7897ab07, 0x7c56b6b0, 0x71159069, 0x75d48dde, + 0x6b93dddb, 0x6f52c06c, 0x6211e6b5, 0x66d0fb02, + 0x5e9f46bf, 0x5a5e5b08, 0x571d7dd1, 0x53dc6066, + 0x4d9b3063, 0x495a2dd4, 0x44190b0d, 0x40d816ba, + 0xaca5c697, 0xa864db20, 0xa527fdf9, 0xa1e6e04e, + 0xbfa1b04b, 0xbb60adfc, 0xb6238b25, 0xb2e29692, + 0x8aad2b2f, 0x8e6c3698, 0x832f1041, 0x87ee0df6, + 0x99a95df3, 0x9d684044, 0x902b669d, 0x94ea7b2a, + 0xe0b41de7, 0xe4750050, 0xe9362689, 0xedf73b3e, + 0xf3b06b3b, 0xf771768c, 0xfa325055, 0xfef34de2, + 0xc6bcf05f, 0xc27dede8, 0xcf3ecb31, 0xcbffd686, + 0xd5b88683, 0xd1799b34, 0xdc3abded, 0xd8fba05a, + 0x690ce0ee, 0x6dcdfd59, 0x608edb80, 0x644fc637, + 0x7a089632, 0x7ec98b85, 0x738aad5c, 0x774bb0eb, + 0x4f040d56, 0x4bc510e1, 0x46863638, 0x42472b8f, + 0x5c007b8a, 0x58c1663d, 0x558240e4, 0x51435d53, + 0x251d3b9e, 0x21dc2629, 0x2c9f00f0, 0x285e1d47, + 0x36194d42, 0x32d850f5, 0x3f9b762c, 0x3b5a6b9b, + 0x0315d626, 0x07d4cb91, 0x0a97ed48, 0x0e56f0ff, + 0x1011a0fa, 0x14d0bd4d, 0x19939b94, 0x1d528623, + 0xf12f560e, 0xf5ee4bb9, 0xf8ad6d60, 0xfc6c70d7, + 0xe22b20d2, 0xe6ea3d65, 0xeba91bbc, 0xef68060b, + 0xd727bbb6, 0xd3e6a601, 0xdea580d8, 0xda649d6f, + 0xc423cd6a, 0xc0e2d0dd, 0xcda1f604, 0xc960ebb3, + 0xbd3e8d7e, 0xb9ff90c9, 0xb4bcb610, 0xb07daba7, + 0xae3afba2, 0xaafbe615, 0xa7b8c0cc, 0xa379dd7b, + 0x9b3660c6, 0x9ff77d71, 0x92b45ba8, 0x9675461f, + 0x8832161a, 0x8cf30bad, 0x81b02d74, 0x857130c3, + 0x5d8a9099, 0x594b8d2e, 0x5408abf7, 0x50c9b640, + 0x4e8ee645, 0x4a4ffbf2, 0x470cdd2b, 0x43cdc09c, + 0x7b827d21, 0x7f436096, 0x7200464f, 0x76c15bf8, + 0x68860bfd, 0x6c47164a, 0x61043093, 0x65c52d24, + 0x119b4be9, 0x155a565e, 0x18197087, 0x1cd86d30, + 0x029f3d35, 0x065e2082, 0x0b1d065b, 0x0fdc1bec, + 0x3793a651, 0x3352bbe6, 0x3e119d3f, 0x3ad08088, + 0x2497d08d, 0x2056cd3a, 0x2d15ebe3, 0x29d4f654, + 0xc5a92679, 0xc1683bce, 0xcc2b1d17, 0xc8ea00a0, + 0xd6ad50a5, 0xd26c4d12, 0xdf2f6bcb, 0xdbee767c, + 0xe3a1cbc1, 0xe760d676, 0xea23f0af, 0xeee2ed18, + 0xf0a5bd1d, 0xf464a0aa, 0xf9278673, 0xfde69bc4, + 0x89b8fd09, 0x8d79e0be, 0x803ac667, 0x84fbdbd0, + 0x9abc8bd5, 0x9e7d9662, 0x933eb0bb, 0x97ffad0c, + 0xafb010b1, 0xab710d06, 0xa6322bdf, 0xa2f33668, + 0xbcb4666d, 0xb8757bda, 0xb5365d03, 0xb1f740b4 + }; + + public CRC() { + initialiseCRC(); + } + + void initialiseCRC() { + globalCrc = 0xffffffff; + } + + int getFinalCRC() { + return ~globalCrc; + } + + int getGlobalCRC() { + return globalCrc; + } + + void setGlobalCRC(int newCrc) { + globalCrc = newCrc; + } + + void updateCRC(int inCh) { + int temp = (globalCrc >> 24) ^ inCh; + if (temp < 0) { + temp = 256 + temp; + } + globalCrc = (globalCrc << 8) ^ CRC.crc32Table[temp]; + } + + int globalCrc; +} +