diff --git a/.classpath b/.classpath
index 4fde3ef69..80c2952c7 100644
--- a/.classpath
+++ b/.classpath
@@ -11,8 +11,10 @@
+
+
+
-
diff --git a/LICENSE.txt b/LICENSE.txt
index 69409bed8..0721cdf14 100644
--- a/LICENSE.txt
+++ b/LICENSE.txt
@@ -1,4 +1,45 @@
/*
* (c) Copyright 2010 Metaweb Technologies, Inc. All rights reserved.
- *
*/
+
+See the 'licenses' directory for the full text of the licenses of the libraries we depend on,
+grouped below by license:
+
+licenses/apache2.0.LICENSE.txt
+ jetty
+ jetty-util
+ servlet-api
+ commons-lang
+ commons-codec
+ log4j
+ xmlbeans
+ poi
+ poi-ooxml
+ poi-ooxml-schemas
+ jdatapath
+
+licenses/secondstring.LICENSE.txt (BSD family)
+ secondstring
+
+licenses/dom4j.LICENSE.txt (BSD family)
+ dom4j
+
+licenses/simile.LICENSE.txt (BSD family)
+ vicino
+
+licenses/arithcode.LICENSE.txt (BSD family)
+ arithcode
+
+licenses/icu4j.LICENSE.txt (MIT family)
+ icu4j
+
+licenses/slf4j.LICENSE.txt (MIT family)
+ slf4j-api
+ slf4j-log4j12
+ jcl-over-slf4j
+
+licenses/json.LICENSE.txt (MIT family)
+ json
+
+licenses/cos.LICENSE.txt
+ cos
\ No newline at end of file
diff --git a/lib-src/arithcode-1.1-sources.jar b/lib-src/arithcode-1.1-sources.jar
new file mode 100644
index 000000000..c4569b6e9
Binary files /dev/null and b/lib-src/arithcode-1.1-sources.jar differ
diff --git a/lib-src/secondstring-20100303-sources.jar b/lib-src/secondstring-20100303-sources.jar
new file mode 100644
index 000000000..0fa453d45
Binary files /dev/null and b/lib-src/secondstring-20100303-sources.jar differ
diff --git a/lib/arithcode-1.1.jar b/lib/arithcode-1.1.jar
new file mode 100644
index 000000000..8ab9142db
Binary files /dev/null and b/lib/arithcode-1.1.jar differ
diff --git a/lib/secondstring-20100303.jar b/lib/secondstring-20100303.jar
new file mode 100644
index 000000000..427fcbbcc
Binary files /dev/null and b/lib/secondstring-20100303.jar differ
diff --git a/licenses/apache2.0.LICENSE.txt b/licenses/apache2.0.LICENSE.txt
new file mode 100644
index 000000000..d64569567
--- /dev/null
+++ b/licenses/apache2.0.LICENSE.txt
@@ -0,0 +1,202 @@
+
+ Apache License
+ Version 2.0, January 2004
+ http://www.apache.org/licenses/
+
+ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+ 1. Definitions.
+
+ "License" shall mean the terms and conditions for use, reproduction,
+ and distribution as defined by Sections 1 through 9 of this document.
+
+ "Licensor" shall mean the copyright owner or entity authorized by
+ the copyright owner that is granting the License.
+
+ "Legal Entity" shall mean the union of the acting entity and all
+ other entities that control, are controlled by, or are under common
+ control with that entity. For the purposes of this definition,
+ "control" means (i) the power, direct or indirect, to cause the
+ direction or management of such entity, whether by contract or
+ otherwise, or (ii) ownership of fifty percent (50%) or more of the
+ outstanding shares, or (iii) beneficial ownership of such entity.
+
+ "You" (or "Your") shall mean an individual or Legal Entity
+ exercising permissions granted by this License.
+
+ "Source" form shall mean the preferred form for making modifications,
+ including but not limited to software source code, documentation
+ source, and configuration files.
+
+ "Object" form shall mean any form resulting from mechanical
+ transformation or translation of a Source form, including but
+ not limited to compiled object code, generated documentation,
+ and conversions to other media types.
+
+ "Work" shall mean the work of authorship, whether in Source or
+ Object form, made available under the License, as indicated by a
+ copyright notice that is included in or attached to the work
+ (an example is provided in the Appendix below).
+
+ "Derivative Works" shall mean any work, whether in Source or Object
+ form, that is based on (or derived from) the Work and for which the
+ editorial revisions, annotations, elaborations, or other modifications
+ represent, as a whole, an original work of authorship. For the purposes
+ of this License, Derivative Works shall not include works that remain
+ separable from, or merely link (or bind by name) to the interfaces of,
+ the Work and Derivative Works thereof.
+
+ "Contribution" shall mean any work of authorship, including
+ the original version of the Work and any modifications or additions
+ to that Work or Derivative Works thereof, that is intentionally
+ submitted to Licensor for inclusion in the Work by the copyright owner
+ or by an individual or Legal Entity authorized to submit on behalf of
+ the copyright owner. For the purposes of this definition, "submitted"
+ means any form of electronic, verbal, or written communication sent
+ to the Licensor or its representatives, including but not limited to
+ communication on electronic mailing lists, source code control systems,
+ and issue tracking systems that are managed by, or on behalf of, the
+ Licensor for the purpose of discussing and improving the Work, but
+ excluding communication that is conspicuously marked or otherwise
+ designated in writing by the copyright owner as "Not a Contribution."
+
+ "Contributor" shall mean Licensor and any individual or Legal Entity
+ on behalf of whom a Contribution has been received by Licensor and
+ subsequently incorporated within the Work.
+
+ 2. Grant of Copyright License. Subject to the terms and conditions of
+ this License, each Contributor hereby grants to You a perpetual,
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+ copyright license to reproduce, prepare Derivative Works of,
+ publicly display, publicly perform, sublicense, and distribute the
+ Work and such Derivative Works in Source or Object form.
+
+ 3. Grant of Patent License. Subject to the terms and conditions of
+ this License, each Contributor hereby grants to You a perpetual,
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+ (except as stated in this section) patent license to make, have made,
+ use, offer to sell, sell, import, and otherwise transfer the Work,
+ where such license applies only to those patent claims licensable
+ by such Contributor that are necessarily infringed by their
+ Contribution(s) alone or by combination of their Contribution(s)
+ with the Work to which such Contribution(s) was submitted. If You
+ institute patent litigation against any entity (including a
+ cross-claim or counterclaim in a lawsuit) alleging that the Work
+ or a Contribution incorporated within the Work constitutes direct
+ or contributory patent infringement, then any patent licenses
+ granted to You under this License for that Work shall terminate
+ as of the date such litigation is filed.
+
+ 4. Redistribution. You may reproduce and distribute copies of the
+ Work or Derivative Works thereof in any medium, with or without
+ modifications, and in Source or Object form, provided that You
+ meet the following conditions:
+
+ (a) You must give any other recipients of the Work or
+ Derivative Works a copy of this License; and
+
+ (b) You must cause any modified files to carry prominent notices
+ stating that You changed the files; and
+
+ (c) You must retain, in the Source form of any Derivative Works
+ that You distribute, all copyright, patent, trademark, and
+ attribution notices from the Source form of the Work,
+ excluding those notices that do not pertain to any part of
+ the Derivative Works; and
+
+ (d) If the Work includes a "NOTICE" text file as part of its
+ distribution, then any Derivative Works that You distribute must
+ include a readable copy of the attribution notices contained
+ within such NOTICE file, excluding those notices that do not
+ pertain to any part of the Derivative Works, in at least one
+ of the following places: within a NOTICE text file distributed
+ as part of the Derivative Works; within the Source form or
+ documentation, if provided along with the Derivative Works; or,
+ within a display generated by the Derivative Works, if and
+ wherever such third-party notices normally appear. The contents
+ of the NOTICE file are for informational purposes only and
+ do not modify the License. You may add Your own attribution
+ notices within Derivative Works that You distribute, alongside
+ or as an addendum to the NOTICE text from the Work, provided
+ that such additional attribution notices cannot be construed
+ as modifying the License.
+
+ You may add Your own copyright statement to Your modifications and
+ may provide additional or different license terms and conditions
+ for use, reproduction, or distribution of Your modifications, or
+ for any such Derivative Works as a whole, provided Your use,
+ reproduction, and distribution of the Work otherwise complies with
+ the conditions stated in this License.
+
+ 5. Submission of Contributions. Unless You explicitly state otherwise,
+ any Contribution intentionally submitted for inclusion in the Work
+ by You to the Licensor shall be under the terms and conditions of
+ this License, without any additional terms or conditions.
+ Notwithstanding the above, nothing herein shall supersede or modify
+ the terms of any separate license agreement you may have executed
+ with Licensor regarding such Contributions.
+
+ 6. Trademarks. This License does not grant permission to use the trade
+ names, trademarks, service marks, or product names of the Licensor,
+ except as required for reasonable and customary use in describing the
+ origin of the Work and reproducing the content of the NOTICE file.
+
+ 7. Disclaimer of Warranty. Unless required by applicable law or
+ agreed to in writing, Licensor provides the Work (and each
+ Contributor provides its Contributions) on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ implied, including, without limitation, any warranties or conditions
+ of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+ PARTICULAR PURPOSE. You are solely responsible for determining the
+ appropriateness of using or redistributing the Work and assume any
+ risks associated with Your exercise of permissions under this License.
+
+ 8. Limitation of Liability. In no event and under no legal theory,
+ whether in tort (including negligence), contract, or otherwise,
+ unless required by applicable law (such as deliberate and grossly
+ negligent acts) or agreed to in writing, shall any Contributor be
+ liable to You for damages, including any direct, indirect, special,
+ incidental, or consequential damages of any character arising as a
+ result of this License or out of the use or inability to use the
+ Work (including but not limited to damages for loss of goodwill,
+ work stoppage, computer failure or malfunction, or any and all
+ other commercial damages or losses), even if such Contributor
+ has been advised of the possibility of such damages.
+
+ 9. Accepting Warranty or Additional Liability. While redistributing
+ the Work or Derivative Works thereof, You may choose to offer,
+ and charge a fee for, acceptance of support, warranty, indemnity,
+ or other liability obligations and/or rights consistent with this
+ License. However, in accepting such obligations, You may act only
+ on Your own behalf and on Your sole responsibility, not on behalf
+ of any other Contributor, and only if You agree to indemnify,
+ defend, and hold each Contributor harmless for any liability
+ incurred by, or claims asserted against, such Contributor by reason
+ of your accepting any such warranty or additional liability.
+
+ END OF TERMS AND CONDITIONS
+
+ APPENDIX: How to apply the Apache License to your work.
+
+ To apply the Apache License to your work, attach the following
+ boilerplate notice, with the fields enclosed by brackets "[]"
+ replaced with your own identifying information. (Don't include
+ the brackets!) The text should be enclosed in the appropriate
+ comment syntax for the file format. We also recommend that a
+ file or class name and description of purpose be included on the
+ same "printed page" as the copyright notice for easier
+ identification within third-party archives.
+
+ Copyright [yyyy] [name of copyright owner]
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
diff --git a/licenses/arithcode.LICENSE.txt b/licenses/arithcode.LICENSE.txt
new file mode 100644
index 000000000..de5381ef9
--- /dev/null
+++ b/licenses/arithcode.LICENSE.txt
@@ -0,0 +1,30 @@
+Copyright (c) 2002, Bob Carpenter.
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are
+met:
+
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ * Redistributions in binary form must reproduce the above
+ copyright notice, this list of conditions and the following
+ disclaimer in the documentation and/or other materials provided
+ with the distribution.
+
+ * Neither the name of colloquial.com nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
\ No newline at end of file
diff --git a/licenses/cos.LICENSE.txt b/licenses/cos.LICENSE.txt
new file mode 100644
index 000000000..e6b336ac8
--- /dev/null
+++ b/licenses/cos.LICENSE.txt
@@ -0,0 +1,82 @@
+Copyright (C) 2001-2009 by Jason Hunter, jhunter@servlets.com.
+All rights reserved.
+
+The source code, object code, and documentation in the com.oreilly.servlet
+package is copyright and owned by Jason Hunter.
+
+
+ON-SITE USE RIGHTS
+
+Permission is granted to use the com.oreilly.servlet.* packages in the
+development of any *non-commercial* project. For this use you are granted
+a non-exclusive, non-transferable limited license at no cost.
+
+For a *commercial* project, permission is granted to use the
+com.oreilly.servlet.* packages provided that every person on the development
+team for that project owns a copy of the book Java Servlet Programming
+(O'Reilly) in its most recent edition. The most recent edition is currently
+the 2nd Edition, available in association with Amazon.com at
+http://www.amazon.com/exec/obidos/ASIN/0596000405/jasonhunter.
+
+Other (sometimes cheaper) license terms are available upon request; please
+write to jhunter@servlets.com for more information.
+
+
+REDISTRIBUTION RIGHTS
+
+Commercial redistribution rights of the com.oreilly.servlet.* packages are
+available by writing jhunter@servlets.com.
+
+Non-commercial redistribution is permitted provided that:
+
+1. You redistribute the package in object code form only (as Java .class files
+or a .jar file containing the .class files) and only as part of a product that
+uses the classes as part of its primary functionality.
+
+2. The product containing the package is non-commercial in nature.
+
+3. The public interface to the classes in the package, and the public
+interface to any classes with similar functionality, is hidden from end users
+when engaged in normal use of the product.
+
+4. The distribution is not part of a software development kit, operating
+system, other library, or a development tool without written permission from
+the copyright holder.
+
+5. The distribution includes copyright notice as follows: "The source code,
+object code, and documentation in the com.oreilly.servlet package is copyright
+and owned by Jason Hunter." in the documentation and/or other materials
+provided with the distribution.
+
+6. You reproduce the above copyright notice, this list of conditions, and the
+following disclaimer in the documentation and/or other materials provided with
+the distribution.
+
+7. Licensor retains title to and ownership of the Software and all
+enhancements, modifications, and updates to the Software.
+
+Note that the com.oreilly.servlet package is provided "as is" and the author
+will not be liable for any damages suffered as a result of your use.
+Furthermore, you understand the package comes without any guarantee of
+technical support.
+
+You can always find the latest version of the com.oreilly.servlet package at
+http://www.servlets.com.
+
+
+THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
+OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+SUCH DAMAGE.
+
+Thanks,
+
+Jason Hunter
+jhunter AT servlets.com
diff --git a/licenses/dom4j.LICENSE.txt b/licenses/dom4j.LICENSE.txt
new file mode 100644
index 000000000..c86228f19
--- /dev/null
+++ b/licenses/dom4j.LICENSE.txt
@@ -0,0 +1,27 @@
+Redistribution and use of this software and associated documentation ("Software"),
+with or without modification, are permitted provided that the following conditions are met:
+
+1. Redistributions of source code must retain copyright statements and notices.
+Redistributions must also contain a copy of this document.
+2. Redistributions in binary form must reproduce the above copyright notice, this
+list of conditions and the following disclaimer in the documentation and/or other
+materials provided with the distribution.
+3. The name "DOM4J" must not be used to endorse or promote products derived from this
+Software without prior written permission of MetaStuff, Ltd. For written permission,
+please contact dom4j-info@metastuff.com.
+4. Products derived from this Software may not be called "DOM4J" nor may "DOM4J"
+appear in their names without prior written permission of MetaStuff, Ltd. DOM4J is a
+registered trademark of MetaStuff, Ltd.
+5. Due credit should be given to the DOM4J Project - http://www.dom4j.org
+
+THIS SOFTWARE IS PROVIDED BY METASTUFF, LTD. AND CONTRIBUTORS ``AS IS'' AND ANY EXPRESSED
+OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
+METASTUFF, LTD. OR ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT
+OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR
+TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
+EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+Copyright 2001-2005 (C) MetaStuff, Ltd. All Rights Reserved.
\ No newline at end of file
diff --git a/licenses/icu4j.LICENSE.txt b/licenses/icu4j.LICENSE.txt
new file mode 100644
index 000000000..677f38878
--- /dev/null
+++ b/licenses/icu4j.LICENSE.txt
@@ -0,0 +1,28 @@
+ICU License - ICU 1.8.1 and later
+
+COPYRIGHT AND PERMISSION NOTICE
+
+Copyright (c) 1995-2009 International Business Machines Corporation and others
+
+All rights reserved.
+
+Permission is hereby granted, free of charge, to any person obtaining a copy of this
+software and associated documentation files (the "Software"), to deal in the Software
+without restriction, including without limitation the rights to use, copy, modify, merge,
+publish, distribute, and/or sell copies of the Software, and to permit persons to whom
+the Software is furnished to do so, provided that the above copyright notice(s) and this
+permission notice appear in all copies of the Software and that both the above copyright
+notice(s) and this permission notice appear in supporting documentation.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
+INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
+PURPOSE AND NONINFRINGEMENT OF THIRD PARTY RIGHTS. IN NO EVENT SHALL THE COPYRIGHT
+HOLDER OR HOLDERS INCLUDED IN THIS NOTICE BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL
+INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF
+USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
+ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+
+Except as contained in this notice, the name of a copyright holder shall not be used
+in advertising or otherwise to promote the sale, use or other dealings in this Software
+without prior written authorization of the copyright holder.
+
diff --git a/licenses/json.LICENSE.txt b/licenses/json.LICENSE.txt
new file mode 100644
index 000000000..493319796
--- /dev/null
+++ b/licenses/json.LICENSE.txt
@@ -0,0 +1,21 @@
+Copyright (c) 2002 JSON.org
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+The Software shall be used for Good, not Evil.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
\ No newline at end of file
diff --git a/licenses/secondstring.LICENSE.txt b/licenses/secondstring.LICENSE.txt
new file mode 100644
index 000000000..b65a2af97
--- /dev/null
+++ b/licenses/secondstring.LICENSE.txt
@@ -0,0 +1,41 @@
+Copyright (c) 2003 Carnegie Mellon University
+All rights reserved.
+Developed by: Center for Automated Learning and Discovery
+ Carnegie Mellon University
+ http://www.cald.cs.cmu.edu
+
+ The design and implementation of this software was supported in
+ part by National Science Foundation Grant No. EIA-0131884 to the
+ National Institute of Statistical Sciences, and by a contract
+ from the Army Research Office to the Center for Computer and
+ Communications Security with Carnegie Mellon University.
+
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal with the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimers.
+Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimers in the
+documentation and/or other materials provided with the distribution.
+Neither the names of the Center for Automated Learning and Discovery,
+or Carnegie Mellon University, nor the names of its contributors may
+be used to endorse or promote products derived from this Software
+without specific prior written permission. THE SOFTWARE IS PROVIDED
+"AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING
+BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
+PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+OTHER DEALINGS WITH THE SOFTWARE.
+
+[This is an instance of the University of Illinois/NCSA Open Source
+agreement, obtained from http://www.opensource.org/licenses/UoI-NCSA.php]
+
diff --git a/licenses/simile.LICENSE.txt b/licenses/simile.LICENSE.txt
new file mode 100644
index 000000000..69bf77c10
--- /dev/null
+++ b/licenses/simile.LICENSE.txt
@@ -0,0 +1,29 @@
+/*
+ * (c) Copyright Massachusetts Institute of Technology & Contributors.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * 3. The name of the author may not be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
diff --git a/licenses/slf4j.LICENSE.txt b/licenses/slf4j.LICENSE.txt
new file mode 100644
index 000000000..29182a587
--- /dev/null
+++ b/licenses/slf4j.LICENSE.txt
@@ -0,0 +1,22 @@
+ Copyright (c) 2004-2008 QOS.ch
+ All rights reserved.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice shall be
+ included in all copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
\ No newline at end of file
diff --git a/src/main/java/edu/mit/simile/vicino/Distance.java b/src/main/java/edu/mit/simile/vicino/Distance.java
new file mode 100644
index 000000000..e9da35d46
--- /dev/null
+++ b/src/main/java/edu/mit/simile/vicino/Distance.java
@@ -0,0 +1,7 @@
+package edu.mit.simile.vicino;
+
+/** A pluggable metric over strings; Operator.getDistance loads implementations by class name. */
+public interface Distance {
+    /** Returns the distance between x and y; presumably normalised to [0, 1] - TODO confirm. */
+    float d(String x, String y);
+}
diff --git a/src/main/java/edu/mit/simile/vicino/Distributor.java b/src/main/java/edu/mit/simile/vicino/Distributor.java
new file mode 100644
index 000000000..1ac104b7c
--- /dev/null
+++ b/src/main/java/edu/mit/simile/vicino/Distributor.java
@@ -0,0 +1,106 @@
+package edu.mit.simile.vicino;
+
+import java.util.List;
+
+import edu.mit.simile.vicino.Distance;
+
+/**
+ * Command-line tool that samples random pairs of strings, measures the
+ * distance between each pair and prints a text histogram of the results.
+ *
+ * Usage: Distributor DISTANCE FILE_OR_DIRECTORY BUCKETS
+ */
+public class Distributor extends Operator {
+
+    private static final int COLUMNS = 70;
+    private static final char CHAR = '=';
+    private static final String RULE =
+            "+-------------------------------------------------------------------";
+
+    /**
+     * Runs the sampling and prints the histogram to standard output.
+     *
+     * @param args args[0] is the distance name handed to getDistance, args[1]
+     *            the file or directory handed to getStrings, and args[2] the
+     *            number of histogram buckets (must be positive)
+     * @throws Exception if the distance cannot be instantiated or the input
+     *             cannot be read
+     */
+    public static void main(String[] args) throws Exception {
+        if (args.length < 3) {
+            System.err.println("Usage: Distributor <distance> <file-or-directory> <buckets>");
+            return;
+        }
+
+        Distance d = getDistance(args[0]);
+        List strings = getStrings(args[1]);
+
+        int buckets = Integer.parseInt(args[2]);
+        if (buckets <= 0) {
+            throw new IllegalArgumentException("buckets must be positive: " + buckets);
+        }
+
+        int size = strings.size();
+        if (size == 0) {
+            // With nothing to sample every bucket stays empty and scaling the
+            // bars by the largest bucket would divide by zero.
+            System.err.println("No strings found in " + args[1]);
+            return;
+        }
+
+        long start = System.currentTimeMillis();
+        int[] values = new int[buckets];
+
+        // Draw as many random pairs (with replacement) as there are strings.
+        for (int i = 0; i < size; i++) {
+            String x = (String) strings.get((int) (Math.random() * size));
+            String y = (String) strings.get((int) (Math.random() * size));
+            values[bucketFor(d.d(x, y), buckets)]++;
+            System.out.print(".");
+        }
+        System.out.println();
+
+        long stop = System.currentTimeMillis();
+        float m = ((float) (stop - start)) / (float) size;
+
+        int maxValue = 0;
+        for (int i = 0; i < buckets; i++) {
+            if (values[i] > maxValue) {
+                maxValue = values[i];
+            }
+        }
+
+        // Scale the bars so that the fullest bucket spans COLUMNS characters.
+        System.out.println(RULE);
+        for (int i = 0; i < buckets; i++) {
+            System.out.println("|" + bar(COLUMNS * values[i] / maxValue));
+        }
+        System.out.println(RULE);
+
+        System.out.println("\n Each distance calculation took: " + m + " millis");
+    }
+
+    /**
+     * Maps a distance onto a bucket index, clamped to [0, buckets - 1]. Without
+     * the clamp a distance of exactly 1.0 would index one past the last bucket.
+     */
+    static private int bucketFor(float distance, int buckets) {
+        int index = (int) (buckets * distance);
+        if (index < 0) {
+            return 0;
+        }
+        if (index >= buckets) {
+            return buckets - 1;
+        }
+        return index;
+    }
+
+    /** Returns a string made of value copies of CHAR. */
+    static private String bar(int value) {
+        StringBuffer b = new StringBuffer(value);
+        for (int i = 0; i < value; i++) {
+            b.append(CHAR);
+        }
+        return b.toString();
+    }
+}
diff --git a/src/main/java/edu/mit/simile/vicino/Meter.java b/src/main/java/edu/mit/simile/vicino/Meter.java
new file mode 100644
index 000000000..d83defb70
--- /dev/null
+++ b/src/main/java/edu/mit/simile/vicino/Meter.java
@@ -0,0 +1,21 @@
+package edu.mit.simile.vicino;
+
+/**
+ * Command-line tool that prints the distance between two strings.
+ *
+ * Usage: Meter DISTANCE FIRST SECOND
+ */
+public class Meter extends Operator {
+
+    /**
+     * Looks up the distance named by args[0] and prints the distance between
+     * args[1] and args[2] to standard output.
+     */
+    public static void main(String[] args) throws Exception {
+        Distance metric = getDistance(args[0]);
+        String left = args[1];
+        String right = args[2];
+        float distance = metric.d(left, right);
+        System.out.println(left + " <- " + distance + " -> " + right);
+    }
+}
diff --git a/src/main/java/edu/mit/simile/vicino/Operator.java b/src/main/java/edu/mit/simile/vicino/Operator.java
new file mode 100644
index 000000000..83eea561c
--- /dev/null
+++ b/src/main/java/edu/mit/simile/vicino/Operator.java
@@ -0,0 +1,101 @@
+package edu.mit.simile.vicino;
+
+import java.io.BufferedReader;
+import java.io.File;
+import java.io.FileReader;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+
+/**
+ * Shared helpers for the command-line tools in this package: console logging,
+ * reflective lookup of a {@link Distance} and loading of the input strings.
+ */
+public class Operator {
+
+    /**
+     * Prints a message on standard output.
+     *
+     * @param msg the text to print
+     */
+    static void log(String msg) {
+        System.out.println(msg);
+    }
+
+    /**
+     * Instantiates the distance whose class is
+     * {@code edu.mit.simile.vicino.distances.<distance>Distance} through its
+     * no-argument constructor.
+     *
+     * @param distance short name of the distance, e.g. {@code "Jaro"}
+     * @return a new instance of the requested distance
+     * @throws Exception if the class cannot be found or instantiated
+     */
+    static Distance getDistance(String distance) throws Exception {
+        return (Distance) Class.forName("edu.mit.simile.vicino.distances." + distance + "Distance").newInstance();
+    }
+
+    /**
+     * Loads the strings to work on.
+     * <p>
+     * When {@code fileName} is a directory, every regular file directly inside
+     * it yields one string (its trimmed lines concatenated); entries that are
+     * not regular files are skipped. Otherwise each trimmed line of the file is
+     * one string. Files are read with the platform default charset.
+     *
+     * @param fileName path of a file or of a directory of files
+     * @return the strings, in the order they were read
+     * @throws IOException if the directory cannot be listed or a file cannot be read
+     */
+    static List<String> getStrings(String fileName) throws IOException {
+        List<String> strings = new ArrayList<String>();
+
+        File file = new File(fileName);
+        if (file.isDirectory()) {
+            File[] files = file.listFiles();
+            if (files == null) {
+                // listFiles() returns null when the directory cannot be read
+                throw new IOException("Unable to list directory: " + fileName);
+            }
+            for (int i = 0; i < files.length; i++) {
+                // a FileReader cannot open a sub-directory, so skip those
+                if (files[i].isFile()) {
+                    strings.add(readCollapsed(files[i]));
+                }
+            }
+        } else {
+            readLines(file, strings);
+        }
+
+        return strings;
+    }
+
+    /** Reads a whole file into one string made of its trimmed lines. */
+    private static String readCollapsed(File file) throws IOException {
+        BufferedReader input = new BufferedReader(new FileReader(file));
+        try {
+            StringBuffer b = new StringBuffer();
+            String line;
+            while ((line = input.readLine()) != null) {
+                b.append(line.trim());
+            }
+            return b.toString();
+        } finally {
+            // release the file handle even when reading fails
+            input.close();
+        }
+    }
+
+    /** Appends every trimmed line of a file to {@code strings}. */
+    private static void readLines(File file, List<String> strings) throws IOException {
+        BufferedReader input = new BufferedReader(new FileReader(file));
+        try {
+            String line;
+            while ((line = input.readLine()) != null) {
+                strings.add(line.trim());
+            }
+        } finally {
+            input.close();
+        }
+    }
+}
diff --git a/src/main/java/edu/mit/simile/vicino/Seeker.java b/src/main/java/edu/mit/simile/vicino/Seeker.java
new file mode 100644
index 000000000..d52ea5bbf
--- /dev/null
+++ b/src/main/java/edu/mit/simile/vicino/Seeker.java
@@ -0,0 +1,78 @@
+package edu.mit.simile.vicino;
+
+import java.io.BufferedReader;
+import java.io.InputStreamReader;
+import java.io.Serializable;
+import java.util.Iterator;
+import java.util.List;
+
+import edu.mit.simile.vicino.vptree.VPTree;
+import edu.mit.simile.vicino.vptree.VPTreeBuilder;
+import edu.mit.simile.vicino.vptree.VPTreeSeeker;
+
+/**
+ * Interactive command-line range search over a VP-tree.
+ * <p>
+ * Arguments: {@code args[0]} is the distance name, {@code args[1]} the file or
+ * directory holding the strings to index. Each line read from standard input
+ * must have the form {@code string|range}.
+ */
+public class Seeker extends Operator {
+
+    /**
+     * Builds the tree, then answers one range query per input line until
+     * standard input is exhausted.
+     *
+     * @param args distance name followed by the path of the strings
+     * @throws Exception if the distance or the strings cannot be loaded
+     */
+    public static void main(String[] args) throws Exception {
+        Distance d = getDistance(args[0]);
+
+        log("Working with distance: " + d);
+        List strings = getStrings(args[1]);
+        log("Obtained " + strings.size() + " from " + args[1]);
+
+        log("Building VPTree...");
+        VPTreeBuilder builder = new VPTreeBuilder(d);
+        VPTree tree = builder.buildVPTree(strings);
+        log("..done");
+
+        VPTreeSeeker seeker = new VPTreeSeeker(d, tree);
+
+        log("type a string|range then hit return:");
+        BufferedReader input = new BufferedReader(new InputStreamReader(System.in));
+        String line = null;
+        while ((line = input.readLine()) != null) {
+            // split on the last '|' so the query itself may contain the separator
+            int index = line.lastIndexOf('|');
+            if (index < 0) {
+                log(" [expected string|range, got: " + line + "]");
+                continue;
+            }
+            String query = line.substring(0, index);
+            float range;
+            try {
+                range = Float.parseFloat(line.substring(index + 1));
+            } catch (NumberFormatException e) {
+                // a bad range must not terminate the interactive session
+                log(" [invalid range: " + line.substring(index + 1) + "]");
+                continue;
+            }
+
+            long start = System.currentTimeMillis();
+            List<? extends Serializable> results = seeker.range(query, range);
+            long stop = System.currentTimeMillis();
+
+            Iterator<? extends Serializable> j = results.iterator();
+            if (j.hasNext()) {
+                while (j.hasNext()) {
+                    log(" " + j.next());
+                }
+                log(" [done in " + (stop - start) + "ms]");
+            } else {
+                log(" [no results found in " + (stop - start) + "ms]");
+            }
+        }
+    }
+}
diff --git a/src/main/java/edu/mit/simile/vicino/Tester.java b/src/main/java/edu/mit/simile/vicino/Tester.java
new file mode 100644
index 000000000..b97255c6c
--- /dev/null
+++ b/src/main/java/edu/mit/simile/vicino/Tester.java
@@ -0,0 +1,75 @@
+package edu.mit.simile.vicino;
+
+import java.util.List;
+
+/**
+ * Command-line check of the metric axioms for a distance, on random triples
+ * drawn from a set of strings.
+ * <p>
+ * Arguments: {@code args[0]} is the distance name, {@code args[1]} the file or
+ * directory holding the strings.
+ */
+public class Tester extends Operator {
+
+    /**
+     * Samples as many random triples as there are strings, reports for each
+     * whether the axioms held, then prints the average time per evaluation.
+     *
+     * @param args distance name followed by the path of the strings
+     * @throws Exception if the distance or the strings cannot be loaded
+     */
+    public static void main(String[] args) throws Exception {
+        // getDistance is inherited from Operator; no local copy is needed
+        Distance d = getDistance(args[0]);
+
+        List strings = getStrings(args[1]);
+
+        int size = strings.size();
+        if (size == 0) {
+            // nothing to sample; also avoids reporting 0/0 = NaN below
+            System.out.println("No strings found in " + args[1]);
+            return;
+        }
+
+        long start = System.currentTimeMillis();
+
+        for (int i = 0; i < size; i++) {
+            String x = (String) strings.get((int) (Math.random() * size));
+            String y = (String) strings.get((int) (Math.random() * size));
+            String z = (String) strings.get((int) (Math.random() * size));
+            if (metric(x, y, z, d)) {
+                System.out.println("metric");
+            } else {
+                System.out.println("***** NOT METRIC *****");
+            }
+        }
+
+        long stop = System.currentTimeMillis();
+        float m = ((float) (stop - start)) / (float) size;
+
+        System.out.println("\n Each metric evaluation took: " + m + " millis");
+    }
+
+    /**
+     * Tests identity, symmetry and the triangle inequality on one triple.
+     * Identity and symmetry use exact float comparison.
+     *
+     * @param x first string
+     * @param y second string
+     * @param z third string
+     * @param d distance under test
+     * @return {@code true} when {@code d(x,x) == 0}, {@code d(x,y) == d(y,x)}
+     *         and {@code d(x,z) <= d(x,y) + d(y,z)}
+     */
+    static boolean metric(String x, String y, String z, Distance d) {
+        float dxx = d.d(x, x);
+        boolean identity = (dxx == 0.0f);
+        float dxy = d.d(x, y);
+        float dyx = d.d(y, x);
+        boolean symmetric = (dxy == dyx);
+        float dxz = d.d(x, z);
+        float dyz = d.d(y, z);
+        boolean triangular = (dxz <= dxy + dyz);
+        return (identity && symmetric && triangular);
+    }
+}
diff --git a/src/main/java/edu/mit/simile/vicino/distances/BZip2Distance.java b/src/main/java/edu/mit/simile/vicino/distances/BZip2Distance.java
new file mode 100644
index 000000000..dea474099
--- /dev/null
+++ b/src/main/java/edu/mit/simile/vicino/distances/BZip2Distance.java
@@ -0,0 +1,38 @@
+package edu.mit.simile.vicino.distances;
+
+import java.io.ByteArrayOutputStream;
+import java.io.IOException;
+
+import org.apache.tools.bzip2.CBZip2OutputStream;
+
+/**
+ * Distance whose cost function is the number of bytes produced when the
+ * concatenated strings are written through a {@link CBZip2OutputStream}.
+ */
+public class BZip2Distance extends PseudoMetricDistance {
+
+    /**
+     * Measures how many bytes {@code x + y} occupies once written out.
+     *
+     * @param x first string
+     * @param y second string, appended to {@code x}
+     * @return the output size in bytes, or 0 if an I/O error occurred
+     *         (the stack trace is printed)
+     */
+    public float d2(String x, String y) {
+        String joined = x + y;
+        ByteArrayOutputStream sink = new ByteArrayOutputStream(joined.length());
+        try {
+            CBZip2OutputStream packer = new CBZip2OutputStream(sink);
+            packer.write(joined.getBytes());
+            packer.close();
+            sink.close();
+            // size() equals the length of the array toByteArray() would copy
+            return sink.size();
+        } catch (IOException e) {
+            e.printStackTrace();
+            return 0.0f;
+        }
+    }
+
+}
diff --git a/src/main/java/edu/mit/simile/vicino/distances/GZipDistance.java b/src/main/java/edu/mit/simile/vicino/distances/GZipDistance.java
new file mode 100644
index 000000000..5bbfe36ff
--- /dev/null
+++ b/src/main/java/edu/mit/simile/vicino/distances/GZipDistance.java
@@ -0,0 +1,37 @@
+package edu.mit.simile.vicino.distances;
+
+import java.io.ByteArrayOutputStream;
+import java.io.IOException;
+import java.util.zip.GZIPOutputStream;
+
+/**
+ * Distance whose cost function is the number of bytes produced when the
+ * concatenated strings are written through a {@link GZIPOutputStream}.
+ */
+public class GZipDistance extends PseudoMetricDistance {
+
+    /**
+     * Measures how many bytes {@code x + y} occupies once gzip-compressed.
+     *
+     * @param x first string
+     * @param y second string, appended to {@code x}
+     * @return the compressed size in bytes, or 0 if an I/O error occurred
+     *         (the stack trace is printed)
+     */
+    public float d2(String x, String y) {
+        String joined = x + y;
+        ByteArrayOutputStream sink = new ByteArrayOutputStream(joined.length());
+        try {
+            GZIPOutputStream packer = new GZIPOutputStream(sink);
+            packer.write(joined.getBytes());
+            packer.close();
+            sink.close();
+            // size() equals the length of the array toByteArray() would copy
+            return sink.size();
+        } catch (IOException e) {
+            e.printStackTrace();
+            return 0.0f;
+        }
+    }
+
+}
diff --git a/src/main/java/edu/mit/simile/vicino/distances/JaccardDistance.java b/src/main/java/edu/mit/simile/vicino/distances/JaccardDistance.java
new file mode 100644
index 000000000..0e4f50670
--- /dev/null
+++ b/src/main/java/edu/mit/simile/vicino/distances/JaccardDistance.java
@@ -0,0 +1,30 @@
+package edu.mit.simile.vicino.distances;
+
+import com.wcohen.ss.Jaccard;
+import com.wcohen.ss.api.StringDistance;
+
+/**
+ * Distance derived from the {@link Jaccard} score of a pair of strings as
+ * {@code |score - 1|}.
+ */
+public class JaccardDistance extends MetricDistance {
+
+    /** Scorer the distance is derived from. */
+    StringDistance distance;
+
+    /** Creates the distance backed by a fresh {@link Jaccard} scorer. */
+    public JaccardDistance() {
+        distance = new Jaccard();
+    }
+
+    /**
+     * @param x first string
+     * @param y second string
+     * @return the absolute difference between the score of the pair and 1
+     */
+    protected float d2(String x, String y) {
+        float score = (float) distance.score(x, y);
+        return Math.abs(score - 1.0f);
+    }
+
+}
diff --git a/src/main/java/edu/mit/simile/vicino/distances/JaroDistance.java b/src/main/java/edu/mit/simile/vicino/distances/JaroDistance.java
new file mode 100644
index 000000000..d17166d51
--- /dev/null
+++ b/src/main/java/edu/mit/simile/vicino/distances/JaroDistance.java
@@ -0,0 +1,30 @@
+package edu.mit.simile.vicino.distances;
+
+import com.wcohen.ss.Jaro;
+import com.wcohen.ss.api.StringDistance;
+
+/**
+ * Distance derived from the {@link Jaro} score of a pair of strings as
+ * {@code |score - 1|}.
+ */
+public class JaroDistance extends MetricDistance {
+
+    /** Scorer the distance is derived from. */
+    StringDistance distance;
+
+    /** Creates the distance backed by a fresh {@link Jaro} scorer. */
+    public JaroDistance() {
+        distance = new Jaro();
+    }
+
+    /**
+     * @param x first string
+     * @param y second string
+     * @return the absolute difference between the score of the pair and 1
+     */
+    protected float d2(String x, String y) {
+        float score = (float) distance.score(x, y);
+        return Math.abs(score - 1.0f);
+    }
+
+}
diff --git a/src/main/java/edu/mit/simile/vicino/distances/LevensteinDistance.java b/src/main/java/edu/mit/simile/vicino/distances/LevensteinDistance.java
new file mode 100644
index 000000000..5eaff95a4
--- /dev/null
+++ b/src/main/java/edu/mit/simile/vicino/distances/LevensteinDistance.java
@@ -0,0 +1,38 @@
+package edu.mit.simile.vicino.distances;
+
+import com.wcohen.ss.Levenstein;
+import com.wcohen.ss.api.StringDistance;
+
+import edu.mit.simile.vicino.Distance;
+
+/**
+ * Distance built on the {@link Levenstein} score: the absolute score divided
+ * by the combined length of the two strings.
+ */
+public class LevensteinDistance implements Distance {
+
+    /** Scorer the distance is derived from. */
+    StringDistance distance;
+
+    /** Creates the distance backed by a fresh {@link Levenstein} scorer. */
+    public LevensteinDistance() {
+        this.distance = new Levenstein();
+    }
+
+    /**
+     * @param x first string
+     * @param y second string
+     * @return the absolute score of the pair divided by
+     *         {@code x.length() + y.length()}, or 0 when both strings are empty
+     */
+    public float d(String x, String y) {
+        int totalLength = x.length() + y.length();
+        if (totalLength == 0) {
+            // two empty strings are identical; dividing would give 0/0 = NaN
+            return 0.0f;
+        }
+        float d = Math.abs((float) this.distance.score(x, y));
+        return d / totalLength;
+    }
+
+}
diff --git a/src/main/java/edu/mit/simile/vicino/distances/MetricDistance.java b/src/main/java/edu/mit/simile/vicino/distances/MetricDistance.java
new file mode 100644
index 000000000..4084be1e4
--- /dev/null
+++ b/src/main/java/edu/mit/simile/vicino/distances/MetricDistance.java
@@ -0,0 +1,32 @@
+package edu.mit.simile.vicino.distances;
+
+import edu.mit.simile.vicino.Distance;
+
+/**
+ * Base class for distances whose raw measure {@link #d2(String, String)} is
+ * used directly as the distance. (An earlier, disabled variant divided
+ * {@code d2(x, y)} by {@code d2(x, "") + d2(y, "")}.)
+ */
+public abstract class MetricDistance implements Distance {
+
+    /**
+     * @param x first string
+     * @param y second string
+     * @return {@code d2(x, y)} unchanged
+     */
+    public float d(String x, String y) {
+        return d2(x, y);
+    }
+
+    /**
+     * Raw measure supplied by the concrete distance. Declared
+     * {@code protected}, like its counterpart in PseudoMetricDistance, so
+     * that subclasses outside this package can implement it too.
+     *
+     * @param x first string
+     * @param y second string
+     * @return the raw measure between the two strings
+     */
+    protected abstract float d2(String x, String y);
+
+}
diff --git a/src/main/java/edu/mit/simile/vicino/distances/PPMDistance.java b/src/main/java/edu/mit/simile/vicino/distances/PPMDistance.java
new file mode 100644
index 000000000..d4f31cb04
--- /dev/null
+++ b/src/main/java/edu/mit/simile/vicino/distances/PPMDistance.java
@@ -0,0 +1,39 @@
+package edu.mit.simile.vicino.distances;
+
+import java.io.ByteArrayOutputStream;
+import java.io.IOException;
+
+import com.colloquial.arithcode.ArithCodeOutputStream;
+import com.colloquial.arithcode.PPMModel;
+
+/**
+ * Distance whose cost function is the number of bytes produced when the
+ * concatenated strings are written through an {@link ArithCodeOutputStream}.
+ */
+public class PPMDistance extends PseudoMetricDistance {
+
+    /**
+     * Measures how many bytes {@code x + y} occupies once written out.
+     *
+     * @param x first string
+     * @param y second string, appended to {@code x}
+     * @return the output size in bytes, or 0 if an I/O error occurred
+     *         (the stack trace is printed)
+     */
+    public float d2(String x, String y) {
+        String joined = x + y;
+        ByteArrayOutputStream sink = new ByteArrayOutputStream(joined.length());
+        try {
+            ArithCodeOutputStream coder = new ArithCodeOutputStream(sink, new PPMModel(8));
+            coder.write(joined.getBytes());
+            coder.close();
+            sink.close();
+            // size() equals the length of the array toByteArray() would copy
+            return sink.size();
+        } catch (IOException e) {
+            e.printStackTrace();
+            return 0.0f;
+        }
+    }
+
+}
diff --git a/src/main/java/edu/mit/simile/vicino/distances/PseudoMetricDistance.java b/src/main/java/edu/mit/simile/vicino/distances/PseudoMetricDistance.java
new file mode 100644
index 000000000..a14f825f1
--- /dev/null
+++ b/src/main/java/edu/mit/simile/vicino/distances/PseudoMetricDistance.java
@@ -0,0 +1,32 @@
+package edu.mit.simile.vicino.distances;
+
+import edu.mit.simile.vicino.Distance;
+
+/**
+ * Base class that turns a cost function {@link #d2(String, String)} into a
+ * distance by comparing the cross costs of a pair with its self costs.
+ */
+public abstract class PseudoMetricDistance implements Distance {
+
+    /**
+     * Computes {@code (d2(x,y) + d2(y,x)) / (d2(x,x) + d2(y,y)) - 1}.
+     *
+     * @param x first string
+     * @param y second string
+     * @return the normalised value described above
+     */
+    public float d(String x, String y) {
+        float selfCost = d2(x, x) + d2(y, y);
+        float crossCost = d2(x, y) + d2(y, x);
+        return crossCost / selfCost - 1.0f;
+    }
+
+    /**
+     * Cost function supplied by the concrete distance.
+     *
+     * @param x first string
+     * @param y second string
+     * @return the cost of the ordered pair
+     */
+    protected abstract float d2(String x, String y);
+}
diff --git a/src/main/java/edu/mit/simile/vicino/vptree/Node.java b/src/main/java/edu/mit/simile/vicino/vptree/Node.java
new file mode 100755
index 000000000..4de3f2f34
--- /dev/null
+++ b/src/main/java/edu/mit/simile/vicino/vptree/Node.java
@@ -0,0 +1,51 @@
+package edu.mit.simile.vicino.vptree;
+
+import java.io.Serializable;
+
+/**
+ * Pairs an object with its distance from the vantage point considered at the
+ * current step of the algorithm.
+ *
+ * @author Paolo Ciccarese
+ */
+public class Node implements Serializable {
+
+    private static final long serialVersionUID = -2077473220894258550L;
+
+    private final Serializable obj;
+    private float distance;
+
+    /**
+     * Wraps an object; the distance keeps its default value of 0 until
+     * {@link #setDistance(float)} is called.
+     */
+    public Node(Serializable obj) {
+        this.obj = obj;
+    }
+
+    /** Wraps an object together with an initial distance. */
+    public Node(Serializable obj, int i) {
+        this(obj);
+        distance = i;
+    }
+
+    /** @return the wrapped object */
+    public Serializable get() {
+        return obj;
+    }
+
+    /** @return the distance currently recorded for the object */
+    public float getDistance() {
+        return this.distance;
+    }
+
+    /** Records a new distance for the object. */
+    public void setDistance(float distance) {
+        this.distance = distance;
+    }
+
+    /** @return the string form of the wrapped object */
+    public String toString() {
+        return obj.toString();
+    }
+}
diff --git a/src/main/java/edu/mit/simile/vicino/vptree/NodeSorter.java b/src/main/java/edu/mit/simile/vicino/vptree/NodeSorter.java
new file mode 100755
index 000000000..2c1d2c071
--- /dev/null
+++ b/src/main/java/edu/mit/simile/vicino/vptree/NodeSorter.java
@@ -0,0 +1,46 @@
+package edu.mit.simile.vicino.vptree;
+
+import java.util.Arrays;
+import java.util.Comparator;
+
+/**
+ * Sorts {@link Node} arrays by ascending distance.
+ */
+public class NodeSorter {
+
+    /** Orders nodes by ascending distance. */
+    private static final Comparator<Node> BY_DISTANCE = new Comparator<Node>() {
+        public int compare(Node a, Node b) {
+            return Float.compare(a.getDistance(), b.getDistance());
+        }
+    };
+
+    /**
+     * Sorts the slice {@code nodes[lo..hi]} (both bounds inclusive) by ascending
+     * distance. Nothing happens when the slice holds fewer than two elements.
+     * <p>
+     * Delegates to {@link Arrays#sort(Object[], int, int, Comparator)} so that
+     * slices with many equal distances cannot degrade to quadratic time and
+     * linear recursion depth.
+     *
+     * @param nodes the array to sort in place
+     * @param lo index of the first element of the slice
+     * @param hi index of the last element of the slice
+     */
+    public static void sort(Node nodes[], int lo, int hi) {
+        if (lo >= hi) {
+            return;
+        }
+        // Arrays.sort takes an exclusive upper bound
+        Arrays.sort(nodes, lo, hi + 1, BY_DISTANCE);
+    }
+
+    /**
+     * Sorts a whole array of nodes by ascending distance.
+     *
+     * @param nodes the array to sort in place
+     */
+    public void sort(Node nodes[]) {
+        NodeSorter.sort(nodes, 0, nodes.length - 1);
+    }
+}
diff --git a/src/main/java/edu/mit/simile/vicino/vptree/TNode.java b/src/main/java/edu/mit/simile/vicino/vptree/TNode.java
new file mode 100755
index 000000000..1f9917fc8
--- /dev/null
+++ b/src/main/java/edu/mit/simile/vicino/vptree/TNode.java
@@ -0,0 +1,61 @@
+package edu.mit.simile.vicino.vptree;
+
+import java.io.Serializable;
+
+/**
+ * Node of a VP-tree: an object, a median distance and two child subtrees.
+ *
+ * @author Paolo Ciccarese
+ */
+public class TNode implements Serializable {
+
+    private static final long serialVersionUID = -217604190976851241L;
+
+    private final Serializable obj;
+    private float median;
+    private TNode left;
+    private TNode right;
+
+    /**
+     * Creates a node for an object; the median and the children keep their
+     * defaults (0 and {@code null}) until the setters are called.
+     */
+    public TNode(Serializable obj) {
+        this.obj = obj;
+    }
+
+    /** @return the object held by this node */
+    public Serializable get() {
+        return obj;
+    }
+
+    /** @return the median distance recorded on this node */
+    public float getMedian() {
+        return this.median;
+    }
+
+    /** Records the median distance for this node. */
+    public void setMedian(float median) {
+        this.median = median;
+    }
+
+    /** @return the left child, or {@code null} if none was set */
+    public TNode getLeft() {
+        return this.left;
+    }
+
+    /** Attaches the left child. */
+    public void setLeft(TNode leftNode) {
+        left = leftNode;
+    }
+
+    /** @return the right child, or {@code null} if none was set */
+    public TNode getRight() {
+        return this.right;
+    }
+
+    /** Attaches the right child. */
+    public void setRight(TNode rightNode) {
+        right = rightNode;
+    }
+}
diff --git a/src/main/java/edu/mit/simile/vicino/vptree/VPTree.java b/src/main/java/edu/mit/simile/vicino/vptree/VPTree.java
new file mode 100755
index 000000000..4ce8cb997
--- /dev/null
+++ b/src/main/java/edu/mit/simile/vicino/vptree/VPTree.java
@@ -0,0 +1,31 @@
+package edu.mit.simile.vicino.vptree;
+
+import java.io.Serializable;
+
+/**
+ * Serializable holder of the root node of a VP-tree.
+ *
+ * @author Paolo Ciccarese
+ */
+public class VPTree implements Serializable {
+
+    private static final long serialVersionUID = 1291056732155841123L;
+
+    private TNode root;
+
+    /**
+     * @return the root node, or {@code null} if none has been set
+     */
+    public TNode getRoot() {
+        return this.root;
+    }
+
+    /**
+     * Replaces the root node.
+     *
+     * @param root the new root
+     */
+    public void setRoot(TNode root) {
+        this.root = root;
+    }
+}
diff --git a/src/main/java/edu/mit/simile/vicino/vptree/VPTreeBuilder.java b/src/main/java/edu/mit/simile/vicino/vptree/VPTreeBuilder.java
new file mode 100755
index 000000000..c7ea617e3
--- /dev/null
+++ b/src/main/java/edu/mit/simile/vicino/vptree/VPTreeBuilder.java
@@ -0,0 +1,133 @@
+package edu.mit.simile.vicino.vptree;
+
+import java.io.Serializable;
+import java.util.Collection;
+import java.util.Iterator;
+import java.util.Random;
+
+import edu.mit.simile.vicino.Distance;
+
+/**
+ * Builds a {@link VPTree} from a collection of objects, using a
+ * {@link Distance} evaluated on their string forms.
+ *
+ * @author Paolo Ciccarese
+ * @author Stefano Mazzocchi
+ */
+public class VPTreeBuilder {
+
+    private static final boolean DEBUG = false;
+
+    private Random generator = new Random(System.currentTimeMillis());
+
+    private VPTree tree;
+    private final Distance distance;
+
+    /**
+     * Defines a VPTree Builder for a specific distance.
+     *
+     * @param distance The class implementing the distance.
+     */
+    public VPTreeBuilder(Distance distance) {
+        this.distance = distance;
+    }
+
+    /**
+     * Builds a tree holding every element of the collection.
+     *
+     * @param col the objects to index; their {@code toString()} is what the
+     *            distance is evaluated on
+     * @return the tree; its root is {@code null} when the collection is empty
+     */
+    public VPTree buildVPTree(Collection<? extends Serializable> col) {
+        Node nodes[] = new Node[col.size()];
+        Iterator<? extends Serializable> i = col.iterator();
+        int counter = 0;
+        while (i.hasNext()) {
+            nodes[counter++] = new Node(i.next());
+        }
+
+        tree = new VPTree();
+        if (nodes.length > 0) {
+            tree.setRoot(addNode(nodes, 0, nodes.length - 1));
+        }
+        return tree;
+    }
+
+    /**
+     * Builds the subtree for {@code nodes[begin..end]} (both bounds inclusive).
+     * <p>
+     * A random element becomes the vantage point; the remaining elements are
+     * sorted by their distance from it and split in two halves. The median
+     * stored on the node is the largest distance found in the left (closer)
+     * half, so every element of the right half is at least that far away.
+     */
+    private TNode addNode(Node nodes[], int begin, int end) {
+        if (begin == end) {
+            // a single element is a leaf: no children, median left at 0
+            return new TNode(nodes[begin].get());
+        }
+
+        int delta = end - begin;
+        int middle = begin + delta / 2;
+
+        // Move the vantage point to the front explicitly; relying on the sort
+        // to put it there fails when another element is at distance 0 too.
+        int chosen = begin + getRandomIndex(delta + 1);
+        Node pivot = nodes[chosen];
+        nodes[chosen] = nodes[begin];
+        nodes[begin] = pivot;
+        pivot.setDistance(0.0f);
+
+        TNode node = new TNode(pivot.get());
+
+        if (DEBUG) System.out.println("\nnode: " + node.get().toString());
+
+        calculateDistances(node, nodes, begin + 1, end);
+        orderDistances(nodes, begin + 1, end);
+
+        // Must be read now: the recursive calls below overwrite the distances.
+        node.setMedian(nodes[middle].getDistance());
+
+        if (DEBUG) {
+            for (int i = begin; i <= end; i++) {
+                System.out.println(" +-- " + nodes[i].getDistance() + " --> " + nodes[i].get());
+            }
+        }
+
+        if (middle > begin) {
+            node.setLeft(addNode(nodes, begin + 1, middle));
+            if (DEBUG) System.out.println(" L --> " + node.getLeft().get());
+        }
+
+        // middle < end always holds here, so the right half is never empty
+        node.setRight(addNode(nodes, middle + 1, end));
+        if (DEBUG) System.out.println(" R --> " + node.getRight().get());
+
+        return node;
+    }
+
+    /**
+     * Stores on each of {@code nodes[begin..end]} its distance from the pivot.
+     * An element that is the very same object as the pivot gets 0 without
+     * evaluating the distance.
+     */
+    private void calculateDistances(TNode pivot, Node nodes[], int begin, int end) {
+        Object x = pivot.get();
+        for (int i = begin; i <= end; i++) {
+            Object y = nodes[i].get();
+            float d = (x == y) ? 0.0f : distance.d(x.toString(), y.toString());
+            nodes[i].setDistance(d);
+        }
+    }
+
+    /** Sorts {@code nodes[begin..end]} by distance using {@link NodeSorter}. */
+    private void orderDistances(Node nodes[], int begin, int end) {
+        NodeSorter.sort(nodes, begin, end);
+    }
+
+    /** @return a random index in {@code [0, max)}; {@code max} must be positive */
+    private int getRandomIndex(int max) {
+        return generator.nextInt(max);
+    }
+}
diff --git a/src/main/java/edu/mit/simile/vicino/vptree/VPTreeSeeker.java b/src/main/java/edu/mit/simile/vicino/vptree/VPTreeSeeker.java
new file mode 100755
index 000000000..389a75eaf
--- /dev/null
+++ b/src/main/java/edu/mit/simile/vicino/vptree/VPTreeSeeker.java
@@ -0,0 +1,71 @@
+package edu.mit.simile.vicino.vptree;
+
+import java.io.Serializable;
+import java.util.ArrayList;
+import java.util.List;
+
+import edu.mit.simile.vicino.Distance;
+
+/**
+ * Range search over a {@link VPTree}.
+ *
+ * @author Paolo Ciccarese
+ */
+public class VPTreeSeeker {
+
+    VPTree tree;
+    Distance distance;
+
+    /**
+     * @param distance the distance evaluated between the query and the tree elements
+     * @param tree the tree to search
+     */
+    public VPTreeSeeker(Distance distance, VPTree tree) {
+        this.distance = distance;
+        this.tree = tree;
+    }
+
+    /**
+     * Collects the elements whose distance from the query is strictly below
+     * {@code range}.
+     *
+     * @param query the object to search around; its {@code toString()} is compared
+     * @param range the exclusive search radius
+     * @return the matching elements, in traversal order
+     */
+    public List<? extends Serializable> range(Object query, float range) {
+        return rangeTraversal(query, range, tree.getRoot(), new ArrayList<Serializable>());
+    }
+
+    /**
+     * Visits {@code tNode} and whichever of its subtrees may hold matches,
+     * appending the matches to {@code results}.
+     */
+    private List<Serializable> rangeTraversal(Object query, float range, TNode tNode, List<Serializable> results) {
+        if (tNode == null) {
+            return results;
+        }
+
+        // Compare against the element held by the node: TNode does not override
+        // toString(), so tNode.toString() would be the default Object form.
+        float d = this.distance.d(query.toString(), tNode.get().toString());
+        if (d < range) {
+            results.add(tNode.get());
+        }
+
+        float median = tNode.getMedian();
+        if (d + range < median) {
+            // the whole search ball lies inside the median radius
+            rangeTraversal(query, range, tNode.getLeft(), results);
+        } else if (d - range > median) {
+            // the whole search ball lies outside the median radius
+            rangeTraversal(query, range, tNode.getRight(), results);
+        } else {
+            rangeTraversal(query, range, tNode.getLeft(), results);
+            rangeTraversal(query, range, tNode.getRight(), results);
+        }
+
+        return results;
+    }
+
+}
diff --git a/src/main/java/org/apache/tools/bzip2/BZip2Constants.java b/src/main/java/org/apache/tools/bzip2/BZip2Constants.java
new file mode 100644
index 000000000..4f832d67d
--- /dev/null
+++ b/src/main/java/org/apache/tools/bzip2/BZip2Constants.java
@@ -0,0 +1,136 @@
+/*
+ * The Apache Software License, Version 1.1
+ *
+ * Copyright (c) 2001 The Apache Software Foundation. All rights
+ * reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * 3. The end-user documentation included with the redistribution, if
+ * any, must include the following acknowlegement:
+ * "This product includes software developed by the
+ * Apache Software Foundation (http://www.apache.org/)."
+ * Alternately, this acknowlegement may appear in the software itself,
+ * if and wherever such third-party acknowlegements normally appear.
+ *
+ * 4. The names "Ant" and "Apache Software
+ * Foundation" must not be used to endorse or promote products derived
+ * from this software without prior written permission. For written
+ * permission, please contact apache@apache.org.
+ *
+ * 5. Products derived from this software may not be called "Apache"
+ * nor may "Apache" appear in their names without prior written
+ * permission of the Apache Group.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
+ * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+ * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ * ====================================================================
+ *
+ * This software consists of voluntary contributions made by many
+ * individuals on behalf of the Apache Software Foundation. For more
+ * information on the Apache Software Foundation, please see
+ * <http://www.apache.org/>.
+ */
+
+/*
+ * This package is based on the work done by Keiron Liddle, Aftex Software
+ * to whom the Ant project is very grateful for his
+ * great code.
+ */
+
+package org.apache.tools.bzip2;
+
+/**
+ * Constants shared by the bzip2 compress and decompress classes: size and
+ * count limits plus the rNums table. Being interface fields, they are all
+ * implicitly public, static and final.
+ * @author Keiron Liddle
+ */
+public interface BZip2Constants {
+ // Scalar limits; MAX_SELECTORS below is derived from G_SIZE.
+ int baseBlockSize = 100000;
+ int MAX_ALPHA_SIZE = 258;
+ int MAX_CODE_LEN = 23;
+ int RUNA = 0;
+ int RUNB = 1;
+ int N_GROUPS = 6;
+ int G_SIZE = 50;
+ int N_ITERS = 4;
+ int MAX_SELECTORS = (2 + (900000 / G_SIZE));
+ int NUM_OVERSHOOT_BYTES = 20;
+ // Fixed integer table. NOTE(review): presumably bzip2's block-randomisation table - confirm.
+ int[] rNums = {
+ 619, 720, 127, 481, 931, 816, 813, 233, 566, 247,
+ 985, 724, 205, 454, 863, 491, 741, 242, 949, 214,
+ 733, 859, 335, 708, 621, 574, 73, 654, 730, 472,
+ 419, 436, 278, 496, 867, 210, 399, 680, 480, 51,
+ 878, 465, 811, 169, 869, 675, 611, 697, 867, 561,
+ 862, 687, 507, 283, 482, 129, 807, 591, 733, 623,
+ 150, 238, 59, 379, 684, 877, 625, 169, 643, 105,
+ 170, 607, 520, 932, 727, 476, 693, 425, 174, 647,
+ 73, 122, 335, 530, 442, 853, 695, 249, 445, 515,
+ 909, 545, 703, 919, 874, 474, 882, 500, 594, 612,
+ 641, 801, 220, 162, 819, 984, 589, 513, 495, 799,
+ 161, 604, 958, 533, 221, 400, 386, 867, 600, 782,
+ 382, 596, 414, 171, 516, 375, 682, 485, 911, 276,
+ 98, 553, 163, 354, 666, 933, 424, 341, 533, 870,
+ 227, 730, 475, 186, 263, 647, 537, 686, 600, 224,
+ 469, 68, 770, 919, 190, 373, 294, 822, 808, 206,
+ 184, 943, 795, 384, 383, 461, 404, 758, 839, 887,
+ 715, 67, 618, 276, 204, 918, 873, 777, 604, 560,
+ 951, 160, 578, 722, 79, 804, 96, 409, 713, 940,
+ 652, 934, 970, 447, 318, 353, 859, 672, 112, 785,
+ 645, 863, 803, 350, 139, 93, 354, 99, 820, 908,
+ 609, 772, 154, 274, 580, 184, 79, 626, 630, 742,
+ 653, 282, 762, 623, 680, 81, 927, 626, 789, 125,
+ 411, 521, 938, 300, 821, 78, 343, 175, 128, 250,
+ 170, 774, 972, 275, 999, 639, 495, 78, 352, 126,
+ 857, 956, 358, 619, 580, 124, 737, 594, 701, 612,
+ 669, 112, 134, 694, 363, 992, 809, 743, 168, 974,
+ 944, 375, 748, 52, 600, 747, 642, 182, 862, 81,
+ 344, 805, 988, 739, 511, 655, 814, 334, 249, 515,
+ 897, 955, 664, 981, 649, 113, 974, 459, 893, 228,
+ 433, 837, 553, 268, 926, 240, 102, 654, 459, 51,
+ 686, 754, 806, 760, 493, 403, 415, 394, 687, 700,
+ 946, 670, 656, 610, 738, 392, 760, 799, 887, 653,
+ 978, 321, 576, 617, 626, 502, 894, 679, 243, 440,
+ 680, 879, 194, 572, 640, 724, 926, 56, 204, 700,
+ 707, 151, 457, 449, 797, 195, 791, 558, 945, 679,
+ 297, 59, 87, 824, 713, 663, 412, 693, 342, 606,
+ 134, 108, 571, 364, 631, 212, 174, 643, 304, 329,
+ 343, 97, 430, 751, 497, 314, 983, 374, 822, 928,
+ 140, 206, 73, 263, 980, 736, 876, 478, 430, 305,
+ 170, 514, 364, 692, 829, 82, 855, 953, 676, 246,
+ 369, 970, 294, 750, 807, 827, 150, 790, 288, 923,
+ 804, 378, 215, 828, 592, 281, 565, 555, 710, 82,
+ 896, 831, 547, 261, 524, 462, 293, 465, 502, 56,
+ 661, 821, 976, 991, 658, 869, 905, 758, 745, 193,
+ 768, 550, 608, 933, 378, 286, 215, 979, 792, 961,
+ 61, 688, 793, 644, 986, 403, 106, 366, 905, 644,
+ 372, 567, 466, 434, 645, 210, 389, 550, 919, 135,
+ 780, 773, 635, 389, 707, 100, 626, 958, 165, 504,
+ 920, 176, 193, 713, 857, 265, 203, 50, 668, 108,
+ 645, 990, 626, 197, 510, 357, 358, 850, 858, 364,
+ 936, 638
+ };
+}
diff --git a/src/main/java/org/apache/tools/bzip2/CBZip2InputStream.java b/src/main/java/org/apache/tools/bzip2/CBZip2InputStream.java
new file mode 100644
index 000000000..da4c82017
--- /dev/null
+++ b/src/main/java/org/apache/tools/bzip2/CBZip2InputStream.java
@@ -0,0 +1,865 @@
+/*
+ * The Apache Software License, Version 1.1
+ *
+ * Copyright (c) 2001-2003 The Apache Software Foundation. All rights
+ * reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * 3. The end-user documentation included with the redistribution, if
+ * any, must include the following acknowlegement:
+ * "This product includes software developed by the
+ * Apache Software Foundation (http://www.apache.org/)."
+ * Alternately, this acknowlegement may appear in the software itself,
+ * if and wherever such third-party acknowlegements normally appear.
+ *
+ * 4. The names "Ant" and "Apache Software
+ * Foundation" must not be used to endorse or promote products derived
+ * from this software without prior written permission. For written
+ * permission, please contact apache@apache.org.
+ *
+ * 5. Products derived from this software may not be called "Apache"
+ * nor may "Apache" appear in their names without prior written
+ * permission of the Apache Group.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
+ * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+ * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ * ====================================================================
+ *
+ * This software consists of voluntary contributions made by many
+ * individuals on behalf of the Apache Software Foundation. For more
+ * information on the Apache Software Foundation, please see
+ * <http://www.apache.org/>.
+ */
+
+/*
+ * This package is based on the work done by Keiron Liddle, Aftex Software
+ * to whom the Ant project is very grateful for his
+ * great code.
+ */
+package org.apache.tools.bzip2;
+
+import java.io.IOException;
+import java.io.InputStream;
+
+/**
+ * An input stream that decompresses from the BZip2 format (without the file
+ * header chars) to be read as any other stream.
+ *
+ * @author Keiron Liddle
+ */
+public class CBZip2InputStream extends InputStream implements BZip2Constants {
+ private static void cadvise() { // Shared reporter used by every error helper of this class (EOF, overrun, bad header, CRC).
+ System.out.println("CRC Error"); // The same message is printed whatever the actual kind of error was.
+ //throw new CCoruptionError();  -- disabled: the error is only printed and the caller carries on.
+ }
+
+ private static void compressedStreamEOF() { // Reports a premature end of, or a read failure on, the compressed input.
+ cadvise();
+ }
+
+ /**
+  * Rebuilds the two translation tables from the inUse flags: every byte
+  * value flagged in inUse gets the next free sequence number, and nInUse
+  * ends up holding how many byte values are flagged.
+  */
+ private void makeMaps() {
+     nInUse = 0;
+     for (int value = 0; value < 256; value++) {
+         if (!inUse[value]) {
+             continue;
+         }
+         seqToUnseq[nInUse] = (char) value;
+         unseqToSeq[value] = (char) nInUse;
+         nInUse++;
+     }
+ }
+
+ /*
+ index of the last char in the block, so
+ the block size == last + 1.
+ */
+ private int last;
+
+ /*
+ index in zptr[] of original string after sorting.
+ */
+ private int origPtr;
+
+ /*
+ always: in the range 0 .. 9.
+ The current block size is 100000 * this number.
+ */
+ private int blockSize100k;
+
+ private boolean blockRandomised;
+
+ private int bsBuff;
+ private int bsLive;
+ private CRC mCrc = new CRC();
+
+ private boolean[] inUse = new boolean[256];
+ private int nInUse;
+
+ private char[] seqToUnseq = new char[256];
+ private char[] unseqToSeq = new char[256];
+
+ private char[] selector = new char[MAX_SELECTORS];
+ private char[] selectorMtf = new char[MAX_SELECTORS];
+
+ private int[] tt;
+ private char[] ll8;
+
+ /*
+ freq table collected to save a pass over the data
+ during decompression.
+ */
+ private int[] unzftab = new int[256];
+
+ private int[][] limit = new int[N_GROUPS][MAX_ALPHA_SIZE];
+ private int[][] base = new int[N_GROUPS][MAX_ALPHA_SIZE];
+ private int[][] perm = new int[N_GROUPS][MAX_ALPHA_SIZE];
+ private int[] minLens = new int[N_GROUPS];
+
+ private InputStream bsStream;
+
+ private boolean streamEnd = false;
+
+ private int currentChar = -1;
+
+ private static final int START_BLOCK_STATE = 1;
+ private static final int RAND_PART_A_STATE = 2;
+ private static final int RAND_PART_B_STATE = 3;
+ private static final int RAND_PART_C_STATE = 4;
+ private static final int NO_RAND_PART_A_STATE = 5;
+ private static final int NO_RAND_PART_B_STATE = 6;
+ private static final int NO_RAND_PART_C_STATE = 7;
+
+ private int currentState = START_BLOCK_STATE;
+
+ private int storedBlockCRC, storedCombinedCRC;
+ private int computedBlockCRC, computedCombinedCRC;
+
+ int i2, count, chPrev, ch2; // i2: entries consumed from the block (compared with last); count: length of the current run of equal bytes; chPrev/ch2: previous/current byte.
+ int i, tPos; // i: loop index used by setupBlock(); tPos: current index into ll8[] / tt[].
+ int rNToGo = 0; // Countdown for the randomised mode; reloaded from rNums[] when it reaches 0.
+ int rTPos = 0; // Index of the next rNums[] entry; wraps to 0 at 512.
+ int j2; // Number of repeats already emitted in the PART_C states (compared with z).
+ char z; // Value fetched from ll8[] after four equal bytes; used as the repeat count.
+
+ /**
+  * Creates a decompressing stream that reads BZip2 data from zStream,
+  * starting at the 'h' + block-size-digit part of the header, and
+  * prepares the first output byte.
+  *
+  * If the header is not recognised, or the first block marker is the
+  * end-of-stream marker or is invalid, the stream is simply flagged as
+  * ended and read() returns -1. Previously a bad header made the
+  * constructor go on reading from the already released input and fail
+  * with a NullPointerException.
+  *
+  * @param zStream the compressed input
+  */
+ public CBZip2InputStream(InputStream zStream) {
+     ll8 = null;
+     tt = null;
+     bsSetStream(zStream);
+     initialize();
+     // initialize() sets streamEnd (and may close/null the input) on a bad header.
+     if (!streamEnd) {
+         initBlock();
+     }
+     // initBlock() sets streamEnd on the end marker or on a bad block header.
+     if (!streamEnd) {
+         setupBlock();
+     }
+ }
+
+ /**
+  * Returns the next decompressed byte, or -1 once the stream has ended.
+  * The value handed back is the one prepared beforehand in currentChar;
+  * the state machine is then advanced one step so that the following
+  * byte is ready for the next call.
+  */
+ public int read() {
+     if (streamEnd) {
+         return -1;
+     }
+     final int result = currentChar;
+     if (currentState == RAND_PART_B_STATE) {
+         setupRandPartB();
+     } else if (currentState == RAND_PART_C_STATE) {
+         setupRandPartC();
+     } else if (currentState == NO_RAND_PART_B_STATE) {
+         setupNoRandPartB();
+     } else if (currentState == NO_RAND_PART_C_STATE) {
+         setupNoRandPartC();
+     }
+     // START_BLOCK and the two PART_A states (and anything else) need no work here.
+     return result;
+ }
+
+ private void initialize() {
+ char magic3, magic4;
+ magic3 = bsGetUChar();
+ magic4 = bsGetUChar();
+ if (magic3 != 'h' || magic4 < '1' || magic4 > '9') {
+ bsFinishedWithStream();
+ streamEnd = true;
+ return;
+ }
+
+ setDecompressStructureSizes(magic4 - '0');
+ computedCombinedCRC = 0;
+ }
+
+ /**
+  * Reads the 48-bit marker that starts the next section of the stream.
+  * 0x177245385090 is the end-of-stream marker and is handed to
+  * complete(); 0x314159265359 starts a data block, whose stored CRC,
+  * randomisation bit and contents are then read. Any other value is
+  * reported through badBlockHeader() and ends the stream.
+  */
+ private void initBlock() {
+     long marker = 0L;
+     for (int k = 0; k < 6; k++) {
+         marker = (marker << 8) | bsGetUChar();
+     }
+     if (marker == 0x177245385090L) {
+         complete();
+         return;
+     }
+     if (marker != 0x314159265359L) {
+         badBlockHeader();
+         streamEnd = true;
+         return;
+     }
+     storedBlockCRC = bsGetInt32();
+     blockRandomised = (bsR(1) == 1);
+     // currBlockNo++;
+     getAndMoveToFrontDecode();
+     mCrc.initialiseCRC();
+     currentState = START_BLOCK_STATE;
+ }
+
+ private void endBlock() {
+ computedBlockCRC = mCrc.getFinalCRC();
+ /* A bad CRC is considered a fatal error. */
+ if (storedBlockCRC != computedBlockCRC) {
+ crcError();
+ }
+
+ computedCombinedCRC = (computedCombinedCRC << 1)
+ | (computedCombinedCRC >>> 31);
+ computedCombinedCRC ^= computedBlockCRC;
+ }
+
+ private void complete() {
+ storedCombinedCRC = bsGetInt32();
+ if (storedCombinedCRC != computedCombinedCRC) {
+ crcError();
+ }
+
+ bsFinishedWithStream();
+ streamEnd = true;
+ }
+
+ private static void blockOverrun() { // Reports that the decoded data reached or passed the block size limit.
+ cadvise();
+ }
+
+ private static void badBlockHeader() { // Reports a 48-bit marker that is neither the block header nor the end-of-stream marker.
+ cadvise();
+ }
+
+ private static void crcError() { // Reports a stored/computed CRC mismatch (per block or combined).
+ cadvise();
+ }
+
+ private void bsFinishedWithStream() {
+ try {
+ if (this.bsStream != null) {
+ if (this.bsStream != System.in) {
+ this.bsStream.close();
+ this.bsStream = null;
+ }
+ }
+ } catch (IOException ioe) {
+ //ignore
+ }
+ }
+
+ /**
+  * Attaches the bit reader to f and empties the bit buffer.
+  *
+  * @param f the stream the compressed bytes are read from
+  */
+ private void bsSetStream(InputStream f) {
+     bsBuff = 0;
+     bsLive = 0;
+     bsStream = f;
+ }
+
+ private int bsR(int n) {
+ int v;
+ while (bsLive < n) {
+ int zzi;
+ char thech = 0;
+ try {
+ thech = (char) bsStream.read();
+ } catch (IOException e) {
+ compressedStreamEOF();
+ }
+ if (thech == -1) {
+ compressedStreamEOF();
+ }
+ zzi = thech;
+ bsBuff = (bsBuff << 8) | (zzi & 0xff);
+ bsLive += 8;
+ }
+
+ v = (bsBuff >> (bsLive - n)) & ((1 << n) - 1);
+ bsLive -= n;
+ return v;
+ }
+
+ private char bsGetUChar() { // Reads the next 8 bits of the bit stream.
+ return (char) bsR(8);
+ }
+
+ /**
+  * Reads a 32-bit value from the bit stream as four consecutive bytes,
+  * most significant byte first.
+  */
+ private int bsGetint() {
+     int value = 0;
+     for (int k = 0; k < 4; k++) {
+         value = (value << 8) | bsR(8);
+     }
+     return value;
+ }
+
+ private int bsGetIntVS(int numBits) {
+ return (int) bsR(numBits);
+ }
+
+ private int bsGetInt32() {
+ return (int) bsGetint();
+ }
+
+ /**
+  * Fills the limit/base/perm decode tables of one coding table from the
+  * code length of each symbol.
+  *
+  * @param limit     out: per code length, the largest code value of that length
+  * @param base      out: per code length, the offset subtracted from a code
+  *                  to obtain its index into perm
+  * @param perm      out: the symbols ordered by code length, then by symbol
+  * @param length    code length of each symbol
+  * @param minLen    shortest code length in use
+  * @param maxLen    longest code length in use
+  * @param alphaSize number of symbols
+  */
+ private void hbCreateDecodeTables(int[] limit, int[] base,
+                                   int[] perm, char[] length,
+                                   int minLen, int maxLen, int alphaSize) {
+     // Symbols sorted by code length (stable with respect to symbol order).
+     int slot = 0;
+     for (int bits = minLen; bits <= maxLen; bits++) {
+         for (int sym = 0; sym < alphaSize; sym++) {
+             if (length[sym] == bits) {
+                 perm[slot] = sym;
+                 slot++;
+             }
+         }
+     }
+     // base[k] = number of symbols whose code is shorter than k bits.
+     for (int k = 0; k < MAX_CODE_LEN; k++) {
+         base[k] = 0;
+     }
+     for (int sym = 0; sym < alphaSize; sym++) {
+         base[length[sym] + 1]++;
+     }
+     for (int k = 1; k < MAX_CODE_LEN; k++) {
+         base[k] += base[k - 1];
+     }
+     for (int k = 0; k < MAX_CODE_LEN; k++) {
+         limit[k] = 0;
+     }
+     // Walk the code lengths, tracking the first unused code value.
+     int vec = 0;
+     for (int bits = minLen; bits <= maxLen; bits++) {
+         vec += (base[bits + 1] - base[bits]);
+         limit[bits] = vec - 1;
+         vec <<= 1;
+     }
+     for (int bits = minLen + 1; bits <= maxLen; bits++) {
+         base[bits] = ((limit[bits - 1] + 1) << 1) - base[bits];
+     }
+ }
+
+ private void recvDecodingTables() {
+ char len[][] = new char[N_GROUPS][MAX_ALPHA_SIZE];
+ int i, j, t, nGroups, nSelectors, alphaSize;
+ int minLen, maxLen;
+ boolean[] inUse16 = new boolean[16];
+
+ /* Receive the mapping table */
+ for (i = 0; i < 16; i++) {
+ if (bsR(1) == 1) {
+ inUse16[i] = true;
+ } else {
+ inUse16[i] = false;
+ }
+ }
+
+ for (i = 0; i < 256; i++) {
+ inUse[i] = false;
+ }
+
+ for (i = 0; i < 16; i++) {
+ if (inUse16[i]) {
+ for (j = 0; j < 16; j++) {
+ if (bsR(1) == 1) {
+ inUse[i * 16 + j] = true;
+ }
+ }
+ }
+ }
+
+ makeMaps();
+ alphaSize = nInUse + 2;
+
+ /* Now the selectors */
+ nGroups = bsR(3);
+ nSelectors = bsR(15);
+ for (i = 0; i < nSelectors; i++) {
+ j = 0;
+ while (bsR(1) == 1) {
+ j++;
+ }
+ selectorMtf[i] = (char) j;
+ }
+
+ /* Undo the MTF values for the selectors. */
+ {
+ char[] pos = new char[N_GROUPS];
+ char tmp, v;
+ for (v = 0; v < nGroups; v++) {
+ pos[v] = v;
+ }
+
+ for (i = 0; i < nSelectors; i++) {
+ v = selectorMtf[i];
+ tmp = pos[v];
+ while (v > 0) {
+ pos[v] = pos[v - 1];
+ v--;
+ }
+ pos[0] = tmp;
+ selector[i] = tmp;
+ }
+ }
+
+ /* Now the coding tables */
+ for (t = 0; t < nGroups; t++) {
+ int curr = bsR(5);
+ for (i = 0; i < alphaSize; i++) {
+ while (bsR(1) == 1) {
+ if (bsR(1) == 0) {
+ curr++;
+ } else {
+ curr--;
+ }
+ }
+ len[t][i] = (char) curr;
+ }
+ }
+
+ /* Create the Huffman decoding tables */
+ for (t = 0; t < nGroups; t++) {
+ minLen = 32;
+ maxLen = 0;
+ for (i = 0; i < alphaSize; i++) {
+ if (len[t][i] > maxLen) {
+ maxLen = len[t][i];
+ }
+ if (len[t][i] < minLen) {
+ minLen = len[t][i];
+ }
+ }
+ hbCreateDecodeTables(limit[t], base[t], perm[t], len[t], minLen,
+ maxLen, alphaSize);
+ minLens[t] = minLen;
+ }
+ }
+
+ private void getAndMoveToFrontDecode() {
+ char[] yy = new char[256];
+ int i, j, nextSym, limitLast;
+ int EOB, groupNo, groupPos;
+
+ limitLast = baseBlockSize * blockSize100k;
+ origPtr = bsGetIntVS(24);
+
+ recvDecodingTables();
+ EOB = nInUse + 1;
+ groupNo = -1;
+ groupPos = 0;
+
+ /*
+ Setting up the unzftab entries here is not strictly
+ necessary, but it does save having to do it later
+ in a separate pass, and so saves a block's worth of
+ cache misses.
+ */
+ for (i = 0; i <= 255; i++) {
+ unzftab[i] = 0;
+ }
+
+ for (i = 0; i <= 255; i++) {
+ yy[i] = (char) i;
+ }
+
+ last = -1;
+
+ {
+ int zt, zn, zvec, zj;
+ if (groupPos == 0) {
+ groupNo++;
+ groupPos = G_SIZE;
+ }
+ groupPos--;
+ zt = selector[groupNo];
+ zn = minLens[zt];
+ zvec = bsR(zn);
+ while (zvec > limit[zt][zn]) {
+ zn++;
+ {
+ {
+ while (bsLive < 1) {
+ int zzi;
+ char thech = 0;
+ try {
+ thech = (char) bsStream.read();
+ } catch (IOException e) {
+ compressedStreamEOF();
+ }
+ if (thech == -1) {
+ compressedStreamEOF();
+ }
+ zzi = thech;
+ bsBuff = (bsBuff << 8) | (zzi & 0xff);
+ bsLive += 8;
+ }
+ }
+ zj = (bsBuff >> (bsLive - 1)) & 1;
+ bsLive--;
+ }
+ zvec = (zvec << 1) | zj;
+ }
+ nextSym = perm[zt][zvec - base[zt][zn]];
+ }
+
+ while (true) {
+
+ if (nextSym == EOB) {
+ break;
+ }
+
+ if (nextSym == RUNA || nextSym == RUNB) {
+ char ch;
+ int s = -1;
+ int N = 1;
+ do {
+ if (nextSym == RUNA) {
+ s = s + (0 + 1) * N;
+ } else if (nextSym == RUNB) {
+ s = s + (1 + 1) * N;
+ }
+ N = N * 2;
+ {
+ int zt, zn, zvec, zj;
+ if (groupPos == 0) {
+ groupNo++;
+ groupPos = G_SIZE;
+ }
+ groupPos--;
+ zt = selector[groupNo];
+ zn = minLens[zt];
+ zvec = bsR(zn);
+ while (zvec > limit[zt][zn]) {
+ zn++;
+ {
+ {
+ while (bsLive < 1) {
+ int zzi;
+ char thech = 0;
+ try {
+ thech = (char) bsStream.read();
+ } catch (IOException e) {
+ compressedStreamEOF();
+ }
+ if (thech == -1) {
+ compressedStreamEOF();
+ }
+ zzi = thech;
+ bsBuff = (bsBuff << 8) | (zzi & 0xff);
+ bsLive += 8;
+ }
+ }
+ zj = (bsBuff >> (bsLive - 1)) & 1;
+ bsLive--;
+ }
+ zvec = (zvec << 1) | zj;
+ }
+ nextSym = perm[zt][zvec - base[zt][zn]];
+ }
+ } while (nextSym == RUNA || nextSym == RUNB);
+
+ s++;
+ ch = seqToUnseq[yy[0]];
+ unzftab[ch] += s;
+
+ while (s > 0) {
+ last++;
+ ll8[last] = ch;
+ s--;
+ }
+
+ if (last >= limitLast) {
+ blockOverrun();
+ }
+ continue;
+ } else {
+ char tmp;
+ last++;
+ if (last >= limitLast) {
+ blockOverrun();
+ }
+
+ tmp = yy[nextSym - 1];
+ unzftab[seqToUnseq[tmp]]++;
+ ll8[last] = seqToUnseq[tmp];
+
+ /*
+ This loop is hammered during decompression,
+ hence the unrolling.
+
+ for (j = nextSym-1; j > 0; j--) yy[j] = yy[j-1];
+ */
+
+ j = nextSym - 1;
+ for (; j > 3; j -= 4) {
+ yy[j] = yy[j - 1];
+ yy[j - 1] = yy[j - 2];
+ yy[j - 2] = yy[j - 3];
+ yy[j - 3] = yy[j - 4];
+ }
+ for (; j > 0; j--) {
+ yy[j] = yy[j - 1];
+ }
+
+ yy[0] = tmp;
+ {
+ int zt, zn, zvec, zj;
+ if (groupPos == 0) {
+ groupNo++;
+ groupPos = G_SIZE;
+ }
+ groupPos--;
+ zt = selector[groupNo];
+ zn = minLens[zt];
+ zvec = bsR(zn);
+ while (zvec > limit[zt][zn]) {
+ zn++;
+ {
+ {
+ while (bsLive < 1) {
+ int zzi;
+ char thech = 0;
+ try {
+ thech = (char) bsStream.read();
+ } catch (IOException e) {
+ compressedStreamEOF();
+ }
+ zzi = thech;
+ bsBuff = (bsBuff << 8) | (zzi & 0xff);
+ bsLive += 8;
+ }
+ }
+ zj = (bsBuff >> (bsLive - 1)) & 1;
+ bsLive--;
+ }
+ zvec = (zvec << 1) | zj;
+ }
+ nextSym = perm[zt][zvec - base[zt][zn]];
+ }
+ continue;
+ }
+ }
+ }
+
+ private void setupBlock() {
+ int[] cftab = new int[257];
+ char ch;
+
+ cftab[0] = 0;
+ for (i = 1; i <= 256; i++) {
+ cftab[i] = unzftab[i - 1];
+ }
+ for (i = 1; i <= 256; i++) {
+ cftab[i] += cftab[i - 1];
+ }
+
+ for (i = 0; i <= last; i++) {
+ ch = (char) ll8[i];
+ tt[cftab[ch]] = i;
+ cftab[ch]++;
+ }
+ cftab = null;
+
+ tPos = tt[origPtr];
+
+ count = 0;
+ i2 = 0;
+ ch2 = 256; /* not a char and not EOF */
+
+ if (blockRandomised) {
+ rNToGo = 0;
+ rTPos = 0;
+ setupRandPartA();
+ } else {
+ setupNoRandPartA();
+ }
+ }
+
+ /**
+  * Randomised block, step A: fetches the next byte of the block, flips
+  * its lowest bit when the rNums[] countdown stands at 1, publishes it
+  * as currentChar and feeds it to the CRC. Once the block is used up it
+  * is closed and the next one is started instead.
+  */
+ private void setupRandPartA() {
+     if (i2 > last) {
+         endBlock();
+         initBlock();
+         setupBlock();
+         return;
+     }
+
+     chPrev = ch2;
+     ch2 = ll8[tPos];
+     tPos = tt[tPos];
+     if (rNToGo == 0) {
+         rNToGo = rNums[rTPos];
+         rTPos = (rTPos + 1 == 512) ? 0 : rTPos + 1;
+     }
+     rNToGo--;
+     if (rNToGo == 1) {
+         ch2 ^= 1;
+     }
+     i2++;
+
+     currentChar = ch2;
+     currentState = RAND_PART_B_STATE;
+     mCrc.updateCRC(ch2);
+ }
+
+ /**
+  * Plain block, step A: fetches the next byte of the block, publishes
+  * it as currentChar and feeds it to the CRC. Once the block is used up
+  * it is closed and the next one is started instead.
+  */
+ private void setupNoRandPartA() {
+     if (i2 > last) {
+         endBlock();
+         initBlock();
+         setupBlock();
+         return;
+     }
+
+     chPrev = ch2;
+     ch2 = ll8[tPos];
+     tPos = tt[tPos];
+     i2++;
+
+     currentChar = ch2;
+     currentState = NO_RAND_PART_B_STATE;
+     mCrc.updateCRC(ch2);
+ }
+
+ /**
+  * Randomised block, step B: keeps track of runs of equal bytes. After
+  * four equal bytes in a row the next entry of the block is read as a
+  * repeat count (de-randomised like any other byte) and step C takes
+  * over; otherwise step A fetches the next byte.
+  */
+ private void setupRandPartB() {
+     if (ch2 != chPrev) {
+         currentState = RAND_PART_A_STATE;
+         count = 1;
+         setupRandPartA();
+         return;
+     }
+
+     count++;
+     if (count < 4) {
+         currentState = RAND_PART_A_STATE;
+         setupRandPartA();
+         return;
+     }
+
+     z = ll8[tPos];
+     tPos = tt[tPos];
+     if (rNToGo == 0) {
+         rNToGo = rNums[rTPos];
+         rTPos = (rTPos + 1 == 512) ? 0 : rTPos + 1;
+     }
+     rNToGo--;
+     if (rNToGo == 1) {
+         z ^= 1;
+     }
+     j2 = 0;
+     currentState = RAND_PART_C_STATE;
+     setupRandPartC();
+ }
+
+ private void setupRandPartC() {
+ if (j2 < (int) z) {
+ currentChar = ch2;
+ mCrc.updateCRC(ch2);
+ j2++;
+ } else {
+ currentState = RAND_PART_A_STATE;
+ i2++;
+ count = 0;
+ setupRandPartA();
+ }
+ }
+
+ /**
+  * Plain block, step B: keeps track of runs of equal bytes. After four
+  * equal bytes in a row the next entry of the block is read as a repeat
+  * count and step C takes over; otherwise step A fetches the next byte.
+  */
+ private void setupNoRandPartB() {
+     if (ch2 != chPrev) {
+         currentState = NO_RAND_PART_A_STATE;
+         count = 1;
+         setupNoRandPartA();
+         return;
+     }
+
+     count++;
+     if (count < 4) {
+         currentState = NO_RAND_PART_A_STATE;
+         setupNoRandPartA();
+         return;
+     }
+
+     z = ll8[tPos];
+     tPos = tt[tPos];
+     currentState = NO_RAND_PART_C_STATE;
+     j2 = 0;
+     setupNoRandPartC();
+ }
+
+ private void setupNoRandPartC() {
+ if (j2 < (int) z) {
+ currentChar = ch2;
+ mCrc.updateCRC(ch2);
+ j2++;
+ } else {
+ currentState = NO_RAND_PART_A_STATE;
+ i2++;
+ count = 0;
+ setupNoRandPartA();
+ }
+ }
+
+ private void setDecompressStructureSizes(int newSize100k) {
+ if (!(0 <= newSize100k && newSize100k <= 9 && 0 <= blockSize100k
+ && blockSize100k <= 9)) {
+ // throw new IOException("Invalid block size");
+ }
+
+ blockSize100k = newSize100k;
+
+ if (newSize100k == 0) {
+ return;
+ }
+
+ int n = baseBlockSize * newSize100k;
+ ll8 = new char[n];
+ tt = new int[n];
+ }
+}
+
diff --git a/src/main/java/org/apache/tools/bzip2/CBZip2OutputStream.java b/src/main/java/org/apache/tools/bzip2/CBZip2OutputStream.java
new file mode 100644
index 000000000..521cfe70d
--- /dev/null
+++ b/src/main/java/org/apache/tools/bzip2/CBZip2OutputStream.java
@@ -0,0 +1,1665 @@
+/*
+ * The Apache Software License, Version 1.1
+ *
+ * Copyright (c) 2001-2003 The Apache Software Foundation. All rights
+ * reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * 3. The end-user documentation included with the redistribution, if
+ * any, must include the following acknowlegement:
+ * "This product includes software developed by the
+ * Apache Software Foundation (http://www.apache.org/)."
+ * Alternately, this acknowlegement may appear in the software itself,
+ * if and wherever such third-party acknowlegements normally appear.
+ *
+ * 4. The names "Ant" and "Apache Software
+ * Foundation" must not be used to endorse or promote products derived
+ * from this software without prior written permission. For written
+ * permission, please contact apache@apache.org.
+ *
+ * 5. Products derived from this software may not be called "Apache"
+ * nor may "Apache" appear in their names without prior written
+ * permission of the Apache Group.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
+ * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+ * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ * ====================================================================
+ *
+ * This software consists of voluntary contributions made by many
+ * individuals on behalf of the Apache Software Foundation. For more
+ * information on the Apache Software Foundation, please see
+ * <http://www.apache.org/>.
+ */
+
+/*
+ * This package is based on the work done by Keiron Liddle, Aftex Software
+ * to whom the Ant project is very grateful for his
+ * great code.
+ */
+
+package org.apache.tools.bzip2;
+
+import java.io.IOException;
+import java.io.OutputStream;
+
+/**
+ * An output stream that compresses into the BZip2 format (without the file
+ * header chars) into another stream.
+ *
+ * @author Keiron Liddle
+ *
+ * TODO: Update to BZip2 1.0.1
+ */
+public class CBZip2OutputStream extends OutputStream implements BZip2Constants {
+ protected static final int SETMASK = (1 << 21);
+ protected static final int CLEARMASK = (~SETMASK);
+ protected static final int GREATER_ICOST = 15;
+ protected static final int LESSER_ICOST = 0;
+ protected static final int SMALL_THRESH = 20;
+ protected static final int DEPTH_THRESH = 10;
+
+ /*
+ If you are ever unlucky/improbable enough
+ to get a stack overflow whilst sorting,
+ increase the following constant and try
+ again. In practice I have never seen the
+ stack go above 27 elems, so the following
+ limit seems very generous.
+ */
+ protected static final int QSORT_STACK_SIZE = 1000;
+
+ private static void panic() { // Reports a violated internal assumption of the compressor.
+ System.out.println("panic"); // Only prints; the caller keeps running afterwards.
+ //throw new CError();  -- disabled
+ }
+
+ /**
+  * Rebuilds the two translation tables from the inUse flags: every byte
+  * value flagged in inUse gets the next free sequence number, and nInUse
+  * ends up holding how many byte values are flagged.
+  */
+ private void makeMaps() {
+     nInUse = 0;
+     for (int value = 0; value < 256; value++) {
+         if (!inUse[value]) {
+             continue;
+         }
+         seqToUnseq[nInUse] = (char) value;
+         unseqToSeq[value] = (char) nInUse;
+         nInUse++;
+     }
+ }
+
+ protected static void hbMakeCodeLengths(char[] len, int[] freq,
+ int alphaSize, int maxLen) {
+ /*
+ Nodes and heap entries run from 1. Entry 0
+ for both the heap and nodes is a sentinel.
+ */
+ int nNodes, nHeap, n1, n2, i, j, k;
+ boolean tooLong;
+
+ int[] heap = new int[MAX_ALPHA_SIZE + 2];
+ int[] weight = new int[MAX_ALPHA_SIZE * 2];
+ int[] parent = new int[MAX_ALPHA_SIZE * 2];
+
+ for (i = 0; i < alphaSize; i++) {
+ weight[i + 1] = (freq[i] == 0 ? 1 : freq[i]) << 8;
+ }
+
+ while (true) {
+ nNodes = alphaSize;
+ nHeap = 0;
+
+ heap[0] = 0;
+ weight[0] = 0;
+ parent[0] = -2;
+
+ for (i = 1; i <= alphaSize; i++) {
+ parent[i] = -1;
+ nHeap++;
+ heap[nHeap] = i;
+ {
+ int zz, tmp;
+ zz = nHeap;
+ tmp = heap[zz];
+ while (weight[tmp] < weight[heap[zz >> 1]]) {
+ heap[zz] = heap[zz >> 1];
+ zz >>= 1;
+ }
+ heap[zz] = tmp;
+ }
+ }
+ if (!(nHeap < (MAX_ALPHA_SIZE + 2))) {
+ panic();
+ }
+
+ while (nHeap > 1) {
+ n1 = heap[1];
+ heap[1] = heap[nHeap];
+ nHeap--;
+ {
+ int zz = 0, yy = 0, tmp = 0;
+ zz = 1;
+ tmp = heap[zz];
+ while (true) {
+ yy = zz << 1;
+ if (yy > nHeap) {
+ break;
+ }
+ if (yy < nHeap
+ && weight[heap[yy + 1]] < weight[heap[yy]]) {
+ yy++;
+ }
+ if (weight[tmp] < weight[heap[yy]]) {
+ break;
+ }
+ heap[zz] = heap[yy];
+ zz = yy;
+ }
+ heap[zz] = tmp;
+ }
+ n2 = heap[1];
+ heap[1] = heap[nHeap];
+ nHeap--;
+ {
+ int zz = 0, yy = 0, tmp = 0;
+ zz = 1;
+ tmp = heap[zz];
+ while (true) {
+ yy = zz << 1;
+ if (yy > nHeap) {
+ break;
+ }
+ if (yy < nHeap
+ && weight[heap[yy + 1]] < weight[heap[yy]]) {
+ yy++;
+ }
+ if (weight[tmp] < weight[heap[yy]]) {
+ break;
+ }
+ heap[zz] = heap[yy];
+ zz = yy;
+ }
+ heap[zz] = tmp;
+ }
+ nNodes++;
+ parent[n1] = parent[n2] = nNodes;
+
+ weight[nNodes] = ((weight[n1] & 0xffffff00)
+ + (weight[n2] & 0xffffff00))
+ | (1 + (((weight[n1] & 0x000000ff) >
+ (weight[n2] & 0x000000ff)) ?
+ (weight[n1] & 0x000000ff) :
+ (weight[n2] & 0x000000ff)));
+
+ parent[nNodes] = -1;
+ nHeap++;
+ heap[nHeap] = nNodes;
+ {
+ int zz = 0, tmp = 0;
+ zz = nHeap;
+ tmp = heap[zz];
+ while (weight[tmp] < weight[heap[zz >> 1]]) {
+ heap[zz] = heap[zz >> 1];
+ zz >>= 1;
+ }
+ heap[zz] = tmp;
+ }
+ }
+ if (!(nNodes < (MAX_ALPHA_SIZE * 2))) {
+ panic();
+ }
+
+ tooLong = false;
+ for (i = 1; i <= alphaSize; i++) {
+ j = 0;
+ k = i;
+ while (parent[k] >= 0) {
+ k = parent[k];
+ j++;
+ }
+ len[i - 1] = (char) j;
+ if (j > maxLen) {
+ tooLong = true;
+ }
+ }
+
+ if (!tooLong) {
+ break;
+ }
+
+ for (i = 1; i < alphaSize; i++) {
+ j = weight[i] >> 8;
+ j = 1 + (j / 2);
+ weight[i] = j << 8;
+ }
+ }
+ }
+
+ /*
+ index of the last char in the block, so
+ the block size == last + 1.
+ */
+ int last;
+
+ /*
+ index in zptr[] of original string after sorting.
+ */
+ int origPtr;
+
+ /*
+ always: in the range 0 .. 9.
+ The current block size is 100000 * this number.
+ */
+ int blockSize100k;
+
+ boolean blockRandomised;
+
+ int bytesOut;
+ int bsBuff;
+ int bsLive;
+ CRC mCrc = new CRC();
+
+ private boolean[] inUse = new boolean[256];
+ private int nInUse;
+
+ private char[] seqToUnseq = new char[256];
+ private char[] unseqToSeq = new char[256];
+
+ private char[] selector = new char[MAX_SELECTORS];
+ private char[] selectorMtf = new char[MAX_SELECTORS];
+
+ private char[] block;
+ private int[] quadrant;
+ private int[] zptr;
+ private short[] szptr;
+ private int[] ftab;
+
+ private int nMTF;
+
+ private int[] mtfFreq = new int[MAX_ALPHA_SIZE];
+
+ /*
+ * Used when sorting. If too many long comparisons
+ * happen, we stop sorting, randomise the block
+ * slightly, and try again.
+ */
+ private int workFactor;
+ private int workDone;
+ private int workLimit;
+ private boolean firstAttempt;
+ private int nBlocksRandomised;
+
+ private int currentChar = -1;
+ private int runLength = 0;
+
+ public CBZip2OutputStream(OutputStream inStream) throws IOException { // Compresses onto inStream using block size 9, the largest the two-argument constructor accepts.
+ this(inStream, 9);
+ }
+
+ /**
+  * Creates a compressing stream that writes to inStream. The 'h' and
+  * block-size-digit header bytes are written immediately.
+  *
+  * @param inStream    destination of the compressed data
+  * @param inBlockSize block size, 1 to 9; values outside that range are
+  *                    clamped to the nearest bound
+  * @throws IOException if writing the header fails
+  */
+ public CBZip2OutputStream(OutputStream inStream, int inBlockSize)
+     throws IOException {
+     block = null;
+     quadrant = null;
+     zptr = null;
+     ftab = null;
+
+     bsSetStream(inStream);
+
+     workFactor = 50;
+     blockSize100k = Math.max(1, Math.min(9, inBlockSize));
+     allocateCompressStructures();
+     initialize();
+     initBlock();
+ }
+
+ /**
+ *
+ * modified by Oliver Merkel, 010128
+ *
+ */
+ /**
+  * Writes one byte. Only the eight low-order bits of bv are used, as
+  * the OutputStream.write(int) contract requires.
+  *
+  * Equal consecutive bytes are collected into a run; the run is passed
+  * to writeRun() when a different byte arrives or when it reaches 255
+  * bytes.
+  *
+  * Fix: the byte is now extracted with "bv & 0xff". The former
+  * "(256 + bv) % 256" produced a negative value for arguments below
+  * -256, which then served as an array index and as the "no current
+  * byte" marker. For every argument from -256 upwards the result is
+  * unchanged.
+  *
+  * modified by Oliver Merkel, 010128
+  *
+  * @param bv the byte to write
+  * @throws IOException if a finished block cannot be written out
+  */
+ public void write(int bv) throws IOException {
+     final int b = bv & 0xff;
+     if (currentChar == -1) {
+         // No run in progress: start one.
+         currentChar = b;
+         runLength++;
+         return;
+     }
+     if (currentChar != b) {
+         // A different byte ends the current run and starts a new one.
+         writeRun();
+         runLength = 1;
+         currentChar = b;
+         return;
+     }
+     runLength++;
+     if (runLength > 254) {
+         // Longest run that writeRun() can encode.
+         writeRun();
+         currentChar = -1;
+         runLength = 0;
+     }
+ }
+
+ private void writeRun() throws IOException {
+ if (last < allowableBlockSize) {
+ inUse[currentChar] = true;
+ for (int i = 0; i < runLength; i++) {
+ mCrc.updateCRC((char) currentChar);
+ }
+ switch (runLength) {
+ case 1:
+ last++;
+ block[last + 1] = (char) currentChar;
+ break;
+ case 2:
+ last++;
+ block[last + 1] = (char) currentChar;
+ last++;
+ block[last + 1] = (char) currentChar;
+ break;
+ case 3:
+ last++;
+ block[last + 1] = (char) currentChar;
+ last++;
+ block[last + 1] = (char) currentChar;
+ last++;
+ block[last + 1] = (char) currentChar;
+ break;
+ default:
+ inUse[runLength - 4] = true;
+ last++;
+ block[last + 1] = (char) currentChar;
+ last++;
+ block[last + 1] = (char) currentChar;
+ last++;
+ block[last + 1] = (char) currentChar;
+ last++;
+ block[last + 1] = (char) currentChar;
+ last++;
+ block[last + 1] = (char) (runLength - 4);
+ break;
+ }
+ } else {
+ endBlock();
+ initBlock();
+ writeRun();
+ }
+ }
+
+ boolean closed = false;
+
+ protected void finalize() throws Throwable { // Safety net: finishes and closes the stream if it was never closed explicitly.
+ close(); // Returns immediately when the closed flag is already set.
+ super.finalize();
+ }
+
+ /**
+  * Finishes the compressed stream (pending run, last block, end marker
+  * and combined CRC) and closes the underlying stream. Calling it again
+  * has no effect.
+  *
+  * Fix: the underlying stream is now closed, and the stream marked as
+  * closed, even when writing the final data fails. Previously such a
+  * failure left the underlying stream open and the closed flag unset,
+  * so finalize() would attempt the whole sequence again.
+  *
+  * @throws IOException if writing the final data or closing the
+  *                     underlying stream fails
+  */
+ public void close() throws IOException {
+     if (closed) {
+         return;
+     }
+     closed = true;
+     try {
+         if (runLength > 0) {
+             writeRun();
+         }
+         currentChar = -1;
+         endBlock();
+         endCompression();
+     } finally {
+         super.close();
+         bsStream.close();
+     }
+ }
+
+ public void flush() throws IOException { // Flushes the underlying stream only; the bit buffer and the unfinished block are not touched here.
+ super.flush();
+ bsStream.flush();
+ }
+
+ private int blockCRC, combinedCRC;
+
+ private void initialize() throws IOException {
+ bytesOut = 0;
+ nBlocksRandomised = 0;
+
+ /* Write `magic' bytes h indicating file-format == huffmanised,
+ followed by a digit indicating blockSize100k.
+ */
+ bsPutUChar('h');
+ bsPutUChar('0' + blockSize100k);
+
+ combinedCRC = 0;
+ }
+
+ private int allowableBlockSize;
+
+ /**
+  * Starts a new, empty block: resets the CRC, the fill position and the
+  * inUse flags, and recomputes how many entries the block may take.
+  */
+ private void initBlock() {
+     // blockNo++;
+     mCrc.initialiseCRC();
+     last = -1;
+     // ch = 0;
+
+     int value = 0;
+     while (value < 256) {
+         inUse[value] = false;
+         value++;
+     }
+
+     /* 20 is just a paranoia constant */
+     allowableBlockSize = baseBlockSize * blockSize100k - 20;
+ }
+
+ private void endBlock() throws IOException {
+ blockCRC = mCrc.getFinalCRC();
+ combinedCRC = (combinedCRC << 1) | (combinedCRC >>> 31);
+ combinedCRC ^= blockCRC;
+
+ /* sort the block and establish posn of original string */
+ doReversibleTransformation();
+
+ /*
+ A 6-byte block header, the value chosen arbitrarily
+ as 0x314159265359 :-). A 32 bit value does not really
+ give a strong enough guarantee that the value will not
+ appear by chance in the compressed datastream. Worst-case
+ probability of this event, for a 900k block, is about
+ 2.0e-3 for 32 bits, 1.0e-5 for 40 bits and 4.0e-8 for 48 bits.
+ For a compressed file of size 100Gb -- about 100000 blocks --
+ only a 48-bit marker will do. NB: normal compression/
+ decompression do *not* rely on these statistical properties.
+ They are only important when trying to recover blocks from
+ damaged files.
+ */
+ bsPutUChar(0x31);
+ bsPutUChar(0x41);
+ bsPutUChar(0x59);
+ bsPutUChar(0x26);
+ bsPutUChar(0x53);
+ bsPutUChar(0x59);
+
+ /* Now the block's CRC, so it is in a known place. */
+ bsPutint(blockCRC);
+
+ /* Now a single bit indicating randomisation. */
+ if (blockRandomised) {
+ bsW(1, 1);
+ nBlocksRandomised++;
+ } else {
+ bsW(1, 0);
+ }
+
+ /* Finally, block's contents proper. */
+ moveToFrontCodeAndSend();
+ }
+
+ private void endCompression() throws IOException {
+ /*
+ Now another magic 48-bit number, 0x177245385090, to
+ indicate the end of the last block. (sqrt(pi), if
+ you want to know. I did want to use e, but it contains
+ too much repetition -- 27 18 28 18 28 46 -- for me
+ to feel statistically comfortable. Call me paranoid.)
+ */
+ bsPutUChar(0x17);
+ bsPutUChar(0x72);
+ bsPutUChar(0x45);
+ bsPutUChar(0x38);
+ bsPutUChar(0x50);
+ bsPutUChar(0x90);
+
+ bsPutint(combinedCRC);
+
+ bsFinishedWithStream();
+ }
+
+ /**
+  * Assigns code values from the code lengths: lengths are visited from
+  * minLen to maxLen, symbols of the same length receive consecutive
+  * values in symbol order, and the running value is doubled when moving
+  * on to the next length.
+  *
+  * @param code      out: code value of each symbol
+  * @param length    code length of each symbol
+  * @param minLen    shortest code length in use
+  * @param maxLen    longest code length in use
+  * @param alphaSize number of symbols
+  */
+ private void hbAssignCodes (int[] code, char[] length, int minLen,
+                             int maxLen, int alphaSize) {
+     int nextCode = 0;
+     for (int bits = minLen; bits <= maxLen; bits++) {
+         for (int sym = 0; sym < alphaSize; sym++) {
+             if (length[sym] != bits) {
+                 continue;
+             }
+             code[sym] = nextCode;
+             nextCode++;
+         }
+         nextCode <<= 1;
+     }
+ }
+
+ /**
+  * Attaches the bit writer to f, empties the bit buffer and resets the
+  * count of bytes written.
+  *
+  * @param f the stream the compressed bytes are written to
+  */
+ private void bsSetStream(OutputStream f) {
+     bytesOut = 0;
+     bsBuff = 0;
+     bsLive = 0;
+     bsStream = f;
+ }
+
+ private void bsFinishedWithStream() throws IOException {
+ while (bsLive > 0) {
+ int ch = (bsBuff >> 24);
+ try {
+ bsStream.write(ch); // write 8-bit
+ } catch (IOException e) {
+ throw e;
+ }
+ bsBuff <<= 8;
+ bsLive -= 8;
+ bytesOut++;
+ }
+ }
+
+ private void bsW(int n, int v) throws IOException {
+ while (bsLive >= 8) {
+ int ch = (bsBuff >> 24);
+ try {
+ bsStream.write(ch); // write 8-bit
+ } catch (IOException e) {
+ throw e;
+ }
+ bsBuff <<= 8;
+ bsLive -= 8;
+ bytesOut++;
+ }
+ bsBuff |= (v << (32 - bsLive - n));
+ bsLive += n;
+ }
+
+ private void bsPutUChar(int c) throws IOException { // Appends c to the bit stream as 8 bits.
+ bsW(8, c);
+ }
+
+ /**
+  * Appends the 32-bit value u to the bit stream as four bytes, most
+  * significant byte first.
+  *
+  * @throws IOException if the underlying stream rejects a write
+  */
+ private void bsPutint(int u) throws IOException {
+     for (int shift = 24; shift >= 0; shift -= 8) {
+         bsW(8, (u >> shift) & 0xff);
+     }
+ }
+
+ private void bsPutIntVS(int numBits, int c) throws IOException { // Appends the numBits-wide value c to the bit stream.
+ bsW(numBits, c);
+ }
+
+ private void sendMTFValues() throws IOException { // chooses coding tables for szptr[0..nMTF-1], then writes mapping table, selectors, tables and coded symbols
+ char len[][] = new char[N_GROUPS][MAX_ALPHA_SIZE]; // len[t][v]: cost (later the code length) of symbol v under table t
+
+ int v, t, i, j, gs, ge, totc, bt, bc, iter;
+ int nSelectors = 0, alphaSize, minLen, maxLen, selCtr;
+ int nGroups;
+
+ alphaSize = nInUse + 2;
+ for (t = 0; t < N_GROUPS; t++) {
+ for (v = 0; v < alphaSize; v++) {
+ len[t][v] = (char) GREATER_ICOST;
+ }
+ }
+
+ /* Decide how many coding tables to use (2 to 6, growing with nMTF) */
+ if (nMTF <= 0) {
+ panic();
+ }
+
+ if (nMTF < 200) {
+ nGroups = 2;
+ } else if (nMTF < 600) {
+ nGroups = 3;
+ } else if (nMTF < 1200) {
+ nGroups = 4;
+ } else if (nMTF < 2400) {
+ nGroups = 5;
+ } else {
+ nGroups = 6;
+ }
+
+ /* Generate an initial set of coding tables: the alphabet is cut into nGroups runs gs..ge of roughly equal total mtfFreq */ {
+ int nPart, remF, tFreq, aFreq;
+
+ nPart = nGroups;
+ remF = nMTF;
+ gs = 0;
+ while (nPart > 0) {
+ tFreq = remF / nPart;
+ ge = gs - 1;
+ aFreq = 0;
+ while (aFreq < tFreq && ge < alphaSize - 1) {
+ ge++;
+ aFreq += mtfFreq[ge];
+ }
+ // For middle tables with odd (nGroups - nPart), hand the last symbol back to the next run.
+ if (ge > gs && nPart != nGroups && nPart != 1
+ && ((nGroups - nPart) % 2 == 1)) {
+ aFreq -= mtfFreq[ge];
+ ge--;
+ }
+ // Symbols inside gs..ge get the lesser cost in table nPart - 1; all others the greater cost.
+ for (v = 0; v < alphaSize; v++) {
+ if (v >= gs && v <= ge) {
+ len[nPart - 1][v] = (char) LESSER_ICOST;
+ } else {
+ len[nPart - 1][v] = (char) GREATER_ICOST;
+ }
+ }
+
+ nPart--;
+ gs = ge + 1;
+ remF -= aFreq;
+ }
+ }
+
+ int[][] rfreq = new int[N_GROUPS][MAX_ALPHA_SIZE];
+ int[] fave = new int[N_GROUPS];
+ short[] cost = new short[N_GROUPS];
+ /*
+ Iterate N_ITERS times to improve the tables: pick the cheapest table
+ for every group of symbols, then rebuild each table from what it was picked for.
+ */
+ for (iter = 0; iter < N_ITERS; iter++) {
+ for (t = 0; t < nGroups; t++) {
+ fave[t] = 0;
+ }
+
+ for (t = 0; t < nGroups; t++) {
+ for (v = 0; v < alphaSize; v++) {
+ rfreq[t][v] = 0;
+ }
+ }
+
+ nSelectors = 0;
+ totc = 0;
+ gs = 0;
+ while (true) {
+
+ /* Set group start & end marks. */
+ if (gs >= nMTF) {
+ break;
+ }
+ ge = gs + G_SIZE - 1;
+ if (ge >= nMTF) {
+ ge = nMTF - 1;
+ }
+
+ /*
+ Calculate the cost of this group as coded
+ by each of the coding tables.
+ */
+ for (t = 0; t < nGroups; t++) {
+ cost[t] = 0;
+ }
+
+ if (nGroups == 6) { // unrolled form of the generic loop in the else branch
+ short cost0, cost1, cost2, cost3, cost4, cost5;
+ cost0 = cost1 = cost2 = cost3 = cost4 = cost5 = 0;
+ for (i = gs; i <= ge; i++) {
+ short icv = szptr[i];
+ cost0 += len[0][icv];
+ cost1 += len[1][icv];
+ cost2 += len[2][icv];
+ cost3 += len[3][icv];
+ cost4 += len[4][icv];
+ cost5 += len[5][icv];
+ }
+ cost[0] = cost0;
+ cost[1] = cost1;
+ cost[2] = cost2;
+ cost[3] = cost3;
+ cost[4] = cost4;
+ cost[5] = cost5;
+ } else {
+ for (i = gs; i <= ge; i++) {
+ short icv = szptr[i];
+ for (t = 0; t < nGroups; t++) {
+ cost[t] += len[t][icv];
+ }
+ }
+ }
+
+ /*
+ Find the coding table which is best for this group,
+ and record its identity in the selector table.
+ */
+ bc = 999999999;
+ bt = -1;
+ for (t = 0; t < nGroups; t++) {
+ if (cost[t] < bc) {
+ bc = cost[t];
+ bt = t;
+ }
+ };
+ totc += bc;
+ fave[bt]++;
+ selector[nSelectors] = (char) bt;
+ nSelectors++;
+
+ /*
+ Increment the symbol frequencies for the selected table.
+ */
+ for (i = gs; i <= ge; i++) {
+ rfreq[bt][szptr[i]]++;
+ }
+
+ gs = ge + 1;
+ }
+
+ /*
+ Recompute the tables based on the accumulated frequencies.
+ */
+ for (t = 0; t < nGroups; t++) {
+ hbMakeCodeLengths(len[t], rfreq[t], alphaSize, 20);
+ }
+ }
+ // Drop the scratch arrays; nothing below reads them.
+ rfreq = null;
+ fave = null;
+ cost = null;
+ // Sanity limits: nGroups is written in 3 bits and nSelectors in 15 bits further down.
+ if (!(nGroups < 8)) {
+ panic();
+ }
+ if (!(nSelectors < 32768 && nSelectors <= (2 + (900000 / G_SIZE)))) {
+ panic();
+ }
+
+
+ /* Compute MTF values for the selectors. */
+ {
+ char[] pos = new char[N_GROUPS];
+ char ll_i, tmp2, tmp;
+ for (i = 0; i < nGroups; i++) {
+ pos[i] = (char) i;
+ }
+ for (i = 0; i < nSelectors; i++) {
+ ll_i = selector[i];
+ j = 0;
+ tmp = pos[j];
+ while (ll_i != tmp) { // slide earlier entries back by one until ll_i is found at index j
+ j++;
+ tmp2 = tmp;
+ tmp = pos[j];
+ pos[j] = tmp2;
+ }
+ pos[0] = tmp;
+ selectorMtf[i] = (char) j;
+ }
+ }
+
+ int[][] code = new int[N_GROUPS][MAX_ALPHA_SIZE];
+
+ /* Assign actual codes for the tables. */
+ for (t = 0; t < nGroups; t++) {
+ minLen = 32;
+ maxLen = 0;
+ for (i = 0; i < alphaSize; i++) {
+ if (len[t][i] > maxLen) {
+ maxLen = len[t][i];
+ }
+ if (len[t][i] < minLen) {
+ minLen = len[t][i];
+ }
+ }
+ if (maxLen > 20) {
+ panic();
+ }
+ if (minLen < 1) {
+ panic();
+ }
+ hbAssignCodes(code[t], len[t], minLen, maxLen, alphaSize);
+ }
+
+ /* Transmit the mapping table: 16 group flags, then 16 per-value flags for every group that is in use. */
+ {
+ boolean[] inUse16 = new boolean[16];
+ for (i = 0; i < 16; i++) {
+ inUse16[i] = false;
+ for (j = 0; j < 16; j++) {
+ if (inUse[i * 16 + j]) {
+ inUse16[i] = true;
+ }
+ }
+ }
+
+ for (i = 0; i < 16; i++) {
+ if (inUse16[i]) {
+ bsW(1, 1);
+ } else {
+ bsW(1, 0);
+ }
+ }
+
+ for (i = 0; i < 16; i++) {
+ if (inUse16[i]) {
+ for (j = 0; j < 16; j++) {
+ if (inUse[i * 16 + j]) {
+ bsW(1, 1);
+ } else {
+ bsW(1, 0);
+ }
+ }
+ }
+ }
+
+ }
+
+ /* Now the selectors: each selectorMtf value j goes out as j one-bits followed by a zero bit. */
+ bsW (3, nGroups);
+ bsW (15, nSelectors);
+ for (i = 0; i < nSelectors; i++) {
+ for (j = 0; j < selectorMtf[i]; j++) {
+ bsW(1, 1);
+ }
+ bsW(1, 0);
+ }
+
+ /* Now the coding tables: a 5-bit start length, then per symbol "10" = +1, "11" = -1, "0" = length reached. */
+ for (t = 0; t < nGroups; t++) {
+ int curr = len[t][0];
+ bsW(5, curr);
+ for (i = 0; i < alphaSize; i++) {
+ while (curr < len[t][i]) {
+ bsW(2, 2);
+ curr++; /* 10 */
+ }
+ while (curr > len[t][i]) {
+ bsW(2, 3);
+ curr--; /* 11 */
+ }
+ bsW (1, 0);
+ }
+ }
+
+ /* And finally, the block data proper: groups of G_SIZE symbols, each coded with the table its selector names. */
+ selCtr = 0;
+ gs = 0;
+ while (true) {
+ if (gs >= nMTF) {
+ break;
+ }
+ ge = gs + G_SIZE - 1;
+ if (ge >= nMTF) {
+ ge = nMTF - 1;
+ }
+ for (i = gs; i <= ge; i++) {
+ bsW(len[selector[selCtr]][szptr[i]],
+ code[selector[selCtr]][szptr[i]]);
+ }
+
+ gs = ge + 1;
+ selCtr++;
+ }
+ if (!(selCtr == nSelectors)) { // the number of groups written must match the selector count sent above
+ panic();
+ }
+ }
+
+ private void moveToFrontCodeAndSend () throws IOException { // emits the current block
+ bsPutIntVS(24, origPtr); // origPtr goes out first, as a 24-bit field
+ generateMTFValues(); // fills szptr and mtfFreq and sets nMTF
+ sendMTFValues(); // writes the coding tables, selectors and coded symbols
+ }
+
+ private OutputStream bsStream; // sink for bytes emitted by bsW() and bsFinishedWithStream(); assigned in bsSetStream()
+
+ private void simpleSort(int lo, int hi, int d) { // shell-sorts zptr[lo..hi], ordering entries with fullGtU at offset d
+ int i, j, h, bigN, hp;
+ int v;
+ // lo and hi are inclusive bounds into zptr; d is added to both operands of every fullGtU call.
+ bigN = hi - lo + 1;
+ if (bigN < 2) {
+ return;
+ }
+ // Find the largest increment in incs that is smaller than bigN.
+ hp = 0;
+ while (incs[hp] < bigN) {
+ hp++;
+ }
+ hp--;
+ // One gapped insertion pass per increment, largest increment first.
+ for (; hp >= 0; hp--) {
+ h = incs[hp];
+
+ i = lo + h;
+ while (true) {
+ /* copy 1 of 3: the insertion step is unrolled three times */
+ if (i > hi) {
+ break;
+ }
+ v = zptr[i];
+ j = i;
+ while (fullGtU(zptr[j - h] + d, v + d)) {
+ zptr[j] = zptr[j - h];
+ j = j - h;
+ if (j <= (lo + h - 1)) { // no element h places to the left remains inside the range
+ break;
+ }
+ }
+ zptr[j] = v;
+ i++;
+
+ /* copy 2 */
+ if (i > hi) {
+ break;
+ }
+ v = zptr[i];
+ j = i;
+ while (fullGtU(zptr[j - h] + d, v + d)) {
+ zptr[j] = zptr[j - h];
+ j = j - h;
+ if (j <= (lo + h - 1)) {
+ break;
+ }
+ }
+ zptr[j] = v;
+ i++;
+
+ /* copy 3 */
+ if (i > hi) {
+ break;
+ }
+ v = zptr[i];
+ j = i;
+ while (fullGtU(zptr[j - h] + d, v + d)) {
+ zptr[j] = zptr[j - h];
+ j = j - h;
+ if (j <= (lo + h - 1)) {
+ break;
+ }
+ }
+ zptr[j] = v;
+ i++;
+ // Abandon the sort once workDone exceeds workLimit; this only applies on the first attempt.
+ if (workDone > workLimit && firstAttempt) {
+ return;
+ }
+ }
+ }
+ }
+
+ /**
+  * Exchanges the n entries of zptr starting at p1 with the n entries
+  * starting at p2, pairwise in ascending order. Does nothing when n <= 0.
+  */
+ private void vswap(int p1, int p2, int n) {
+     for (int k = 0; k < n; k++) {
+         int held = zptr[p1 + k];
+         zptr[p1 + k] = zptr[p2 + k];
+         zptr[p2 + k] = held;
+     }
+ }
+
+ /**
+  * Returns the median of three values.
+  *
+  * @param a first value
+  * @param b second value
+  * @param c third value
+  * @return whichever of a, b and c lies in the middle
+  */
+ private char med3(char a, char b, char c) {
+     char low = (a < b) ? a : b;
+     char high = (a < b) ? b : a;
+     if (c <= low) {
+         return low;    // c is the smallest, so the median is min(a, b)
+     }
+     // otherwise the median is c, capped at max(a, b)
+     return (c >= high) ? high : c;
+ }
+
+ /** A pending qSort3 work item, pushed to and popped from its explicit stack. */
+ private static class StackElem {
+     int ll, hh;   // inclusive bounds of the zptr range still to sort
+     int dd;       // depth at which the sort of that range continues
+ }
+
+ private void qSort3(int loSt, int hiSt, int dSt) { // sorts zptr[loSt..hiSt] from depth dSt with a three-way quicksort driven by an explicit stack
+ int unLo, unHi, ltLo, gtHi, med, n, m;
+ int sp, lo, hi, d;
+ StackElem[] stack = new StackElem[QSORT_STACK_SIZE];
+ for (int count = 0; count < QSORT_STACK_SIZE; count++) {
+ stack[count] = new StackElem();
+ }
+ // Seed the stack with the whole range.
+ sp = 0;
+
+ stack[sp].ll = loSt;
+ stack[sp].hh = hiSt;
+ stack[sp].dd = dSt;
+ sp++;
+
+ while (sp > 0) {
+ if (sp >= QSORT_STACK_SIZE) {
+ panic();
+ }
+
+ sp--;
+ lo = stack[sp].ll;
+ hi = stack[sp].hh;
+ d = stack[sp].dd;
+ // Short or deep ranges are handed to simpleSort.
+ if (hi - lo < SMALL_THRESH || d > DEPTH_THRESH) {
+ simpleSort(lo, hi, d);
+ if (workDone > workLimit && firstAttempt) {
+ return;
+ }
+ continue;
+ }
+ // Pivot: median of the bytes at depth d for the first, last and middle entries.
+ med = med3(block[zptr[lo] + d + 1],
+ block[zptr[hi ] + d + 1],
+ block[zptr[(lo + hi) >> 1] + d + 1]);
+ // Entries equal to med are parked in zptr[lo..ltLo-1] and zptr[gtHi+1..hi] while unLo/unHi scan inward.
+ unLo = ltLo = lo;
+ unHi = gtHi = hi;
+
+ while (true) {
+ while (true) {
+ if (unLo > unHi) {
+ break;
+ }
+ n = ((int) block[zptr[unLo] + d + 1]) - med;
+ if (n == 0) {
+ int temp = 0;
+ temp = zptr[unLo];
+ zptr[unLo] = zptr[ltLo];
+ zptr[ltLo] = temp;
+ ltLo++;
+ unLo++;
+ continue;
+ };
+ if (n > 0) {
+ break;
+ }
+ unLo++;
+ }
+ while (true) {
+ if (unLo > unHi) {
+ break;
+ }
+ n = ((int) block[zptr[unHi] + d + 1]) - med;
+ if (n == 0) {
+ int temp = 0;
+ temp = zptr[unHi];
+ zptr[unHi] = zptr[gtHi];
+ zptr[gtHi] = temp;
+ gtHi--;
+ unHi--;
+ continue;
+ };
+ if (n < 0) {
+ break;
+ }
+ unHi--;
+ }
+ if (unLo > unHi) {
+ break;
+ }
+ int temp = 0;
+ temp = zptr[unLo];
+ zptr[unLo] = zptr[unHi];
+ zptr[unHi] = temp;
+ unLo++;
+ unHi--;
+ }
+ // Every entry equalled med: sort the same range again one position deeper.
+ if (gtHi < ltLo) {
+ stack[sp].ll = lo;
+ stack[sp].hh = hi;
+ stack[sp].dd = d + 1;
+ sp++;
+ continue;
+ }
+ // Swap the parked equal-to-med runs from both ends into the middle of the range.
+ n = ((ltLo - lo) < (unLo - ltLo)) ? (ltLo - lo) : (unLo - ltLo);
+ vswap(lo, unLo - n, n);
+ m = ((hi - gtHi) < (gtHi - unHi)) ? (hi - gtHi) : (gtHi - unHi);
+ vswap(unLo, hi - m + 1, m);
+
+ n = lo + unLo - ltLo - 1;
+ m = hi - (gtHi - unHi) + 1;
+ // Push three sub-ranges: below med (depth d), equal to med (depth d + 1), above med (depth d).
+ stack[sp].ll = lo;
+ stack[sp].hh = n;
+ stack[sp].dd = d;
+ sp++;
+
+ stack[sp].ll = n + 1;
+ stack[sp].hh = m - 1;
+ stack[sp].dd = d + 1;
+ sp++;
+
+ stack[sp].ll = m;
+ stack[sp].hh = hi;
+ stack[sp].dd = d;
+ sp++;
+ }
+ }
+
+ private void mainSort() { // fills zptr with the sorted order of positions 0..last; may return early on the first attempt when workDone exceeds workLimit
+ int i, j, ss, sb;
+ int[] runningOrder = new int[256];
+ int[] copy = new int[256];
+ boolean[] bigDone = new boolean[256];
+ int c1, c2;
+ int numQSorted;
+
+ /*
+ In the various block-sized structures, live data runs
+ from 0 to last+NUM_OVERSHOOT_BYTES inclusive. First,
+ set up the overshoot area for block.
+ */
+
+ // if (verbosity >= 4) fprintf ( stderr, " sort initialise ...\n" );
+ for (i = 0; i < NUM_OVERSHOOT_BYTES; i++) {
+ block[last + i + 2] = block[(i % (last + 1)) + 1];
+ }
+ for (i = 0; i <= last + NUM_OVERSHOOT_BYTES; i++) {
+ quadrant[i] = 0;
+ }
+ // block[0] mirrors the final data byte; it seeds c1 for the pair counting below.
+ block[0] = (char) (block[last + 1]);
+
+ if (last < 4000) {
+ /*
+ Use simpleSort(), since the full sorting mechanism
+ has quite a large constant overhead.
+ */
+ for (i = 0; i <= last; i++) {
+ zptr[i] = i;
+ }
+ firstAttempt = false;
+ workDone = workLimit = 0;
+ simpleSort(0, last, 0);
+ } else {
+ numQSorted = 0;
+ for (i = 0; i <= 255; i++) {
+ bigDone[i] = false;
+ }
+
+ for (i = 0; i <= 65536; i++) {
+ ftab[i] = 0;
+ }
+ // Count every two-byte pair (c1, c2) into ftab[(c1 << 8) + c2].
+ c1 = block[0];
+ for (i = 0; i <= last; i++) {
+ c2 = block[i + 1];
+ ftab[(c1 << 8) + c2]++;
+ c1 = c2;
+ }
+ // Running sum: ftab[k] becomes the end (exclusive) of small bucket k.
+ for (i = 1; i <= 65536; i++) {
+ ftab[i] += ftab[i - 1];
+ }
+ // Drop each position into its small bucket, filling every bucket from the back.
+ c1 = block[1];
+ for (i = 0; i < last; i++) {
+ c2 = block[i + 2];
+ j = (c1 << 8) + c2;
+ c1 = c2;
+ ftab[j]--;
+ zptr[ftab[j]] = i;
+ }
+ // Position last pairs the final byte with the wrapped-around first byte.
+ j = ((block[last + 1]) << 8) + (block[1]);
+ ftab[j]--;
+ zptr[ftab[j]] = last;
+
+ /*
+ Now ftab contains the first loc of every small bucket.
+ Calculate the running order, from smallest to largest
+ big bucket.
+ */
+
+ for (i = 0; i <= 255; i++) {
+ runningOrder[i] = i;
+ }
+ // Shell sort of runningOrder by big-bucket size, ftab[(b + 1) << 8] - ftab[b << 8].
+ {
+ int vv;
+ int h = 1;
+ do {
+ h = 3 * h + 1;
+ }
+ while (h <= 256);
+ do {
+ h = h / 3;
+ for (i = h; i <= 255; i++) {
+ vv = runningOrder[i];
+ j = i;
+ while ((ftab[((runningOrder[j - h]) + 1) << 8]
+ - ftab[(runningOrder[j - h]) << 8]) >
+ (ftab[((vv) + 1) << 8] - ftab[(vv) << 8])) {
+ runningOrder[j] = runningOrder[j - h];
+ j = j - h;
+ if (j <= (h - 1)) {
+ break;
+ }
+ }
+ runningOrder[j] = vv;
+ }
+ } while (h != 1);
+ }
+
+ /*
+ The main sorting loop.
+ */
+ for (i = 0; i <= 255; i++) {
+
+ /*
+ Process big buckets, starting with the least full.
+ */
+ ss = runningOrder[i];
+
+ /*
+ Complete the big bucket [ss] by quicksorting
+ any unsorted small buckets [ss, j]. Hopefully
+ previous pointer-scanning phases have already
+ completed many of the small buckets [ss, j], so
+ we don't have to sort them at all.
+ */
+ for (j = 0; j <= 255; j++) {
+ sb = (ss << 8) + j;
+ if (!((ftab[sb] & SETMASK) == SETMASK)) { // small bucket not yet flagged as sorted
+ int lo = ftab[sb] & CLEARMASK;
+ int hi = (ftab[sb + 1] & CLEARMASK) - 1;
+ if (hi > lo) {
+ qSort3(lo, hi, 2);
+ numQSorted += (hi - lo + 1);
+ if (workDone > workLimit && firstAttempt) {
+ return;
+ }
+ }
+ ftab[sb] |= SETMASK;
+ }
+ }
+
+ /*
+ The ss big bucket is now done. Record this fact,
+ and update the quadrant descriptors. Remember to
+ update quadrants in the overshoot area too, if
+ necessary. The "if (i < 255)" test merely skips
+ this updating for the last bucket processed, since
+ updating for the last bucket is pointless.
+ */
+ bigDone[ss] = true;
+
+ if (i < 255) {
+ int bbStart = ftab[ss << 8] & CLEARMASK;
+ int bbSize = (ftab[(ss + 1) << 8] & CLEARMASK) - bbStart;
+ int shifts = 0;
+ // Pick the smallest shift that brings bbSize >> shifts down to 65534 or less.
+ while ((bbSize >> shifts) > 65534) {
+ shifts++;
+ }
+
+ for (j = 0; j < bbSize; j++) {
+ int a2update = zptr[bbStart + j];
+ int qVal = (j >> shifts);
+ quadrant[a2update] = qVal;
+ if (a2update < NUM_OVERSHOOT_BYTES) {
+ quadrant[a2update + last + 1] = qVal;
+ }
+ }
+
+ if (!(((bbSize - 1) >> shifts) <= 65535)) {
+ panic();
+ }
+ }
+
+ /*
+ Now scan this big bucket so as to synthesise the
+ sorted order for small buckets [t, ss] for all t != ss.
+ */
+ for (j = 0; j <= 255; j++) {
+ copy[j] = ftab[(j << 8) + ss] & CLEARMASK;
+ }
+ // For each entry whose preceding byte c1 is in an unfinished big bucket, append the preceding position to [c1, ss].
+ for (j = ftab[ss << 8] & CLEARMASK;
+ j < (ftab[(ss + 1) << 8] & CLEARMASK); j++) {
+ c1 = block[zptr[j]];
+ if (!bigDone[c1]) {
+ zptr[copy[c1]] = zptr[j] == 0 ? last : zptr[j] - 1;
+ copy[c1]++;
+ }
+ }
+ // Flag every small bucket [j, ss] as sorted.
+ for (j = 0; j <= 255; j++) {
+ ftab[(j << 8) + ss] |= SETMASK;
+ }
+ }
+ }
+ }
+
+ /**
+  * Flips the low bit of selected block bytes, at spacings taken from rNums,
+  * and rebuilds inUse from the resulting byte values.
+  */
+ private void randomiseBlock() {
+     for (int sym = 0; sym < 256; sym++) {
+         inUse[sym] = false;
+     }
+     int countdown = 0;   // steps left before the next flip
+     int tablePos = 0;    // cursor into rNums, wrapping after 512 entries
+     for (int i = 0; i <= last; i++) {
+         int pos = i + 1;   // block data starts at index 1
+         if (countdown == 0) {
+             countdown = (char) rNums[tablePos];
+             tablePos = (tablePos + 1 == 512) ? 0 : tablePos + 1;
+         }
+         countdown--;
+         if (countdown == 1) {
+             block[pos] ^= 1;
+         }
+         block[pos] &= 0xFF;   // keep the value within one byte
+         inUse[block[pos]] = true;
+     }
+ }
+
+ /**
+  * Sorts the current block via mainSort() and stores in origPtr the index i
+  * for which zptr[i] == 0. If the first attempt stops because workDone
+  * exceeded workLimit, the block is randomised and sorted a second time.
+  * Calls panic() when no such index exists.
+  */
+ private void doReversibleTransformation() {
+     firstAttempt = true;
+     blockRandomised = false;
+     workDone = 0;
+     workLimit = workFactor * last;
+     mainSort();
+     boolean gaveUp = workDone > workLimit && firstAttempt;
+     if (gaveUp) {
+         randomiseBlock();
+         workLimit = workDone = 0;
+         blockRandomised = true;
+         firstAttempt = false;
+         mainSort();
+     }
+     origPtr = -1;
+     for (int idx = 0; idx <= last && origPtr == -1; idx++) {
+         if (zptr[idx] == 0) {
+             origPtr = idx;
+         }
+     }
+     if (origPtr == -1) {
+         panic();
+     }
+ }
+
+ private boolean fullGtU(int i1, int i2) { // true when the sequence starting at i1 orders after the one starting at i2
+ int k;
+ char c1, c2;
+ int s1, s2;
+ // First six positions: compare block bytes only (unrolled).
+ c1 = block[i1 + 1];
+ c2 = block[i2 + 1];
+ if (c1 != c2) {
+ return (c1 > c2);
+ }
+ i1++;
+ i2++;
+
+ c1 = block[i1 + 1];
+ c2 = block[i2 + 1];
+ if (c1 != c2) {
+ return (c1 > c2);
+ }
+ i1++;
+ i2++;
+
+ c1 = block[i1 + 1];
+ c2 = block[i2 + 1];
+ if (c1 != c2) {
+ return (c1 > c2);
+ }
+ i1++;
+ i2++;
+
+ c1 = block[i1 + 1];
+ c2 = block[i2 + 1];
+ if (c1 != c2) {
+ return (c1 > c2);
+ }
+ i1++;
+ i2++;
+
+ c1 = block[i1 + 1];
+ c2 = block[i2 + 1];
+ if (c1 != c2) {
+ return (c1 > c2);
+ }
+ i1++;
+ i2++;
+
+ c1 = block[i1 + 1];
+ c2 = block[i2 + 1];
+ if (c1 != c2) {
+ return (c1 > c2);
+ }
+ i1++;
+ i2++;
+
+ k = last + 1; // budget of positions for the loop below
+ // Then four positions per pass: block byte first, quadrant value as the tie-break.
+ do {
+ c1 = block[i1 + 1];
+ c2 = block[i2 + 1];
+ if (c1 != c2) {
+ return (c1 > c2);
+ }
+ s1 = quadrant[i1];
+ s2 = quadrant[i2];
+ if (s1 != s2) {
+ return (s1 > s2);
+ }
+ i1++;
+ i2++;
+
+ c1 = block[i1 + 1];
+ c2 = block[i2 + 1];
+ if (c1 != c2) {
+ return (c1 > c2);
+ }
+ s1 = quadrant[i1];
+ s2 = quadrant[i2];
+ if (s1 != s2) {
+ return (s1 > s2);
+ }
+ i1++;
+ i2++;
+
+ c1 = block[i1 + 1];
+ c2 = block[i2 + 1];
+ if (c1 != c2) {
+ return (c1 > c2);
+ }
+ s1 = quadrant[i1];
+ s2 = quadrant[i2];
+ if (s1 != s2) {
+ return (s1 > s2);
+ }
+ i1++;
+ i2++;
+
+ c1 = block[i1 + 1];
+ c2 = block[i2 + 1];
+ if (c1 != c2) {
+ return (c1 > c2);
+ }
+ s1 = quadrant[i1];
+ s2 = quadrant[i2];
+ if (s1 != s2) {
+ return (s1 > s2);
+ }
+ i1++;
+ i2++;
+ // Wrap an index that has run past last back to the start of the block.
+ if (i1 > last) {
+ i1 -= last;
+ i1--;
+ };
+ if (i2 > last) {
+ i2 -= last;
+ i2--;
+ };
+
+ k -= 4;
+ workDone++; // one unit of work per pass of four positions
+ } while (k >= 0);
+ // No difference found within the budget: report "not greater".
+ return false;
+ }
+
+ /*
+ Gap sequence used by simpleSort(). Knuth's increments (h -> 3h + 1)
+ seem to work better than Incerpi-Sedgewick here, possibly
+ because the number of elems to sort is
+ usually small, typically <= 20.
+ */
+ private int[] incs = { 1, 4, 13, 40, 121, 364, 1093, 3280,
+ 9841, 29524, 88573, 265720,
+ 797161, 2391484 };
+
+ /**
+  * Allocates the per-block working arrays (block, quadrant, zptr, ftab and
+  * szptr), all sized from n = baseBlockSize * blockSize100k.
+  * The sizes must stay in step with the index ranges used by mainSort(),
+  * fullGtU() and generateMTFValues().
+  *
+  * No null checks follow the allocations: in Java "new" either succeeds or
+  * throws OutOfMemoryError, so such a check could never fire.
+  */
+ private void allocateCompressStructures () {
+     int n = baseBlockSize * blockSize100k;
+
+     // block and quadrant carry NUM_OVERSHOOT_BYTES of extra room past the data;
+     // block has one further slot because its data starts at index 1.
+     block = new char[(n + 1 + NUM_OVERSHOOT_BYTES)];
+     quadrant = new int[(n + NUM_OVERSHOOT_BYTES)];
+     zptr = new int[n];
+
+     // mainSort() indexes ftab from 0 to 65536 inclusive.
+     ftab = new int[65537];
+
+     /*
+      * The back end needs a place to store the MTF values while it
+      * calculates the coding tables. Those values fit in a short, so they
+      * get their own short[] array rather than being overlaid on zptr.
+      */
+     szptr = new short[2 * n];
+ }
+
+ private void generateMTFValues() { // move-to-front codes block[zptr[0..last]] into szptr, with runs of position 0 written as RUNA/RUNB
+ char[] yy = new char[256];
+ int i, j;
+ char tmp;
+ char tmp2;
+ int zPend;
+ int wr;
+ int EOB;
+ // yy is the move-to-front list, wr the write cursor into szptr, zPend the length of the pending run.
+ makeMaps();
+ EOB = nInUse + 1;
+
+ for (i = 0; i <= EOB; i++) {
+ mtfFreq[i] = 0;
+ }
+
+ wr = 0;
+ zPend = 0;
+ for (i = 0; i < nInUse; i++) {
+ yy[i] = (char) i;
+ }
+
+
+ for (i = 0; i <= last; i++) {
+ char ll_i;
+
+ ll_i = unseqToSeq[block[zptr[i]]];
+ // Find ll_i in yy, sliding the entries before it back by one, then put it at the front.
+ j = 0;
+ tmp = yy[j];
+ while (ll_i != tmp) {
+ j++;
+ tmp2 = tmp;
+ tmp = yy[j];
+ yy[j] = tmp2;
+ };
+ yy[0] = tmp;
+ // Position 0 extends the pending run; any other position flushes the run first.
+ if (j == 0) {
+ zPend++;
+ } else {
+ if (zPend > 0) {
+ zPend--;
+ while (true) {
+ switch (zPend % 2) { // the run length goes out as a sequence of RUNA / RUNB symbols
+ case 0:
+ szptr[wr] = (short) RUNA;
+ wr++;
+ mtfFreq[RUNA]++;
+ break;
+ case 1:
+ szptr[wr] = (short) RUNB;
+ wr++;
+ mtfFreq[RUNB]++;
+ break;
+ };
+ if (zPend < 2) {
+ break;
+ }
+ zPend = (zPend - 2) / 2;
+ };
+ zPend = 0;
+ }
+ szptr[wr] = (short) (j + 1); // a non-zero position j is stored as j + 1
+ wr++;
+ mtfFreq[j + 1]++;
+ }
+ }
+ // Flush a run that reaches the end of the block.
+ if (zPend > 0) {
+ zPend--;
+ while (true) {
+ switch (zPend % 2) {
+ case 0:
+ szptr[wr] = (short) RUNA;
+ wr++;
+ mtfFreq[RUNA]++;
+ break;
+ case 1:
+ szptr[wr] = (short) RUNB;
+ wr++;
+ mtfFreq[RUNB]++;
+ break;
+ }
+ if (zPend < 2) {
+ break;
+ }
+ zPend = (zPend - 2) / 2;
+ }
+ }
+ // Terminate the symbol stream with EOB.
+ szptr[wr] = (short) EOB;
+ wr++;
+ mtfFreq[EOB]++;
+
+ nMTF = wr;
+ }
+}
+
+
diff --git a/src/main/java/org/apache/tools/bzip2/CRC.java b/src/main/java/org/apache/tools/bzip2/CRC.java
new file mode 100644
index 000000000..bc8bc644b
--- /dev/null
+++ b/src/main/java/org/apache/tools/bzip2/CRC.java
@@ -0,0 +1,167 @@
+/*
+ * The Apache Software License, Version 1.1
+ *
+ * Copyright (c) 2001-2002 The Apache Software Foundation. All rights
+ * reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * 3. The end-user documentation included with the redistribution, if
+ * any, must include the following acknowlegement:
+ * "This product includes software developed by the
+ * Apache Software Foundation (http://www.apache.org/)."
+ * Alternately, this acknowlegement may appear in the software itself,
+ * if and wherever such third-party acknowlegements normally appear.
+ *
+ * 4. The names "Ant" and "Apache Software
+ * Foundation" must not be used to endorse or promote products derived
+ * from this software without prior written permission. For written
+ * permission, please contact apache@apache.org.
+ *
+ * 5. Products derived from this software may not be called "Apache"
+ * nor may "Apache" appear in their names without prior written
+ * permission of the Apache Group.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
+ * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+ * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ * ====================================================================
+ *
+ * This software consists of voluntary contributions made by many
+ * individuals on behalf of the Apache Software Foundation. For more
+ * information on the Apache Software Foundation, please see
+ * <http://www.apache.org/>.
+ */
+
+/*
+ * This package is based on the work done by Keiron Liddle, Aftex Software
+ * to whom the Ant project is very grateful for his
+ * great code.
+ */
+
+package org.apache.tools.bzip2;
+
+/**
+ * A simple class to hold and calculate the CRC for sanity checking
+ * of the data.
+ *
+ * @author Keiron Liddle
+ */
+class CRC {
+    /** 256-entry lookup table; entry 1 is the generator polynomial 0x04c11db7. Treat as read-only. */
+    public static int[] crc32Table = {
+        0x00000000, 0x04c11db7, 0x09823b6e, 0x0d4326d9,
+        0x130476dc, 0x17c56b6b, 0x1a864db2, 0x1e475005,
+        0x2608edb8, 0x22c9f00f, 0x2f8ad6d6, 0x2b4bcb61,
+        0x350c9b64, 0x31cd86d3, 0x3c8ea00a, 0x384fbdbd,
+        0x4c11db70, 0x48d0c6c7, 0x4593e01e, 0x4152fda9,
+        0x5f15adac, 0x5bd4b01b, 0x569796c2, 0x52568b75,
+        0x6a1936c8, 0x6ed82b7f, 0x639b0da6, 0x675a1011,
+        0x791d4014, 0x7ddc5da3, 0x709f7b7a, 0x745e66cd,
+        0x9823b6e0, 0x9ce2ab57, 0x91a18d8e, 0x95609039,
+        0x8b27c03c, 0x8fe6dd8b, 0x82a5fb52, 0x8664e6e5,
+        0xbe2b5b58, 0xbaea46ef, 0xb7a96036, 0xb3687d81,
+        0xad2f2d84, 0xa9ee3033, 0xa4ad16ea, 0xa06c0b5d,
+        0xd4326d90, 0xd0f37027, 0xddb056fe, 0xd9714b49,
+        0xc7361b4c, 0xc3f706fb, 0xceb42022, 0xca753d95,
+        0xf23a8028, 0xf6fb9d9f, 0xfbb8bb46, 0xff79a6f1,
+        0xe13ef6f4, 0xe5ffeb43, 0xe8bccd9a, 0xec7dd02d,
+        0x34867077, 0x30476dc0, 0x3d044b19, 0x39c556ae,
+        0x278206ab, 0x23431b1c, 0x2e003dc5, 0x2ac12072,
+        0x128e9dcf, 0x164f8078, 0x1b0ca6a1, 0x1fcdbb16,
+        0x018aeb13, 0x054bf6a4, 0x0808d07d, 0x0cc9cdca,
+        0x7897ab07, 0x7c56b6b0, 0x71159069, 0x75d48dde,
+        0x6b93dddb, 0x6f52c06c, 0x6211e6b5, 0x66d0fb02,
+        0x5e9f46bf, 0x5a5e5b08, 0x571d7dd1, 0x53dc6066,
+        0x4d9b3063, 0x495a2dd4, 0x44190b0d, 0x40d816ba,
+        0xaca5c697, 0xa864db20, 0xa527fdf9, 0xa1e6e04e,
+        0xbfa1b04b, 0xbb60adfc, 0xb6238b25, 0xb2e29692,
+        0x8aad2b2f, 0x8e6c3698, 0x832f1041, 0x87ee0df6,
+        0x99a95df3, 0x9d684044, 0x902b669d, 0x94ea7b2a,
+        0xe0b41de7, 0xe4750050, 0xe9362689, 0xedf73b3e,
+        0xf3b06b3b, 0xf771768c, 0xfa325055, 0xfef34de2,
+        0xc6bcf05f, 0xc27dede8, 0xcf3ecb31, 0xcbffd686,
+        0xd5b88683, 0xd1799b34, 0xdc3abded, 0xd8fba05a,
+        0x690ce0ee, 0x6dcdfd59, 0x608edb80, 0x644fc637,
+        0x7a089632, 0x7ec98b85, 0x738aad5c, 0x774bb0eb,
+        0x4f040d56, 0x4bc510e1, 0x46863638, 0x42472b8f,
+        0x5c007b8a, 0x58c1663d, 0x558240e4, 0x51435d53,
+        0x251d3b9e, 0x21dc2629, 0x2c9f00f0, 0x285e1d47,
+        0x36194d42, 0x32d850f5, 0x3f9b762c, 0x3b5a6b9b,
+        0x0315d626, 0x07d4cb91, 0x0a97ed48, 0x0e56f0ff,
+        0x1011a0fa, 0x14d0bd4d, 0x19939b94, 0x1d528623,
+        0xf12f560e, 0xf5ee4bb9, 0xf8ad6d60, 0xfc6c70d7,
+        0xe22b20d2, 0xe6ea3d65, 0xeba91bbc, 0xef68060b,
+        0xd727bbb6, 0xd3e6a601, 0xdea580d8, 0xda649d6f,
+        0xc423cd6a, 0xc0e2d0dd, 0xcda1f604, 0xc960ebb3,
+        0xbd3e8d7e, 0xb9ff90c9, 0xb4bcb610, 0xb07daba7,
+        0xae3afba2, 0xaafbe615, 0xa7b8c0cc, 0xa379dd7b,
+        0x9b3660c6, 0x9ff77d71, 0x92b45ba8, 0x9675461f,
+        0x8832161a, 0x8cf30bad, 0x81b02d74, 0x857130c3,
+        0x5d8a9099, 0x594b8d2e, 0x5408abf7, 0x50c9b640,
+        0x4e8ee645, 0x4a4ffbf2, 0x470cdd2b, 0x43cdc09c,
+        0x7b827d21, 0x7f436096, 0x7200464f, 0x76c15bf8,
+        0x68860bfd, 0x6c47164a, 0x61043093, 0x65c52d24,
+        0x119b4be9, 0x155a565e, 0x18197087, 0x1cd86d30,
+        0x029f3d35, 0x065e2082, 0x0b1d065b, 0x0fdc1bec,
+        0x3793a651, 0x3352bbe6, 0x3e119d3f, 0x3ad08088,
+        0x2497d08d, 0x2056cd3a, 0x2d15ebe3, 0x29d4f654,
+        0xc5a92679, 0xc1683bce, 0xcc2b1d17, 0xc8ea00a0,
+        0xd6ad50a5, 0xd26c4d12, 0xdf2f6bcb, 0xdbee767c,
+        0xe3a1cbc1, 0xe760d676, 0xea23f0af, 0xeee2ed18,
+        0xf0a5bd1d, 0xf464a0aa, 0xf9278673, 0xfde69bc4,
+        0x89b8fd09, 0x8d79e0be, 0x803ac667, 0x84fbdbd0,
+        0x9abc8bd5, 0x9e7d9662, 0x933eb0bb, 0x97ffad0c,
+        0xafb010b1, 0xab710d06, 0xa6322bdf, 0xa2f33668,
+        0xbcb4666d, 0xb8757bda, 0xb5365d03, 0xb1f740b4
+    };
+
+    /** Running CRC register; getFinalCRC() returns its bitwise complement. */
+    int globalCrc;
+
+    /** Creates a CRC whose register is already initialised. */
+    public CRC() { initialiseCRC(); }
+
+    /** Resets the register to its all-ones starting value. */
+    void initialiseCRC() { globalCrc = 0xffffffff; }
+
+    /** Returns the finished checksum, i.e. the complemented register. */
+    int getFinalCRC() { return ~globalCrc; }
+
+    /** Returns the raw register, without the final complement. */
+    int getGlobalCRC() { return globalCrc; }
+
+    /** Overwrites the raw register with newCrc. */
+    void setGlobalCRC(int newCrc) { globalCrc = newCrc; }
+
+    /**
+     * Folds one byte into the running CRC, most significant bit first.
+     * Only the low eight bits of inCh take part, so signed bytes and wider
+     * values can never index outside the table.
+     * @param inCh the next input byte
+     */
+    void updateCRC(int inCh) {
+        int index = ((globalCrc >>> 24) ^ inCh) & 0xff;
+        globalCrc = (globalCrc << 8) ^ CRC.crc32Table[index];
+    }
+}
+