diff --git a/.classpath b/.classpath index 10ae32917..4a78265c3 100644 --- a/.classpath +++ b/.classpath @@ -15,11 +15,12 @@ + + - - + diff --git a/LICENSE.txt b/LICENSE.txt index a4d044a9e..f6144133f 100644 --- a/LICENSE.txt +++ b/LICENSE.txt @@ -1,5 +1,5 @@ /* - * Copyright (c) 2010, Metaweb Technologies, Inc. All rights reserved. + * Copyright (c) 2010 Metaweb Technologies, Inc. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -36,9 +36,9 @@ See the 'licenses' directory for a list of the licenses for the libraries we dep ordered here by license: licenses/apache2.0.LICENSE.txt - ant (package org.apache.tools.tar) - bzip2 (package org.apache.tools.bzip2) calendar-parser (package com.metaweb.gridworks.expr.util) + ant-tools + bzip2 commons-lang commons-codec jdatapath @@ -58,7 +58,7 @@ licenses/dom4j.LICENSE.txt (BSD family) dom4j licenses/simile.LICENSE.txt (BSD family) - vicino (package edu.mit.simile.vicino) + vicino licenses/arithcode.LICENSE.txt (BSD family) arithcode diff --git a/lib-src/ant-tools-1.8.0-sources.jar b/lib-src/ant-tools-1.8.0-sources.jar new file mode 100644 index 000000000..bc3be38e3 Binary files /dev/null and b/lib-src/ant-tools-1.8.0-sources.jar differ diff --git a/lib-src/vicino-1.1-sources.jar b/lib-src/vicino-1.1-sources.jar new file mode 100644 index 000000000..9960752e0 Binary files /dev/null and b/lib-src/vicino-1.1-sources.jar differ diff --git a/lib/ant-tools-1.8.0.jar b/lib/ant-tools-1.8.0.jar new file mode 100644 index 000000000..4006b6682 Binary files /dev/null and b/lib/ant-tools-1.8.0.jar differ diff --git a/lib/apache-tools-tar.jar b/lib/apache-tools-tar.jar deleted file mode 100644 index fecb83825..000000000 Binary files a/lib/apache-tools-tar.jar and /dev/null differ diff --git a/lib/vicino-1.1.jar b/lib/vicino-1.1.jar new file mode 100644 index 000000000..7752a5df7 Binary files /dev/null and b/lib/vicino-1.1.jar differ diff --git a/src/main/java/com/metaweb/gridworks/Gridworks.java b/src/main/java/com/metaweb/gridworks/Gridworks.java index c21ad618e..395222092 100644 --- a/src/main/java/com/metaweb/gridworks/Gridworks.java +++ b/src/main/java/com/metaweb/gridworks/Gridworks.java @@ -37,6 +37,7 @@ import com.metaweb.util.threads.ThreadPoolExecutorAdapter; public class Gridworks { static private String version; + static private File tempDir; private static Logger root = Logger.getRootLogger(); private static Logger logger = Logger.getLogger("com.metaweb.gridworks"); @@ -65,6 +66,10 @@ public class Gridworks { return version; } + public static File getTempFile(String name) { + return new File(tempDir, name); + } + public static void main(String[] args) throws Exception { // tell jetty to use SLF4J for logging instead of its own stuff @@ -84,6 +89,9 @@ public class Gridworks { jetty_logger.setLevel(Level.WARN); version = Configurations.get("gridworks.version","trunk"); + + tempDir = new File(Configurations.get("gridworks.temp","temp")); + if (!tempDir.exists()) tempDir.mkdirs(); Gridworks gridworks = new Gridworks(); diff --git a/src/main/java/com/metaweb/gridworks/commands/edit/CreateProjectCommand.java b/src/main/java/com/metaweb/gridworks/commands/edit/CreateProjectCommand.java index 8ba4cac25..fe6793674 100644 --- a/src/main/java/com/metaweb/gridworks/commands/edit/CreateProjectCommand.java +++ b/src/main/java/com/metaweb/gridworks/commands/edit/CreateProjectCommand.java @@ -1,6 +1,9 @@ package com.metaweb.gridworks.commands.edit; import java.io.BufferedInputStream; +import java.io.File; +import java.io.FileInputStream; +import java.io.FileOutputStream; import java.io.IOException; import java.io.InputStream; import java.io.InputStreamReader; @@ -9,12 +12,24 @@ import java.io.StringReader; import java.io.UnsupportedEncodingException; import java.net.URL; import java.net.URLConnection; +import java.util.ArrayList; +import java.util.Collections; +import java.util.Comparator; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; import java.util.Properties; +import java.util.Map.Entry; +import java.util.zip.GZIPInputStream; import javax.servlet.ServletException; import javax.servlet.http.HttpServletRequest; import javax.servlet.http.HttpServletResponse; +import org.apache.tools.bzip2.CBZip2InputStream; +import org.apache.tools.tar.TarEntry; +import org.apache.tools.tar.TarInputStream; + import com.ibm.icu.text.CharsetDetector; import com.ibm.icu.text.CharsetMatch; import com.metaweb.gridworks.Gridworks; @@ -91,7 +106,9 @@ public class CreateProjectCommand extends Command { while ((part = parser.readNextPart()) != null) { if (part.isFile()) { - internalImportFilePart((FilePart) part, project, options); + + FilePart filePart = (FilePart) part; + internalImportFile(project, options, filePart.getFileName(), filePart.getInputStream()); } else if (part.isParam()) { ParamPart paramPart = (ParamPart) part; @@ -118,15 +135,120 @@ public class CreateProjectCommand extends Command { } } - protected void internalImportFilePart( - FilePart filePart, + protected void internalImportFile( Project project, - Properties options + Properties options, + String fileName, + InputStream inputStream ) throws Exception { - - Importer importer = guessImporter(options, null, filePart.getFileName()); - - internalInvokeImporter(project, importer, options, filePart.getInputStream(), null); + + if (fileName.endsWith(".tar.gz") || fileName.endsWith(".tar.bz2")) { + // first, save the file on disk, since we need two passes and we might + // not have enough memory to keep it all in there + File file = save(inputStream); + + // in the first pass, gather statistics about what files are in there + // unfortunately, we have to rely on files extensions, which is horrible but + // better than nothing + BufferedInputStream stream = new BufferedInputStream(new FileInputStream(file)); + InputStream is = (fileName.endsWith(".tar.gz")) ? new GZIPInputStream(stream): new CBZip2InputStream(stream); + TarInputStream tis = new TarInputStream(is); + HashMap ext_map = new HashMap(); + while (true) { + TarEntry entry = tis.getNextEntry(); + if (entry == null) break; + if (!entry.isDirectory()) { + String name = entry.getName(); + String ext = getExtension(name)[1]; + if (ext_map.containsKey(ext)) { + ext_map.put(ext, ext_map.get(ext) + 1); + } else { + ext_map.put(ext, 1); + } + } + } + stream.close(); + + // sort extensions by how often they appear + List> values = new ArrayList>(ext_map.entrySet()); + Collections.sort(values, new ValuesComparator()); + + if (values.size() == 0) { + throw new RuntimeException("The archive contains no files."); + } + + // this will contain the set of extensions we'll load from the archive + HashSet exts = new HashSet(); + + // find the extension that is most frequent or those who share the highest frequency value + Entry most_frequent = values.get(0); + Entry second_most_frequent = values.get(1); + if (most_frequent.getValue() > second_most_frequent.getValue()) { // we have a winner + exts.add(most_frequent.getKey()); + } else { // multiple extensions have the same frequency + int winning_frequency = most_frequent.getValue(); + for (Entry e : values) { + if (e.getValue() == winning_frequency) { + exts.add(e.getKey()); + } + } + } + Gridworks.log("Most frequent extensions: " + exts.toString()); + + + } else if (fileName.endsWith(".zip")) { + + } else if (fileName.endsWith(".gz")) { + String[] frags = getExtension(fileName); + internalImportFile(project, options, frags[0], new GZIPInputStream(inputStream)); + } else if (fileName.endsWith(".bz2")) { + String[] frags = getExtension(fileName); + internalImportFile(project, options, frags[0], new CBZip2InputStream(inputStream)); + } else { + load(project, options, fileName, inputStream); + } + } + + public class ValuesComparator implements Comparator> { + public int compare(Entry o1, Entry o2) { + return o2.getValue() - o1.getValue(); + } + } + + private void load(Project project, Properties options, String fileName, InputStream inputStream) throws Exception { + Importer importer = guessImporter(options, null, fileName); + internalInvokeImporter(project, importer, options, inputStream, null); + inputStream.close(); + } + + private File save(InputStream is) throws IOException { + File temp = Gridworks.getTempFile(Long.toString(System.currentTimeMillis())); + temp.deleteOnExit(); + copy(is,temp); + is.close(); + return temp; + } + + private String[] getExtension(String filename) { + String[] result = new String[2]; + int ext_index = filename.lastIndexOf("."); + result[0] = (ext_index == -1) ? filename : filename.substring(0,ext_index); + result[1] = (ext_index == -1) ? "" : filename.substring(ext_index + 1); + return result; + } + + private static long copy(InputStream input, File file) throws IOException { + FileOutputStream output = new FileOutputStream(file); + byte[] buffer = new byte[4 * 1024]; + long count = 0; + int n = 0; + while (-1 != (n = input.read(buffer))) { + output.write(buffer, 0, n); + count += n; + } + output.close(); + input.close(); + return count; } protected void internalImportURL( @@ -237,17 +359,9 @@ public class CreateProjectCommand extends Command { new InputStreamReader(inputStream); } - try { - importer.read(reader, project, options, skip, limit); - } finally { - reader.close(); - } + importer.read(reader, project, options, skip, limit); } else { - try { - importer.read(inputStream, project, options, skip, limit); - } finally { - inputStream.close(); - } + importer.read(inputStream, project, options, skip, limit); } } @@ -295,6 +409,12 @@ public class CreateProjectCommand extends Command { return new ExcelImporter(false); } else if("application/x-xls".equals(contentType)) { return new ExcelImporter(true); + } else if("application/xml".equals(contentType) || + "text/xml".equals(contentType) || + "application/rss+xml".equals(contentType) || + "application/atom+xml".equals(contentType) || + "application/rdf+xml".equals(contentType)) { + return new XmlImporter(); } } else if (fileName != null) { fileName = fileName.toLowerCase(); diff --git a/src/main/java/edu/mit/simile/vicino/Cluster.java b/src/main/java/edu/mit/simile/vicino/Cluster.java deleted file mode 100644 index dd5b9c327..000000000 --- a/src/main/java/edu/mit/simile/vicino/Cluster.java +++ /dev/null @@ -1,79 +0,0 @@ -package edu.mit.simile.vicino; - -import java.io.Serializable; -import java.util.HashSet; -import java.util.List; -import java.util.Set; - -import edu.mit.simile.vicino.clustering.Clusterer; -import edu.mit.simile.vicino.clustering.NGramClusterer; -import edu.mit.simile.vicino.clustering.VPTreeClusterer; -import edu.mit.simile.vicino.distances.Distance; - -public class Cluster extends Operator { - - public static void main(String[] args) throws Exception { - (new Cluster()).init(args); - } - - public void init(String[] args) throws Exception { - Distance distance = getDistance(args[0]); - List strings = getStrings(args[1]); - double radius = Double.parseDouble(args[2]); - int blocking_size = Integer.parseInt(args[3]); - - long vptree_start = System.currentTimeMillis(); - Clusterer vptree_clusterer = new VPTreeClusterer(distance); - for (String s: strings) { - vptree_clusterer.populate(s); - } - List> vptree_clusters = vptree_clusterer.getClusters(radius); - long vptree_elapsed = System.currentTimeMillis() - vptree_start; - int vptree_distances = distance.getCount(); - distance.resetCounter(); - - long ngram_start = System.currentTimeMillis(); - Clusterer ngram_clusterer = new NGramClusterer(distance,blocking_size); - for (String s: strings) { - ngram_clusterer.populate(s); - } - List> ngram_clusters = ngram_clusterer.getClusters(radius); - long ngram_elapsed = System.currentTimeMillis() - ngram_start; - int ngram_distances = distance.getCount(); - distance.resetCounter(); - - log("VPTree found " + vptree_clusters.size() + " in " + vptree_elapsed + " ms with " + vptree_distances + " distances\n"); - log("NGram found " + ngram_clusters.size() + " in " + ngram_elapsed + " ms with " + ngram_distances + " distances\n"); - - if (vptree_clusters.size() > ngram_clusters.size()) { - log("VPTree clusterer found these clusters the other method couldn't: "); - diff(vptree_clusters,ngram_clusters); - } else if (ngram_clusters.size() > vptree_clusters.size()) { - log("NGram clusterer found these clusters the other method couldn't: "); - diff(ngram_clusters,vptree_clusters); - } - - System.exit(0); - } - - private void diff(List> more, List> base) { - Set> holder = new HashSet>(base.size()); - - for (Set s : base) { - holder.add(s); - } - - for (Set s : more) { - if (!holder.contains(s)) { - printCluster(s); - } - } - } - - private void printCluster(Set cluster) { - for (Serializable s : cluster) { - log(s.toString()); - } - log(""); - } -} diff --git a/src/main/java/edu/mit/simile/vicino/Distributor.java b/src/main/java/edu/mit/simile/vicino/Distributor.java deleted file mode 100644 index b9064817d..000000000 --- a/src/main/java/edu/mit/simile/vicino/Distributor.java +++ /dev/null @@ -1,61 +0,0 @@ -package edu.mit.simile.vicino; - -import java.util.List; - -import edu.mit.simile.vicino.distances.Distance; - -public class Distributor extends Operator { - - private static final int COLUMNS = 70; - private static final char CHAR = '='; - - public static void main(String[] args) throws Exception { - - Distance d = getDistance(args[0]); - - List strings = getStrings(args[1]); - - int buckets = Integer.parseInt(args[2]); - - long start = System.currentTimeMillis(); - int[] values = new int[buckets]; - - int size = strings.size(); - for (int i = 0; i < size; i++) { - String x = (String) strings.get((int) (Math.random() * size)); - String y = (String) strings.get((int) (Math.random() * size)); - int dist = (int) (buckets * d.d(x, y)); - values[dist]++; - System.out.print("."); - } - System.out.println(); - - long stop = System.currentTimeMillis(); - float m = ((float) (stop - start)) / (float) size; - - int maxValue = 0; - for (int i = 0; i < buckets; i++) { - if (values[i] > maxValue) { - maxValue = values[i]; - } - } - - System.out - .println("+-------------------------------------------------------------------"); - for (int i = 0; i < buckets; i++) { - System.out.println("|" + bar(COLUMNS * values[i] / maxValue)); - } - System.out - .println("+-------------------------------------------------------------------"); - - System.out.println("\n Each distance calculation took: " + m + " millis"); - } - - static private String bar(int value) { - StringBuffer b = new StringBuffer(value); - for (int i = 0; i < value; i++) { - b.append(CHAR); - } - return b.toString(); - } -} diff --git a/src/main/java/edu/mit/simile/vicino/Meter.java b/src/main/java/edu/mit/simile/vicino/Meter.java deleted file mode 100644 index b4fb8cf69..000000000 --- a/src/main/java/edu/mit/simile/vicino/Meter.java +++ /dev/null @@ -1,12 +0,0 @@ -package edu.mit.simile.vicino; - -import edu.mit.simile.vicino.distances.Distance; - -public class Meter extends Operator { - - public static void main(String[] args) throws Exception { - Distance d = getDistance(args[0]); - System.out.println(args[1] + " <- " + d.d(args[1], args[2]) + " -> " + args[2]); - } - -} diff --git a/src/main/java/edu/mit/simile/vicino/NGramTokenizer.java b/src/main/java/edu/mit/simile/vicino/NGramTokenizer.java deleted file mode 100644 index 6d4891e82..000000000 --- a/src/main/java/edu/mit/simile/vicino/NGramTokenizer.java +++ /dev/null @@ -1,94 +0,0 @@ -package edu.mit.simile.vicino; - -import java.util.ArrayList; -import java.util.Iterator; -import java.util.List; -import java.util.Map; -import java.util.TreeMap; -import java.util.regex.Pattern; - -import com.wcohen.ss.api.Token; -import com.wcohen.ss.api.Tokenizer; - -public class NGramTokenizer implements Tokenizer { - - private int ngram_size; - - public NGramTokenizer(int ngram_size) { - this.ngram_size = ngram_size; - } - - public Token[] tokenize(String str) { - str = normalize(str); - List tokens = new ArrayList(); - for (int i = 0; i < str.length(); i++) { - int index = i + ngram_size; - if (index <= str.length()) { - tokens.add(intern(str.substring(i,index))); - } - } - return (Token[]) tokens.toArray(new BasicToken[tokens.size()]); - } - - static final Pattern extra = Pattern.compile("\\p{Cntrl}|\\p{Punct}"); - static final Pattern whitespace = Pattern.compile("\\p{Space}+"); - - private String normalize(String s) { - s = s.trim(); - s = extra.matcher(s).replaceAll(""); - s = whitespace.matcher(s).replaceAll(" "); - s = s.toLowerCase(); - return s.intern(); - } - - private int nextId = 0; - private Map tokMap = new TreeMap(); - - public Token intern(String s) { - s = s.toLowerCase().intern(); - Token tok = tokMap.get(s); - if (tok == null) { - tok = new BasicToken(++nextId, s); - tokMap.put(s, tok); - } - return tok; - } - - public Iterator tokenIterator() { - return tokMap.values().iterator(); - } - - public int maxTokenIndex() { - return nextId; - } - - public class BasicToken implements Token, Comparable { - private final int index; - private final String value; - - BasicToken(int index, String value) { - this.index = index; - this.value = value; - } - - public String getValue() { - return value; - } - - public int getIndex() { - return index; - } - - public int compareTo(Token t) { - return index - t.getIndex(); - } - - public int hashCode() { - return value.hashCode(); - } - - public String toString() { - return "[token#" + getIndex() + ":" + getValue() + "]"; - } - } -} diff --git a/src/main/java/edu/mit/simile/vicino/Operator.java b/src/main/java/edu/mit/simile/vicino/Operator.java deleted file mode 100644 index b064860cf..000000000 --- a/src/main/java/edu/mit/simile/vicino/Operator.java +++ /dev/null @@ -1,47 +0,0 @@ -package edu.mit.simile.vicino; - -import java.io.BufferedReader; -import java.io.File; -import java.io.FileInputStream; -import java.io.IOException; -import java.io.InputStreamReader; -import java.util.ArrayList; -import java.util.List; - -import edu.mit.simile.vicino.distances.Distance; - -public class Operator { - - static void log(String msg) { - System.out.println(msg); - } - - static Distance getDistance(String distance) throws Exception { - return (Distance) Class.forName("edu.mit.simile.vicino.distances." + distance + "Distance").newInstance(); - } - - static List getStrings(String fileName) throws IOException { - List strings = new ArrayList(); - - File file = new File(fileName); - if (file.isDirectory()) { - File[] files = file.listFiles(); - for (File f : files) { - getStrings(f, strings); - } - } else { - getStrings(file, strings); - } - - return strings; - } - - static void getStrings(File file, List strings) throws IOException { - BufferedReader input = new BufferedReader(new InputStreamReader(new FileInputStream(file), "UTF-8")); - String line; - while ((line = input.readLine()) != null) { - strings.add(line.trim().intern()); - } - input.close(); - } -} diff --git a/src/main/java/edu/mit/simile/vicino/Seeker.java b/src/main/java/edu/mit/simile/vicino/Seeker.java deleted file mode 100644 index 7993e8bc9..000000000 --- a/src/main/java/edu/mit/simile/vicino/Seeker.java +++ /dev/null @@ -1,53 +0,0 @@ -package edu.mit.simile.vicino; - -import java.io.BufferedReader; -import java.io.InputStreamReader; -import java.io.Serializable; -import java.util.Iterator; -import java.util.List; -import java.util.Set; - -import edu.mit.simile.vicino.distances.Distance; -import edu.mit.simile.vicino.vptree.VPTree; -import edu.mit.simile.vicino.vptree.VPTreeBuilder; -import edu.mit.simile.vicino.vptree.VPTreeSeeker; - -public class Seeker extends Operator { - - public static void main(String[] args) throws Exception { - Distance d = getDistance(args[0]); - - log("Working with distance: " + d); - List strings = getStrings(args[1]); - log("Obtained " + strings.size() + " from " + args[1]); - - log("Building VPTree..."); - VPTreeBuilder builder = new VPTreeBuilder(d); - VPTree tree = builder.buildVPTree(strings); - log("..done"); - - VPTreeSeeker seeker = new VPTreeSeeker(d, tree); - - log("type a string|range then hit return:"); - BufferedReader input = new BufferedReader(new InputStreamReader(System.in)); - String line = null; - while ((line = input.readLine()) != null) { - int index = line.indexOf('|'); - String query = line.substring(0, index); - float range = Float.parseFloat(line.substring(index + 1)); - long start = System.currentTimeMillis(); - Set results = seeker.range(query, range); - long stop = System.currentTimeMillis(); - Iterator j = results.iterator(); - if (j.hasNext()) { - while (j.hasNext()) { - String r = (String) j.next(); - log(" " + r); - } - log(" [done in " + (stop - start) + "ms]"); - } else { - log(" [no results found in " + (stop - start) + "ms]"); - } - } - } -} diff --git a/src/main/java/edu/mit/simile/vicino/Tester.java b/src/main/java/edu/mit/simile/vicino/Tester.java deleted file mode 100644 index 8dfc3400d..000000000 --- a/src/main/java/edu/mit/simile/vicino/Tester.java +++ /dev/null @@ -1,46 +0,0 @@ -package edu.mit.simile.vicino; - -import java.util.List; - -import edu.mit.simile.vicino.distances.Distance; - -public class Tester extends Operator { - - public static void main(String[] args) throws Exception { - Distance d = getDistance(args[0]); - - List strings = getStrings(args[1]); - - long start = System.currentTimeMillis(); - - int size = strings.size(); - for (int i = 0; i < size * size * size; i++) { - String x = (String) strings.get((int) (Math.random() * size)); - String y = (String) strings.get((int) (Math.random() * size)); - String z = (String) strings.get((int) (Math.random() * size)); - boolean metric = metric(x, y, z, d); - if (metric) { - System.out.println("metric"); - } else { - System.out.println("***** NOT METRIC *****"); - } - } - - long stop = System.currentTimeMillis(); - float m = ((float) (stop - start)) / (float) size; - - System.out.println("\n Each metric evaluation took: " + m + " millis"); - } - - static boolean metric(String x, String y, String z, Distance d) { - double dxx = d.d(x, x); - boolean identity = (dxx == 0.0f); - double dxy = d.d(x, y); - double dyx = d.d(y, x); - boolean simmetrical = (dxy == dyx); - double dxz = d.d(x, z); - double dyz = d.d(y, z); - boolean triangular = (dxz <= dxy + dyz); - return (identity && simmetrical && triangular); - } -} diff --git a/src/main/java/edu/mit/simile/vicino/clustering/Clusterer.java b/src/main/java/edu/mit/simile/vicino/clustering/Clusterer.java deleted file mode 100644 index 89b72d8f5..000000000 --- a/src/main/java/edu/mit/simile/vicino/clustering/Clusterer.java +++ /dev/null @@ -1,20 +0,0 @@ -package edu.mit.simile.vicino.clustering; - -import java.io.Serializable; -import java.util.Comparator; -import java.util.List; -import java.util.Set; - -public abstract class Clusterer { - - public class SizeComparator implements Comparator> { - public int compare(Set o1, Set o2) { - return o2.size() - o1.size(); - } - } - - public abstract void populate(String s); - - public abstract List> getClusters(double radius); - -} diff --git a/src/main/java/edu/mit/simile/vicino/clustering/NGramClusterer.java b/src/main/java/edu/mit/simile/vicino/clustering/NGramClusterer.java deleted file mode 100644 index bbf3dacbf..000000000 --- a/src/main/java/edu/mit/simile/vicino/clustering/NGramClusterer.java +++ /dev/null @@ -1,194 +0,0 @@ -package edu.mit.simile.vicino.clustering; - -import java.io.Serializable; -import java.util.ArrayList; -import java.util.Collections; -import java.util.HashMap; -import java.util.HashSet; -import java.util.List; -import java.util.Map; -import java.util.Set; -import java.util.TreeSet; -import java.util.Map.Entry; -import java.util.concurrent.Callable; -import java.util.concurrent.ExecutionException; -import java.util.concurrent.ExecutorService; -import java.util.concurrent.Executors; -import java.util.concurrent.Future; - -import com.wcohen.ss.api.Token; - -import edu.mit.simile.vicino.NGramTokenizer; -import edu.mit.simile.vicino.distances.Distance; - -public class NGramClusterer extends Clusterer { - - NGramTokenizer _tokenizer; - Distance _distance; - - Map> blocks = new HashMap>(); - - public NGramClusterer(Distance d, int blockSize) { - _tokenizer = new NGramTokenizer(blockSize); - _distance = d; - } - - public void populate(String s) { - Token[] tokens = _tokenizer.tokenize(s); - for (Token t : tokens) { - String ss = t.getValue(); - Set l = null; - if (!blocks.containsKey(ss)) { - l = new TreeSet(); - blocks.put(ss, l); - } else { - l = blocks.get(ss); - } - l.add(s); - } - } - - public class BlockEvaluator implements Callable>> { - - int start; - int stop; - double radius; - - List> blocks; - Map> cluster_map; - - public BlockEvaluator(List> blocks, double radius, int start, int stop) { - this.blocks = blocks; - this.start = start; - this.stop = stop; - this.radius = radius; - } - - public Map> call() { - Map> cluster_map = new HashMap>(); - - for (int i = start; i < stop; i++) { - Set set = blocks.get(i); - if (set.size() < 2) continue; - for (String a : set) { - for (String b : set) { - if (a == b) continue; - if (cluster_map.containsKey(a) && cluster_map.get(a).contains(b)) continue; - if (cluster_map.containsKey(b) && cluster_map.get(b).contains(a)) continue; - double d = _distance.d(a,b); - if (d <= radius || radius < 0) { - Set l = null; - if (!cluster_map.containsKey(a)) { - l = new TreeSet(); - l.add(a); - cluster_map.put(a, l); - } else { - l = cluster_map.get(a); - } - l.add(b); - } - } - } - } - - return cluster_map; - } - } - - private static final ExecutorService executor = Executors.newCachedThreadPool(); - - private static final boolean MULTITHREADED = true; - - public List> getClusters(double radius) { - if (MULTITHREADED) { - return getClustersMultiThread(radius); - } else { - return getClustersSingleThread(radius); - } - } - - public List> getClustersMultiThread(double radius) { - - int cores = Runtime.getRuntime().availableProcessors(); - int size = blocks.size(); - int range = size / cores + 1; - - List>> cluster_maps = new ArrayList>>(cores); - - List evaluators = new ArrayList(cores); - for (int i = 0; i < cores; i++) { - int range_start = range * i; - int range_end = range * (i + 1); - if (range_end > size) range_end = size; - evaluators.add(new BlockEvaluator(new ArrayList>(blocks.values()),radius,range_start,range_end)); - } - - try { - List>>> futures = executor.invokeAll(evaluators); - for (Future>> future : futures) { - cluster_maps.add(future.get()); - } - } catch (InterruptedException e1) { - e1.printStackTrace(); - } catch (ExecutionException e) { - e.printStackTrace(); - } - - Set> clusters = new HashSet>(); - - for (Map> cluster_map : cluster_maps) { - for (Entry> e : cluster_map.entrySet()) { - Set v = e.getValue(); - if (v.size() > 1) { - clusters.add(v); - } - } - } - - List> sorted_clusters = new ArrayList>(clusters); - - Collections.sort(sorted_clusters, new SizeComparator()); - - return sorted_clusters; - } - - public List> getClustersSingleThread(double radius) { - - Map> cluster_map = new HashMap>(); - - for (Set set : blocks.values()) { - if (set.size() < 2) continue; - for (String a : set) { - for (String b : set) { - if (a == b) continue; - if (cluster_map.containsKey(a) && cluster_map.get(a).contains(b)) continue; - if (cluster_map.containsKey(b) && cluster_map.get(b).contains(a)) continue; - double d = _distance.d(a,b); - if (d <= radius || radius < 0) { - Set l = null; - if (!cluster_map.containsKey(a)) { - l = new TreeSet(); - l.add(a); - cluster_map.put(a, l); - } else { - l = cluster_map.get(a); - } - l.add(b); - } - } - } - } - - List> clusters = new ArrayList>(); - for (Entry> e : cluster_map.entrySet()) { - Set v = e.getValue(); - if (v.size() > 1) { - clusters.add(v); - } - } - Collections.sort(clusters, new SizeComparator()); - - return clusters; - } - -} diff --git a/src/main/java/edu/mit/simile/vicino/clustering/VPTreeClusterer.java b/src/main/java/edu/mit/simile/vicino/clustering/VPTreeClusterer.java deleted file mode 100644 index a75a2022e..000000000 --- a/src/main/java/edu/mit/simile/vicino/clustering/VPTreeClusterer.java +++ /dev/null @@ -1,63 +0,0 @@ -package edu.mit.simile.vicino.clustering; - -import java.io.Serializable; -import java.util.ArrayList; -import java.util.Collections; -import java.util.HashMap; -import java.util.List; -import java.util.Map; -import java.util.Set; - -import edu.mit.simile.vicino.distances.Distance; -import edu.mit.simile.vicino.vptree.Node; -import edu.mit.simile.vicino.vptree.VPTree; -import edu.mit.simile.vicino.vptree.VPTreeBuilder; -import edu.mit.simile.vicino.vptree.VPTreeSeeker; - -public class VPTreeClusterer extends Clusterer { - - VPTreeBuilder _treeBuilder; - Distance _distance; - - public VPTreeClusterer(Distance d) { - _distance = d; - _treeBuilder = new VPTreeBuilder(d); - } - - public void populate(String s) { - _treeBuilder.populate(s); - } - - public List> getClusters(double radius) { - VPTree tree = _treeBuilder.buildVPTree(); - System.out.println("distances after the tree: " + _distance.getCount()); - Set nodes = _treeBuilder.getNodes(); - - VPTreeSeeker seeker = new VPTreeSeeker(_distance,tree); - Map flags = new HashMap(); - for (Node n : nodes) { - flags.put(n.get(), true); - } - - Map> map = new HashMap>(); - for (Node n : nodes) { - Serializable s = n.get(); - if (flags.get(s)) { - Set results = seeker.range(s, radius); - for (Serializable ss : results) { - flags.put(ss, false); - } - if (results.size() > 1) { - map.put(s, results); - } - } - } - - List> clusters = new ArrayList>(map.values()); - Collections.sort(clusters, new SizeComparator()); - - return clusters; - } - - -} diff --git a/src/main/java/edu/mit/simile/vicino/distances/BZip2Distance.java b/src/main/java/edu/mit/simile/vicino/distances/BZip2Distance.java deleted file mode 100644 index 1c9b8ae2a..000000000 --- a/src/main/java/edu/mit/simile/vicino/distances/BZip2Distance.java +++ /dev/null @@ -1,26 +0,0 @@ -package edu.mit.simile.vicino.distances; - -import java.io.ByteArrayOutputStream; -import java.io.IOException; - -import org.apache.tools.bzip2.CBZip2OutputStream; - -public class BZip2Distance extends PseudoMetricDistance { - - public double d2(String x, String y) { - String str = x + y; - double result = 0.0f; - try { - ByteArrayOutputStream baos = new ByteArrayOutputStream(str.length()); - CBZip2OutputStream os = new CBZip2OutputStream(baos); - os.write(str.getBytes()); - os.close(); - baos.close(); - result = baos.toByteArray().length; - } catch (IOException e) { - e.printStackTrace(); - } - return result; - } - -} diff --git a/src/main/java/edu/mit/simile/vicino/distances/Distance.java b/src/main/java/edu/mit/simile/vicino/distances/Distance.java deleted file mode 100644 index bdca82842..000000000 --- a/src/main/java/edu/mit/simile/vicino/distances/Distance.java +++ /dev/null @@ -1,17 +0,0 @@ -package edu.mit.simile.vicino.distances; - -public abstract class Distance { - - int counter = 0; - - public int getCount() { - return counter; - } - - public void resetCounter() { - counter = 0; - } - - public abstract double d(String x, String y); - -} diff --git a/src/main/java/edu/mit/simile/vicino/distances/GZipDistance.java b/src/main/java/edu/mit/simile/vicino/distances/GZipDistance.java deleted file mode 100644 index 263271744..000000000 --- a/src/main/java/edu/mit/simile/vicino/distances/GZipDistance.java +++ /dev/null @@ -1,26 +0,0 @@ -package edu.mit.simile.vicino.distances; - -import java.io.ByteArrayOutputStream; -import java.io.IOException; -import java.util.zip.GZIPOutputStream; - -public class GZipDistance extends PseudoMetricDistance { - - public double d2(String x, String y) { - String str = x + y; - double result = 0.0f; - try { - ByteArrayOutputStream baos = new ByteArrayOutputStream(str.length()); - GZIPOutputStream os = new GZIPOutputStream(baos); - os.write(str.getBytes()); - os.close(); - baos.close(); - result = baos.toByteArray().length; - } catch (IOException e) { - e.printStackTrace(); - } - return result; - - } - -} diff --git a/src/main/java/edu/mit/simile/vicino/distances/JaccardDistance.java b/src/main/java/edu/mit/simile/vicino/distances/JaccardDistance.java deleted file mode 100644 index f8858ee4f..000000000 --- a/src/main/java/edu/mit/simile/vicino/distances/JaccardDistance.java +++ /dev/null @@ -1,18 +0,0 @@ -package edu.mit.simile.vicino.distances; - -import com.wcohen.ss.Jaccard; -import com.wcohen.ss.api.StringDistance; - -public class JaccardDistance extends MetricDistance { - - StringDistance distance; - - public JaccardDistance() { - this.distance = new Jaccard(); - } - - protected double d2(String x, String y) { - return this.distance.score(x, y); - } - -} diff --git a/src/main/java/edu/mit/simile/vicino/distances/JaroDistance.java b/src/main/java/edu/mit/simile/vicino/distances/JaroDistance.java deleted file mode 100644 index 3d774f4aa..000000000 --- a/src/main/java/edu/mit/simile/vicino/distances/JaroDistance.java +++ /dev/null @@ -1,18 +0,0 @@ -package edu.mit.simile.vicino.distances; - -import com.wcohen.ss.Jaro; -import com.wcohen.ss.api.StringDistance; - -public class JaroDistance extends MetricDistance { - - StringDistance distance; - - public JaroDistance() { - this.distance = new Jaro(); - } - - protected double d2(String x, String y) { - return this.distance.score(x, y); - } - -} diff --git a/src/main/java/edu/mit/simile/vicino/distances/JaroWinklerDistance.java b/src/main/java/edu/mit/simile/vicino/distances/JaroWinklerDistance.java deleted file mode 100644 index 7ebd8299b..000000000 --- a/src/main/java/edu/mit/simile/vicino/distances/JaroWinklerDistance.java +++ /dev/null @@ -1,18 +0,0 @@ -package edu.mit.simile.vicino.distances; - -import com.wcohen.ss.JaroWinkler; -import com.wcohen.ss.api.StringDistance; - -public class JaroWinklerDistance extends MetricDistance { - - StringDistance distance; - - public JaroWinklerDistance() { - this.distance = new JaroWinkler(); - } - - protected double d2(String x, String y) { - return this.distance.score(x, y); - } - -} diff --git a/src/main/java/edu/mit/simile/vicino/distances/JaroWinklerTFIDFDistance.java b/src/main/java/edu/mit/simile/vicino/distances/JaroWinklerTFIDFDistance.java deleted file mode 100644 index 5eb299f90..000000000 --- a/src/main/java/edu/mit/simile/vicino/distances/JaroWinklerTFIDFDistance.java +++ /dev/null @@ -1,18 +0,0 @@ -package edu.mit.simile.vicino.distances; - -import com.wcohen.ss.JaroWinklerTFIDF; -import com.wcohen.ss.api.StringDistance; - -public class JaroWinklerTFIDFDistance extends MetricDistance { - - StringDistance distance; - - public JaroWinklerTFIDFDistance() { - this.distance = new JaroWinklerTFIDF(); - } - - protected double d2(String x, String y) { - return this.distance.score(x, y); - } - -} diff --git a/src/main/java/edu/mit/simile/vicino/distances/LevenshteinDistance.java b/src/main/java/edu/mit/simile/vicino/distances/LevenshteinDistance.java deleted file mode 100644 index 252345806..000000000 --- a/src/main/java/edu/mit/simile/vicino/distances/LevenshteinDistance.java +++ /dev/null @@ -1,18 +0,0 @@ -package edu.mit.simile.vicino.distances; - -import com.wcohen.ss.Levenstein; -import com.wcohen.ss.api.StringDistance; - -public class LevenshteinDistance extends MetricDistance { - - StringDistance distance; - - public LevenshteinDistance() { - this.distance = new Levenstein(); - } - - public double d2(String x, String y) { - return Math.abs(this.distance.score(x, y)); - } - -} diff --git a/src/main/java/edu/mit/simile/vicino/distances/MetricDistance.java b/src/main/java/edu/mit/simile/vicino/distances/MetricDistance.java deleted file mode 100644 index cfc3c79eb..000000000 --- a/src/main/java/edu/mit/simile/vicino/distances/MetricDistance.java +++ /dev/null @@ -1,24 +0,0 @@ -package edu.mit.simile.vicino.distances; - - -public abstract class MetricDistance extends Distance { - - /* - * public float d(String x,String y) { - * float dxy = d2(x,y); - * float dx = d2(x,""); - * float dy = d2(y,""); - * float result = dxy / (dx + dy); - * return result; - * } - */ - - public double d(String x, String y) { - double result = d2(x, y); - counter += 1; - return result; - } - - abstract double d2(String x, String y); - -} diff --git a/src/main/java/edu/mit/simile/vicino/distances/PPMDistance.java b/src/main/java/edu/mit/simile/vicino/distances/PPMDistance.java deleted file mode 100644 index 727348f8e..000000000 --- a/src/main/java/edu/mit/simile/vicino/distances/PPMDistance.java +++ /dev/null @@ -1,27 +0,0 @@ -package edu.mit.simile.vicino.distances; - -import java.io.ByteArrayOutputStream; -import java.io.IOException; - -import com.colloquial.arithcode.ArithCodeOutputStream; -import com.colloquial.arithcode.PPMModel; - -public class PPMDistance extends PseudoMetricDistance { - - public double d2(String x, String y) { - String str = x + y; - double result = 0.0f; - try { - ByteArrayOutputStream baos = new ByteArrayOutputStream(str.length()); - ArithCodeOutputStream os = new ArithCodeOutputStream(baos,new PPMModel(8)); - os.write(str.getBytes()); - os.close(); - baos.close(); - result = baos.toByteArray().length; - } catch (IOException e) { - e.printStackTrace(); - } - return result; - } - -} diff --git a/src/main/java/edu/mit/simile/vicino/distances/PseudoMetricDistance.java b/src/main/java/edu/mit/simile/vicino/distances/PseudoMetricDistance.java deleted file mode 100644 index c1c9e4d62..000000000 --- a/src/main/java/edu/mit/simile/vicino/distances/PseudoMetricDistance.java +++ /dev/null @@ -1,16 +0,0 @@ -package edu.mit.simile.vicino.distances; - - -public abstract class PseudoMetricDistance extends Distance { - - public double d(String x, String y) { - double cxx = d2(x, x); - double cyy = d2(y, y); - double cxy = d2(x, y); - double cyx = d2(y, x); - counter += 4; - return 10.0d * ((cxy + cyx) / (cxx + cyy) - 1.0d); - } - - protected abstract double d2(String x, String y); -} diff --git a/src/main/java/edu/mit/simile/vicino/vptree/Node.java b/src/main/java/edu/mit/simile/vicino/vptree/Node.java deleted file mode 100755 index 2eccb8eb9..000000000 --- a/src/main/java/edu/mit/simile/vicino/vptree/Node.java +++ /dev/null @@ -1,58 +0,0 @@ -package edu.mit.simile.vicino.vptree; - -import java.io.Serializable; - -/** - * This class represent a couple (Object, distance) value of that Object from - * the Vp in each step of the algorithm. - * - * @author Paolo Ciccarese - */ -public class Node implements Serializable { - - private static final long serialVersionUID = -2077473220894258550L; - - private final Serializable obj; - private double distance; - - public Node(Serializable obj, int i) { - this.obj = obj; - this.distance = i; - } - - public Node(Serializable obj) { - this.obj = obj; - } - - public Serializable get() { - return this.obj; - } - - public void setDistance(double distance) { - this.distance = distance; - } - - public double getDistance() { - return distance; - } - - public String toString() { - return obj.toString(); - } - - @Override - public boolean equals(Object o) { - if (this == o) { - return true; - } - if (o instanceof Node) { - return ((Node) o).get().equals(this.obj); - } - return false; - } - - @Override - public int hashCode() { - return this.obj.hashCode(); - } -} diff --git a/src/main/java/edu/mit/simile/vicino/vptree/NodeSorter.java b/src/main/java/edu/mit/simile/vicino/vptree/NodeSorter.java deleted file mode 100755 index 140891cf4..000000000 --- a/src/main/java/edu/mit/simile/vicino/vptree/NodeSorter.java +++ /dev/null @@ -1,94 +0,0 @@ -package edu.mit.simile.vicino.vptree; - -public class NodeSorter { - - /** - * Sorts and array of objects. - */ - public void sort(Node nodes[]) { - NodeSorter.sort(nodes, 0, nodes.length - 1); - } - - /** - * Sort array of Objects using the QuickSort algorithm. - * - * @param s - * An Object[]. - * @param lo - * The current lower bound. - * @param hi - * The current upper bound. - */ - public static void sort(Node nodes[], int lo, int hi) { - if (lo >= hi) { - return; - } - - /* - * Use median-of-three(lo, mid, hi) to pick a partition. Also swap them - * into relative order while we are at it. - */ - int mid = (lo + hi) / 2; - - if (nodes[lo].getDistance() > nodes[mid].getDistance()) { - // Swap. - Node tmp = nodes[lo]; - nodes[lo] = nodes[mid]; - nodes[mid] = tmp; - } - - if (nodes[mid].getDistance() > nodes[hi].getDistance()) { - // Swap . - Node tmp = nodes[mid]; - nodes[mid] = nodes[hi]; - nodes[hi] = tmp; - - if (nodes[lo].getDistance() > nodes[mid].getDistance()) { - // Swap. - Node tmp2 = nodes[lo]; - nodes[lo] = nodes[mid]; - nodes[mid] = tmp2; - } - } - - // Start one past lo since already handled lo. - - int left = lo + 1; - - // Similarly, end one before hi since already handled hi. - - int right = hi - 1; - - // If there are three or fewer elements, we are done. - - if (left >= right) { - return; - } - - Node partition = nodes[mid]; - - while (true) { - while (nodes[right].getDistance() > partition.getDistance()) { - --right; - } - - while (left < right && nodes[left].getDistance() <= partition.getDistance()) { - ++left; - } - - if (left < right) { - // Swap. - Node tmp = nodes[left]; - nodes[left] = nodes[right]; - nodes[right] = tmp; - - --right; - } else { - break; - } - } - - sort(nodes, lo, left); - sort(nodes, left + 1, hi); - } -} diff --git a/src/main/java/edu/mit/simile/vicino/vptree/TNode.java b/src/main/java/edu/mit/simile/vicino/vptree/TNode.java deleted file mode 100755 index e683e1601..000000000 --- a/src/main/java/edu/mit/simile/vicino/vptree/TNode.java +++ /dev/null @@ -1,56 +0,0 @@ -package edu.mit.simile.vicino.vptree; - -import java.io.Serializable; - -/** - * @author Paolo Ciccarese - */ -public class TNode implements Serializable { - - private static final long serialVersionUID = -217604190976851241L; - - private final Serializable obj; - private double median; - private TNode left; - private TNode right; - - /** - * The Object will be fixed during the instantiation of the node, while the - * children will be defined in another iteration of the algorithm, - */ - public TNode(Serializable obj) { - this.obj = obj; - } - - public Serializable get() { - return this.obj; - } - - public void setMedian(double median) { - this.median = median; - } - - public double getMedian() { - return median; - } - - public void setLeft(TNode leftNode) { - this.left = leftNode; - } - - public TNode getLeft() { - return left; - } - - public void setRight(TNode rightNode) { - this.right = rightNode; - } - - public TNode getRight() { - return right; - } - - public String toString() { - return this.obj.toString(); - } -} diff --git a/src/main/java/edu/mit/simile/vicino/vptree/VPTree.java b/src/main/java/edu/mit/simile/vicino/vptree/VPTree.java deleted file mode 100755 index 4ce8cb997..000000000 --- a/src/main/java/edu/mit/simile/vicino/vptree/VPTree.java +++ /dev/null @@ -1,33 +0,0 @@ -package edu.mit.simile.vicino.vptree; - -import java.io.Serializable; - -/** - * The VPTree class. - * - * @author Paolo Ciccarese - */ -public class VPTree implements Serializable { - - private static final long serialVersionUID = 1291056732155841123L; - - private TNode root; - - /** - * Sets the root of the VPTree. - * - * @param root The VPTree root. - */ - public void setRoot(TNode root) { - this.root = root; - } - - /** - * Get the root of the VPTree. - * - * @return The VPTree root. - */ - public TNode getRoot() { - return root; - } -} diff --git a/src/main/java/edu/mit/simile/vicino/vptree/VPTreeBuilder.java b/src/main/java/edu/mit/simile/vicino/vptree/VPTreeBuilder.java deleted file mode 100755 index 644c3983f..000000000 --- a/src/main/java/edu/mit/simile/vicino/vptree/VPTreeBuilder.java +++ /dev/null @@ -1,192 +0,0 @@ -package edu.mit.simile.vicino.vptree; - -import java.io.Serializable; -import java.util.Collection; -import java.util.HashSet; -import java.util.Random; -import java.util.Set; - -import edu.mit.simile.vicino.distances.Distance; - -/** - * @author Paolo Ciccarese - * @author Stefano Mazzocchi - */ -public class VPTreeBuilder { - - private static final boolean DEBUG = false; - private static final boolean OPTIMIZED = false; - private static final int sample_size = 10; - - private Random generator = new Random(System.currentTimeMillis()); - - private final Distance distance; - - private Set nodes = new HashSet(); - - /** - * Defines a VPTree Builder for a specific distance. - * - * @param distance - * The class implementing the distance. - */ - public VPTreeBuilder(Distance distance) { - this.distance = distance; - } - - public Set getNodes() { - return this.nodes; - } - - public void populate(Serializable s) { - nodes.add(new Node(s)); - } - - public VPTree buildVPTree() { - if (DEBUG) { - for (Node n : this.nodes) { - System.out.println(n.get().toString()); - } - System.out.println(); - } - Node[] nodes_array = this.nodes.toArray(new Node[this.nodes.size()]); - VPTree tree = new VPTree(); - if (nodes_array.length > 0) { - tree.setRoot(makeNode(nodes_array, 0, nodes_array.length - 1)); - } - return tree; - } - - public VPTree buildVPTree(Collection values) { - reset(); - for (Serializable s : values) { - populate(s); - } - return buildVPTree(); - } - - public void reset() { - this.nodes.clear(); - } - - private TNode makeNode(Node nodes[], int begin, int end) { - - int delta = end - begin; - - if (DEBUG) System.out.println("\ndelta: " + delta); - - if (delta == 0) { - TNode vpNode = new TNode(nodes[begin].get()); - vpNode.setMedian(0); - return vpNode; - } else if (delta < 0) { - return null; - } - - Node randomNode = getVantagePoint(nodes, begin, end); - TNode vpNode = new TNode(randomNode.get()); - - if (DEBUG) System.out.println("\nvp-node: " + vpNode.get().toString()); - - calculateDistances(vpNode, nodes, begin, end); - orderDistances(nodes, begin, end); - fixVantagPoint(randomNode, nodes, begin, end); - - if (DEBUG) { - for (int i = begin; i <= end; i++) { - System.out.println(" +-- " + nodes[i].getDistance() + " --> " + nodes[i].get()); - } - } - - float median = (float) median(nodes, begin, end); - vpNode.setMedian(median); - - int i = 0; - for (i = begin + 1; i < end; i++) { - if (nodes[i].getDistance() >= median) { - vpNode.setLeft(makeNode(nodes, begin + 1, i - 1)); - break; - } - } - vpNode.setRight(makeNode(nodes, i, end)); - - return vpNode; - } - - private Node getVantagePoint(Node nodes[], int begin, int end) { - if (OPTIMIZED) { - Node buffer[] = new Node[sample_size]; - for (int i = 0; i < sample_size; i++) { - buffer[i] = getRandomNode(nodes,begin,end); - } - - double bestSpread = 0; - Node bestNode = buffer[0]; - for (int i = 0; i < sample_size; i++) { - calculateDistances(new TNode(buffer[i]), buffer, 0, buffer.length - 1); - orderDistances(nodes, begin, end); - double median = (double) median(nodes, begin, end); - double spread = deviation(buffer, median); - System.out.println(" " + spread); - if (spread > bestSpread) { - bestSpread = spread; - bestNode = buffer[i]; - } - } - - System.out.println("best: " + bestSpread); - return bestNode; - } else { - return getRandomNode(nodes,begin,end); - } - } - - private Node getRandomNode(Node nodes[], int begin, int end) { - return nodes[begin + generator.nextInt(end - begin)]; - } - - private double deviation(Node buffer[], double median) { - double sum = 0; - for (int i = 0; i < buffer.length; i++) { - sum += Math.pow(buffer[i].getDistance() - median, 2); - } - return sum / buffer.length; - } - - public double median(Node nodes[], int begin, int end) { - int delta = end - begin; - int middle = delta / 2; - - if (delta % 2 == 0) { - return nodes[begin + middle].getDistance(); - } else { - return (nodes[begin + middle].getDistance() + nodes[begin + middle + 1].getDistance()) / 2.0d; - } - } - - private void calculateDistances(TNode pivot, Node nodes[], int begin, int end) { - Serializable x = pivot.get(); - for (int i = begin; i <= end; i++) { - Serializable y = nodes[i].get(); - double d = (x == y || x.equals(y)) ? 0.0d : distance.d(x.toString(), y.toString()); - nodes[i].setDistance(d); - } - } - - private void fixVantagPoint(Node pivot, Node nodes[], int begin, int end) { - for (int i = begin; i < end; i++) { - if (nodes[i] == pivot) { - if (i > begin) { - Node tmp = nodes[begin]; - nodes[begin] = pivot; - nodes[i] = tmp; - break; - } - } - } - } - - private void orderDistances(Node nodes[], int begin, int end) { - NodeSorter.sort(nodes, begin, end); - } -} diff --git a/src/main/java/edu/mit/simile/vicino/vptree/VPTreeSeeker.java b/src/main/java/edu/mit/simile/vicino/vptree/VPTreeSeeker.java deleted file mode 100755 index 236815f60..000000000 --- a/src/main/java/edu/mit/simile/vicino/vptree/VPTreeSeeker.java +++ /dev/null @@ -1,59 +0,0 @@ -package edu.mit.simile.vicino.vptree; - -import java.io.Serializable; -import java.util.HashSet; -import java.util.Set; - -import edu.mit.simile.vicino.distances.Distance; - -/** - * @author Paolo Ciccarese - */ -public class VPTreeSeeker { - - private static final boolean DEBUG = false; - - VPTree tree; - Distance distance; - - public VPTreeSeeker(Distance distance, VPTree tree) { - this.distance = distance; - this.tree = tree; - } - - public Set range(Serializable query, double range) { - if (DEBUG) System.out.println("--------------- " + query + " " + range); - return rangeTraversal(query, range, tree.getRoot(), new HashSet()); - } - - private Set rangeTraversal(Serializable query, double range, TNode tNode, Set results) { - - if (DEBUG) System.out.println("> " + tNode); - - if (tNode != null) { - double distance = this.distance.d(query.toString(), tNode.get().toString()); - - if (distance <= range) { - if (DEBUG) System.out.println("*** add ***"); - results.add(tNode.get()); - } - - if ((distance + range) < tNode.getMedian()) { - if (DEBUG) System.out.println("left: " + distance + " + " + range + " < " + tNode.getMedian()); - rangeTraversal(query, range, tNode.getLeft(), results); - } else if ((distance - range) > tNode.getMedian()) { - if (DEBUG) System.out.println("right: " + distance + " + " + range + " > " + tNode.getMedian()); - rangeTraversal(query, range, tNode.getRight(), results); - } else { - if (DEBUG) System.out.println("left & right: " + distance + " + " + range + " = " + tNode.getMedian()); - rangeTraversal(query, range, tNode.getLeft(), results); - rangeTraversal(query, range, tNode.getRight(), results); - } - } - - if (DEBUG) System.out.println("< " + tNode); - - return results; - } - -} diff --git a/src/main/java/org/apache/tools/bzip2/BZip2Constants.java b/src/main/java/org/apache/tools/bzip2/BZip2Constants.java deleted file mode 100644 index 4f832d67d..000000000 --- a/src/main/java/org/apache/tools/bzip2/BZip2Constants.java +++ /dev/null @@ -1,136 +0,0 @@ -/* - * The Apache Software License, Version 1.1 - * - * Copyright (c) 2001 The Apache Software Foundation. All rights - * reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * - * 3. The end-user documentation included with the redistribution, if - * any, must include the following acknowlegement: - * "This product includes software developed by the - * Apache Software Foundation (http://www.apache.org/)." - * Alternately, this acknowlegement may appear in the software itself, - * if and wherever such third-party acknowlegements normally appear. - * - * 4. The names "Ant" and "Apache Software - * Foundation" must not be used to endorse or promote products derived - * from this software without prior written permission. For written - * permission, please contact apache@apache.org. - * - * 5. Products derived from this software may not be called "Apache" - * nor may "Apache" appear in their names without prior written - * permission of the Apache Group. - * - * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED - * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES - * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR - * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF - * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, - * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT - * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * ==================================================================== - * - * This software consists of voluntary contributions made by many - * individuals on behalf of the Apache Software Foundation. For more - * information on the Apache Software Foundation, please see - * . - */ - -/* - * This package is based on the work done by Keiron Liddle, Aftex Software - * to whom the Ant project is very grateful for his - * great code. - */ - -package org.apache.tools.bzip2; - -/** - * Base class for both the compress and decompress classes. - * Holds common arrays, and static data. - * - * @author Keiron Liddle - */ -public interface BZip2Constants { - - int baseBlockSize = 100000; - int MAX_ALPHA_SIZE = 258; - int MAX_CODE_LEN = 23; - int RUNA = 0; - int RUNB = 1; - int N_GROUPS = 6; - int G_SIZE = 50; - int N_ITERS = 4; - int MAX_SELECTORS = (2 + (900000 / G_SIZE)); - int NUM_OVERSHOOT_BYTES = 20; - - int[] rNums = { - 619, 720, 127, 481, 931, 816, 813, 233, 566, 247, - 985, 724, 205, 454, 863, 491, 741, 242, 949, 214, - 733, 859, 335, 708, 621, 574, 73, 654, 730, 472, - 419, 436, 278, 496, 867, 210, 399, 680, 480, 51, - 878, 465, 811, 169, 869, 675, 611, 697, 867, 561, - 862, 687, 507, 283, 482, 129, 807, 591, 733, 623, - 150, 238, 59, 379, 684, 877, 625, 169, 643, 105, - 170, 607, 520, 932, 727, 476, 693, 425, 174, 647, - 73, 122, 335, 530, 442, 853, 695, 249, 445, 515, - 909, 545, 703, 919, 874, 474, 882, 500, 594, 612, - 641, 801, 220, 162, 819, 984, 589, 513, 495, 799, - 161, 604, 958, 533, 221, 400, 386, 867, 600, 782, - 382, 596, 414, 171, 516, 375, 682, 485, 911, 276, - 98, 553, 163, 354, 666, 933, 424, 341, 533, 870, - 227, 730, 475, 186, 263, 647, 537, 686, 600, 224, - 469, 68, 770, 919, 190, 373, 294, 822, 808, 206, - 184, 943, 795, 384, 383, 461, 404, 758, 839, 887, - 715, 67, 618, 276, 204, 918, 873, 777, 604, 560, - 951, 160, 578, 722, 79, 804, 96, 409, 713, 940, - 652, 934, 970, 447, 318, 353, 859, 672, 112, 785, - 645, 863, 803, 350, 139, 93, 354, 99, 820, 908, - 609, 772, 154, 274, 580, 184, 79, 626, 630, 742, - 653, 282, 762, 623, 680, 81, 927, 626, 789, 125, - 411, 521, 938, 300, 821, 78, 343, 175, 128, 250, - 170, 774, 972, 275, 999, 639, 495, 78, 352, 126, - 857, 956, 358, 619, 580, 124, 737, 594, 701, 612, - 669, 112, 134, 694, 363, 992, 809, 743, 168, 974, - 944, 375, 748, 52, 600, 747, 642, 182, 862, 81, - 344, 805, 988, 739, 511, 655, 814, 334, 249, 515, - 897, 955, 664, 981, 649, 113, 974, 459, 893, 228, - 433, 837, 553, 268, 926, 240, 102, 654, 459, 51, - 686, 754, 806, 760, 493, 403, 415, 394, 687, 700, - 946, 670, 656, 610, 738, 392, 760, 799, 887, 653, - 978, 321, 576, 617, 626, 502, 894, 679, 243, 440, - 680, 879, 194, 572, 640, 724, 926, 56, 204, 700, - 707, 151, 457, 449, 797, 195, 791, 558, 945, 679, - 297, 59, 87, 824, 713, 663, 412, 693, 342, 606, - 134, 108, 571, 364, 631, 212, 174, 643, 304, 329, - 343, 97, 430, 751, 497, 314, 983, 374, 822, 928, - 140, 206, 73, 263, 980, 736, 876, 478, 430, 305, - 170, 514, 364, 692, 829, 82, 855, 953, 676, 246, - 369, 970, 294, 750, 807, 827, 150, 790, 288, 923, - 804, 378, 215, 828, 592, 281, 565, 555, 710, 82, - 896, 831, 547, 261, 524, 462, 293, 465, 502, 56, - 661, 821, 976, 991, 658, 869, 905, 758, 745, 193, - 768, 550, 608, 933, 378, 286, 215, 979, 792, 961, - 61, 688, 793, 644, 986, 403, 106, 366, 905, 644, - 372, 567, 466, 434, 645, 210, 389, 550, 919, 135, - 780, 773, 635, 389, 707, 100, 626, 958, 165, 504, - 920, 176, 193, 713, 857, 265, 203, 50, 668, 108, - 645, 990, 626, 197, 510, 357, 358, 850, 858, 364, - 936, 638 - }; -} diff --git a/src/main/java/org/apache/tools/bzip2/CBZip2InputStream.java b/src/main/java/org/apache/tools/bzip2/CBZip2InputStream.java deleted file mode 100644 index da4c82017..000000000 --- a/src/main/java/org/apache/tools/bzip2/CBZip2InputStream.java +++ /dev/null @@ -1,865 +0,0 @@ -/* - * The Apache Software License, Version 1.1 - * - * Copyright (c) 2001-2003 The Apache Software Foundation. All rights - * reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * - * 3. The end-user documentation included with the redistribution, if - * any, must include the following acknowlegement: - * "This product includes software developed by the - * Apache Software Foundation (http://www.apache.org/)." - * Alternately, this acknowlegement may appear in the software itself, - * if and wherever such third-party acknowlegements normally appear. - * - * 4. The names "Ant" and "Apache Software - * Foundation" must not be used to endorse or promote products derived - * from this software without prior written permission. For written - * permission, please contact apache@apache.org. - * - * 5. Products derived from this software may not be called "Apache" - * nor may "Apache" appear in their names without prior written - * permission of the Apache Group. - * - * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED - * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES - * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR - * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF - * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, - * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT - * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * ==================================================================== - * - * This software consists of voluntary contributions made by many - * individuals on behalf of the Apache Software Foundation. For more - * information on the Apache Software Foundation, please see - * . - */ - -/* - * This package is based on the work done by Keiron Liddle, Aftex Software - * to whom the Ant project is very grateful for his - * great code. - */ -package org.apache.tools.bzip2; - -import java.io.IOException; -import java.io.InputStream; - -/** - * An input stream that decompresses from the BZip2 format (without the file - * header chars) to be read as any other stream. - * - * @author Keiron Liddle - */ -public class CBZip2InputStream extends InputStream implements BZip2Constants { - private static void cadvise() { - System.out.println("CRC Error"); - //throw new CCoruptionError(); - } - - private static void compressedStreamEOF() { - cadvise(); - } - - private void makeMaps() { - int i; - nInUse = 0; - for (i = 0; i < 256; i++) { - if (inUse[i]) { - seqToUnseq[nInUse] = (char) i; - unseqToSeq[i] = (char) nInUse; - nInUse++; - } - } - } - - /* - index of the last char in the block, so - the block size == last + 1. - */ - private int last; - - /* - index in zptr[] of original string after sorting. - */ - private int origPtr; - - /* - always: in the range 0 .. 9. - The current block size is 100000 * this number. - */ - private int blockSize100k; - - private boolean blockRandomised; - - private int bsBuff; - private int bsLive; - private CRC mCrc = new CRC(); - - private boolean[] inUse = new boolean[256]; - private int nInUse; - - private char[] seqToUnseq = new char[256]; - private char[] unseqToSeq = new char[256]; - - private char[] selector = new char[MAX_SELECTORS]; - private char[] selectorMtf = new char[MAX_SELECTORS]; - - private int[] tt; - private char[] ll8; - - /* - freq table collected to save a pass over the data - during decompression. - */ - private int[] unzftab = new int[256]; - - private int[][] limit = new int[N_GROUPS][MAX_ALPHA_SIZE]; - private int[][] base = new int[N_GROUPS][MAX_ALPHA_SIZE]; - private int[][] perm = new int[N_GROUPS][MAX_ALPHA_SIZE]; - private int[] minLens = new int[N_GROUPS]; - - private InputStream bsStream; - - private boolean streamEnd = false; - - private int currentChar = -1; - - private static final int START_BLOCK_STATE = 1; - private static final int RAND_PART_A_STATE = 2; - private static final int RAND_PART_B_STATE = 3; - private static final int RAND_PART_C_STATE = 4; - private static final int NO_RAND_PART_A_STATE = 5; - private static final int NO_RAND_PART_B_STATE = 6; - private static final int NO_RAND_PART_C_STATE = 7; - - private int currentState = START_BLOCK_STATE; - - private int storedBlockCRC, storedCombinedCRC; - private int computedBlockCRC, computedCombinedCRC; - - int i2, count, chPrev, ch2; - int i, tPos; - int rNToGo = 0; - int rTPos = 0; - int j2; - char z; - - public CBZip2InputStream(InputStream zStream) { - ll8 = null; - tt = null; - bsSetStream(zStream); - initialize(); - initBlock(); - setupBlock(); - } - - public int read() { - if (streamEnd) { - return -1; - } else { - int retChar = currentChar; - switch(currentState) { - case START_BLOCK_STATE: - break; - case RAND_PART_A_STATE: - break; - case RAND_PART_B_STATE: - setupRandPartB(); - break; - case RAND_PART_C_STATE: - setupRandPartC(); - break; - case NO_RAND_PART_A_STATE: - break; - case NO_RAND_PART_B_STATE: - setupNoRandPartB(); - break; - case NO_RAND_PART_C_STATE: - setupNoRandPartC(); - break; - default: - break; - } - return retChar; - } - } - - private void initialize() { - char magic3, magic4; - magic3 = bsGetUChar(); - magic4 = bsGetUChar(); - if (magic3 != 'h' || magic4 < '1' || magic4 > '9') { - bsFinishedWithStream(); - streamEnd = true; - return; - } - - setDecompressStructureSizes(magic4 - '0'); - computedCombinedCRC = 0; - } - - private void initBlock() { - char magic1, magic2, magic3, magic4; - char magic5, magic6; - magic1 = bsGetUChar(); - magic2 = bsGetUChar(); - magic3 = bsGetUChar(); - magic4 = bsGetUChar(); - magic5 = bsGetUChar(); - magic6 = bsGetUChar(); - if (magic1 == 0x17 && magic2 == 0x72 && magic3 == 0x45 - && magic4 == 0x38 && magic5 == 0x50 && magic6 == 0x90) { - complete(); - return; - } - - if (magic1 != 0x31 || magic2 != 0x41 || magic3 != 0x59 - || magic4 != 0x26 || magic5 != 0x53 || magic6 != 0x59) { - badBlockHeader(); - streamEnd = true; - return; - } - - storedBlockCRC = bsGetInt32(); - - if (bsR(1) == 1) { - blockRandomised = true; - } else { - blockRandomised = false; - } - - // currBlockNo++; - getAndMoveToFrontDecode(); - - mCrc.initialiseCRC(); - currentState = START_BLOCK_STATE; - } - - private void endBlock() { - computedBlockCRC = mCrc.getFinalCRC(); - /* A bad CRC is considered a fatal error. */ - if (storedBlockCRC != computedBlockCRC) { - crcError(); - } - - computedCombinedCRC = (computedCombinedCRC << 1) - | (computedCombinedCRC >>> 31); - computedCombinedCRC ^= computedBlockCRC; - } - - private void complete() { - storedCombinedCRC = bsGetInt32(); - if (storedCombinedCRC != computedCombinedCRC) { - crcError(); - } - - bsFinishedWithStream(); - streamEnd = true; - } - - private static void blockOverrun() { - cadvise(); - } - - private static void badBlockHeader() { - cadvise(); - } - - private static void crcError() { - cadvise(); - } - - private void bsFinishedWithStream() { - try { - if (this.bsStream != null) { - if (this.bsStream != System.in) { - this.bsStream.close(); - this.bsStream = null; - } - } - } catch (IOException ioe) { - //ignore - } - } - - private void bsSetStream(InputStream f) { - bsStream = f; - bsLive = 0; - bsBuff = 0; - } - - private int bsR(int n) { - int v; - while (bsLive < n) { - int zzi; - char thech = 0; - try { - thech = (char) bsStream.read(); - } catch (IOException e) { - compressedStreamEOF(); - } - if (thech == -1) { - compressedStreamEOF(); - } - zzi = thech; - bsBuff = (bsBuff << 8) | (zzi & 0xff); - bsLive += 8; - } - - v = (bsBuff >> (bsLive - n)) & ((1 << n) - 1); - bsLive -= n; - return v; - } - - private char bsGetUChar() { - return (char) bsR(8); - } - - private int bsGetint() { - int u = 0; - u = (u << 8) | bsR(8); - u = (u << 8) | bsR(8); - u = (u << 8) | bsR(8); - u = (u << 8) | bsR(8); - return u; - } - - private int bsGetIntVS(int numBits) { - return (int) bsR(numBits); - } - - private int bsGetInt32() { - return (int) bsGetint(); - } - - private void hbCreateDecodeTables(int[] limit, int[] base, - int[] perm, char[] length, - int minLen, int maxLen, int alphaSize) { - int pp, i, j, vec; - - pp = 0; - for (i = minLen; i <= maxLen; i++) { - for (j = 0; j < alphaSize; j++) { - if (length[j] == i) { - perm[pp] = j; - pp++; - } - } - } - - for (i = 0; i < MAX_CODE_LEN; i++) { - base[i] = 0; - } - for (i = 0; i < alphaSize; i++) { - base[length[i] + 1]++; - } - - for (i = 1; i < MAX_CODE_LEN; i++) { - base[i] += base[i - 1]; - } - - for (i = 0; i < MAX_CODE_LEN; i++) { - limit[i] = 0; - } - vec = 0; - - for (i = minLen; i <= maxLen; i++) { - vec += (base[i + 1] - base[i]); - limit[i] = vec - 1; - vec <<= 1; - } - for (i = minLen + 1; i <= maxLen; i++) { - base[i] = ((limit[i - 1] + 1) << 1) - base[i]; - } - } - - private void recvDecodingTables() { - char len[][] = new char[N_GROUPS][MAX_ALPHA_SIZE]; - int i, j, t, nGroups, nSelectors, alphaSize; - int minLen, maxLen; - boolean[] inUse16 = new boolean[16]; - - /* Receive the mapping table */ - for (i = 0; i < 16; i++) { - if (bsR(1) == 1) { - inUse16[i] = true; - } else { - inUse16[i] = false; - } - } - - for (i = 0; i < 256; i++) { - inUse[i] = false; - } - - for (i = 0; i < 16; i++) { - if (inUse16[i]) { - for (j = 0; j < 16; j++) { - if (bsR(1) == 1) { - inUse[i * 16 + j] = true; - } - } - } - } - - makeMaps(); - alphaSize = nInUse + 2; - - /* Now the selectors */ - nGroups = bsR(3); - nSelectors = bsR(15); - for (i = 0; i < nSelectors; i++) { - j = 0; - while (bsR(1) == 1) { - j++; - } - selectorMtf[i] = (char) j; - } - - /* Undo the MTF values for the selectors. */ - { - char[] pos = new char[N_GROUPS]; - char tmp, v; - for (v = 0; v < nGroups; v++) { - pos[v] = v; - } - - for (i = 0; i < nSelectors; i++) { - v = selectorMtf[i]; - tmp = pos[v]; - while (v > 0) { - pos[v] = pos[v - 1]; - v--; - } - pos[0] = tmp; - selector[i] = tmp; - } - } - - /* Now the coding tables */ - for (t = 0; t < nGroups; t++) { - int curr = bsR(5); - for (i = 0; i < alphaSize; i++) { - while (bsR(1) == 1) { - if (bsR(1) == 0) { - curr++; - } else { - curr--; - } - } - len[t][i] = (char) curr; - } - } - - /* Create the Huffman decoding tables */ - for (t = 0; t < nGroups; t++) { - minLen = 32; - maxLen = 0; - for (i = 0; i < alphaSize; i++) { - if (len[t][i] > maxLen) { - maxLen = len[t][i]; - } - if (len[t][i] < minLen) { - minLen = len[t][i]; - } - } - hbCreateDecodeTables(limit[t], base[t], perm[t], len[t], minLen, - maxLen, alphaSize); - minLens[t] = minLen; - } - } - - private void getAndMoveToFrontDecode() { - char[] yy = new char[256]; - int i, j, nextSym, limitLast; - int EOB, groupNo, groupPos; - - limitLast = baseBlockSize * blockSize100k; - origPtr = bsGetIntVS(24); - - recvDecodingTables(); - EOB = nInUse + 1; - groupNo = -1; - groupPos = 0; - - /* - Setting up the unzftab entries here is not strictly - necessary, but it does save having to do it later - in a separate pass, and so saves a block's worth of - cache misses. - */ - for (i = 0; i <= 255; i++) { - unzftab[i] = 0; - } - - for (i = 0; i <= 255; i++) { - yy[i] = (char) i; - } - - last = -1; - - { - int zt, zn, zvec, zj; - if (groupPos == 0) { - groupNo++; - groupPos = G_SIZE; - } - groupPos--; - zt = selector[groupNo]; - zn = minLens[zt]; - zvec = bsR(zn); - while (zvec > limit[zt][zn]) { - zn++; - { - { - while (bsLive < 1) { - int zzi; - char thech = 0; - try { - thech = (char) bsStream.read(); - } catch (IOException e) { - compressedStreamEOF(); - } - if (thech == -1) { - compressedStreamEOF(); - } - zzi = thech; - bsBuff = (bsBuff << 8) | (zzi & 0xff); - bsLive += 8; - } - } - zj = (bsBuff >> (bsLive - 1)) & 1; - bsLive--; - } - zvec = (zvec << 1) | zj; - } - nextSym = perm[zt][zvec - base[zt][zn]]; - } - - while (true) { - - if (nextSym == EOB) { - break; - } - - if (nextSym == RUNA || nextSym == RUNB) { - char ch; - int s = -1; - int N = 1; - do { - if (nextSym == RUNA) { - s = s + (0 + 1) * N; - } else if (nextSym == RUNB) { - s = s + (1 + 1) * N; - } - N = N * 2; - { - int zt, zn, zvec, zj; - if (groupPos == 0) { - groupNo++; - groupPos = G_SIZE; - } - groupPos--; - zt = selector[groupNo]; - zn = minLens[zt]; - zvec = bsR(zn); - while (zvec > limit[zt][zn]) { - zn++; - { - { - while (bsLive < 1) { - int zzi; - char thech = 0; - try { - thech = (char) bsStream.read(); - } catch (IOException e) { - compressedStreamEOF(); - } - if (thech == -1) { - compressedStreamEOF(); - } - zzi = thech; - bsBuff = (bsBuff << 8) | (zzi & 0xff); - bsLive += 8; - } - } - zj = (bsBuff >> (bsLive - 1)) & 1; - bsLive--; - } - zvec = (zvec << 1) | zj; - } - nextSym = perm[zt][zvec - base[zt][zn]]; - } - } while (nextSym == RUNA || nextSym == RUNB); - - s++; - ch = seqToUnseq[yy[0]]; - unzftab[ch] += s; - - while (s > 0) { - last++; - ll8[last] = ch; - s--; - } - - if (last >= limitLast) { - blockOverrun(); - } - continue; - } else { - char tmp; - last++; - if (last >= limitLast) { - blockOverrun(); - } - - tmp = yy[nextSym - 1]; - unzftab[seqToUnseq[tmp]]++; - ll8[last] = seqToUnseq[tmp]; - - /* - This loop is hammered during decompression, - hence the unrolling. - - for (j = nextSym-1; j > 0; j--) yy[j] = yy[j-1]; - */ - - j = nextSym - 1; - for (; j > 3; j -= 4) { - yy[j] = yy[j - 1]; - yy[j - 1] = yy[j - 2]; - yy[j - 2] = yy[j - 3]; - yy[j - 3] = yy[j - 4]; - } - for (; j > 0; j--) { - yy[j] = yy[j - 1]; - } - - yy[0] = tmp; - { - int zt, zn, zvec, zj; - if (groupPos == 0) { - groupNo++; - groupPos = G_SIZE; - } - groupPos--; - zt = selector[groupNo]; - zn = minLens[zt]; - zvec = bsR(zn); - while (zvec > limit[zt][zn]) { - zn++; - { - { - while (bsLive < 1) { - int zzi; - char thech = 0; - try { - thech = (char) bsStream.read(); - } catch (IOException e) { - compressedStreamEOF(); - } - zzi = thech; - bsBuff = (bsBuff << 8) | (zzi & 0xff); - bsLive += 8; - } - } - zj = (bsBuff >> (bsLive - 1)) & 1; - bsLive--; - } - zvec = (zvec << 1) | zj; - } - nextSym = perm[zt][zvec - base[zt][zn]]; - } - continue; - } - } - } - - private void setupBlock() { - int[] cftab = new int[257]; - char ch; - - cftab[0] = 0; - for (i = 1; i <= 256; i++) { - cftab[i] = unzftab[i - 1]; - } - for (i = 1; i <= 256; i++) { - cftab[i] += cftab[i - 1]; - } - - for (i = 0; i <= last; i++) { - ch = (char) ll8[i]; - tt[cftab[ch]] = i; - cftab[ch]++; - } - cftab = null; - - tPos = tt[origPtr]; - - count = 0; - i2 = 0; - ch2 = 256; /* not a char and not EOF */ - - if (blockRandomised) { - rNToGo = 0; - rTPos = 0; - setupRandPartA(); - } else { - setupNoRandPartA(); - } - } - - private void setupRandPartA() { - if (i2 <= last) { - chPrev = ch2; - ch2 = ll8[tPos]; - tPos = tt[tPos]; - if (rNToGo == 0) { - rNToGo = rNums[rTPos]; - rTPos++; - if (rTPos == 512) { - rTPos = 0; - } - } - rNToGo--; - ch2 ^= (int) ((rNToGo == 1) ? 1 : 0); - i2++; - - currentChar = ch2; - currentState = RAND_PART_B_STATE; - mCrc.updateCRC(ch2); - } else { - endBlock(); - initBlock(); - setupBlock(); - } - } - - private void setupNoRandPartA() { - if (i2 <= last) { - chPrev = ch2; - ch2 = ll8[tPos]; - tPos = tt[tPos]; - i2++; - - currentChar = ch2; - currentState = NO_RAND_PART_B_STATE; - mCrc.updateCRC(ch2); - } else { - endBlock(); - initBlock(); - setupBlock(); - } - } - - private void setupRandPartB() { - if (ch2 != chPrev) { - currentState = RAND_PART_A_STATE; - count = 1; - setupRandPartA(); - } else { - count++; - if (count >= 4) { - z = ll8[tPos]; - tPos = tt[tPos]; - if (rNToGo == 0) { - rNToGo = rNums[rTPos]; - rTPos++; - if (rTPos == 512) { - rTPos = 0; - } - } - rNToGo--; - z ^= ((rNToGo == 1) ? 1 : 0); - j2 = 0; - currentState = RAND_PART_C_STATE; - setupRandPartC(); - } else { - currentState = RAND_PART_A_STATE; - setupRandPartA(); - } - } - } - - private void setupRandPartC() { - if (j2 < (int) z) { - currentChar = ch2; - mCrc.updateCRC(ch2); - j2++; - } else { - currentState = RAND_PART_A_STATE; - i2++; - count = 0; - setupRandPartA(); - } - } - - private void setupNoRandPartB() { - if (ch2 != chPrev) { - currentState = NO_RAND_PART_A_STATE; - count = 1; - setupNoRandPartA(); - } else { - count++; - if (count >= 4) { - z = ll8[tPos]; - tPos = tt[tPos]; - currentState = NO_RAND_PART_C_STATE; - j2 = 0; - setupNoRandPartC(); - } else { - currentState = NO_RAND_PART_A_STATE; - setupNoRandPartA(); - } - } - } - - private void setupNoRandPartC() { - if (j2 < (int) z) { - currentChar = ch2; - mCrc.updateCRC(ch2); - j2++; - } else { - currentState = NO_RAND_PART_A_STATE; - i2++; - count = 0; - setupNoRandPartA(); - } - } - - private void setDecompressStructureSizes(int newSize100k) { - if (!(0 <= newSize100k && newSize100k <= 9 && 0 <= blockSize100k - && blockSize100k <= 9)) { - // throw new IOException("Invalid block size"); - } - - blockSize100k = newSize100k; - - if (newSize100k == 0) { - return; - } - - int n = baseBlockSize * newSize100k; - ll8 = new char[n]; - tt = new int[n]; - } -} - diff --git a/src/main/java/org/apache/tools/bzip2/CBZip2OutputStream.java b/src/main/java/org/apache/tools/bzip2/CBZip2OutputStream.java deleted file mode 100644 index 521cfe70d..000000000 --- a/src/main/java/org/apache/tools/bzip2/CBZip2OutputStream.java +++ /dev/null @@ -1,1665 +0,0 @@ -/* - * The Apache Software License, Version 1.1 - * - * Copyright (c) 2001-2003 The Apache Software Foundation. All rights - * reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * - * 3. The end-user documentation included with the redistribution, if - * any, must include the following acknowlegement: - * "This product includes software developed by the - * Apache Software Foundation (http://www.apache.org/)." - * Alternately, this acknowlegement may appear in the software itself, - * if and wherever such third-party acknowlegements normally appear. - * - * 4. The names "Ant" and "Apache Software - * Foundation" must not be used to endorse or promote products derived - * from this software without prior written permission. For written - * permission, please contact apache@apache.org. - * - * 5. Products derived from this software may not be called "Apache" - * nor may "Apache" appear in their names without prior written - * permission of the Apache Group. - * - * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED - * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES - * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR - * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF - * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, - * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT - * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * ==================================================================== - * - * This software consists of voluntary contributions made by many - * individuals on behalf of the Apache Software Foundation. For more - * information on the Apache Software Foundation, please see - * . - */ - -/* - * This package is based on the work done by Keiron Liddle, Aftex Software - * to whom the Ant project is very grateful for his - * great code. - */ - -package org.apache.tools.bzip2; - -import java.io.IOException; -import java.io.OutputStream; - -/** - * An output stream that compresses into the BZip2 format (without the file - * header chars) into another stream. - * - * @author Keiron Liddle - * - * TODO: Update to BZip2 1.0.1 - */ -public class CBZip2OutputStream extends OutputStream implements BZip2Constants { - protected static final int SETMASK = (1 << 21); - protected static final int CLEARMASK = (~SETMASK); - protected static final int GREATER_ICOST = 15; - protected static final int LESSER_ICOST = 0; - protected static final int SMALL_THRESH = 20; - protected static final int DEPTH_THRESH = 10; - - /* - If you are ever unlucky/improbable enough - to get a stack overflow whilst sorting, - increase the following constant and try - again. In practice I have never seen the - stack go above 27 elems, so the following - limit seems very generous. - */ - protected static final int QSORT_STACK_SIZE = 1000; - - private static void panic() { - System.out.println("panic"); - //throw new CError(); - } - - private void makeMaps() { - int i; - nInUse = 0; - for (i = 0; i < 256; i++) { - if (inUse[i]) { - seqToUnseq[nInUse] = (char) i; - unseqToSeq[i] = (char) nInUse; - nInUse++; - } - } - } - - protected static void hbMakeCodeLengths(char[] len, int[] freq, - int alphaSize, int maxLen) { - /* - Nodes and heap entries run from 1. Entry 0 - for both the heap and nodes is a sentinel. - */ - int nNodes, nHeap, n1, n2, i, j, k; - boolean tooLong; - - int[] heap = new int[MAX_ALPHA_SIZE + 2]; - int[] weight = new int[MAX_ALPHA_SIZE * 2]; - int[] parent = new int[MAX_ALPHA_SIZE * 2]; - - for (i = 0; i < alphaSize; i++) { - weight[i + 1] = (freq[i] == 0 ? 1 : freq[i]) << 8; - } - - while (true) { - nNodes = alphaSize; - nHeap = 0; - - heap[0] = 0; - weight[0] = 0; - parent[0] = -2; - - for (i = 1; i <= alphaSize; i++) { - parent[i] = -1; - nHeap++; - heap[nHeap] = i; - { - int zz, tmp; - zz = nHeap; - tmp = heap[zz]; - while (weight[tmp] < weight[heap[zz >> 1]]) { - heap[zz] = heap[zz >> 1]; - zz >>= 1; - } - heap[zz] = tmp; - } - } - if (!(nHeap < (MAX_ALPHA_SIZE + 2))) { - panic(); - } - - while (nHeap > 1) { - n1 = heap[1]; - heap[1] = heap[nHeap]; - nHeap--; - { - int zz = 0, yy = 0, tmp = 0; - zz = 1; - tmp = heap[zz]; - while (true) { - yy = zz << 1; - if (yy > nHeap) { - break; - } - if (yy < nHeap - && weight[heap[yy + 1]] < weight[heap[yy]]) { - yy++; - } - if (weight[tmp] < weight[heap[yy]]) { - break; - } - heap[zz] = heap[yy]; - zz = yy; - } - heap[zz] = tmp; - } - n2 = heap[1]; - heap[1] = heap[nHeap]; - nHeap--; - { - int zz = 0, yy = 0, tmp = 0; - zz = 1; - tmp = heap[zz]; - while (true) { - yy = zz << 1; - if (yy > nHeap) { - break; - } - if (yy < nHeap - && weight[heap[yy + 1]] < weight[heap[yy]]) { - yy++; - } - if (weight[tmp] < weight[heap[yy]]) { - break; - } - heap[zz] = heap[yy]; - zz = yy; - } - heap[zz] = tmp; - } - nNodes++; - parent[n1] = parent[n2] = nNodes; - - weight[nNodes] = ((weight[n1] & 0xffffff00) - + (weight[n2] & 0xffffff00)) - | (1 + (((weight[n1] & 0x000000ff) > - (weight[n2] & 0x000000ff)) ? - (weight[n1] & 0x000000ff) : - (weight[n2] & 0x000000ff))); - - parent[nNodes] = -1; - nHeap++; - heap[nHeap] = nNodes; - { - int zz = 0, tmp = 0; - zz = nHeap; - tmp = heap[zz]; - while (weight[tmp] < weight[heap[zz >> 1]]) { - heap[zz] = heap[zz >> 1]; - zz >>= 1; - } - heap[zz] = tmp; - } - } - if (!(nNodes < (MAX_ALPHA_SIZE * 2))) { - panic(); - } - - tooLong = false; - for (i = 1; i <= alphaSize; i++) { - j = 0; - k = i; - while (parent[k] >= 0) { - k = parent[k]; - j++; - } - len[i - 1] = (char) j; - if (j > maxLen) { - tooLong = true; - } - } - - if (!tooLong) { - break; - } - - for (i = 1; i < alphaSize; i++) { - j = weight[i] >> 8; - j = 1 + (j / 2); - weight[i] = j << 8; - } - } - } - - /* - index of the last char in the block, so - the block size == last + 1. - */ - int last; - - /* - index in zptr[] of original string after sorting. - */ - int origPtr; - - /* - always: in the range 0 .. 9. - The current block size is 100000 * this number. - */ - int blockSize100k; - - boolean blockRandomised; - - int bytesOut; - int bsBuff; - int bsLive; - CRC mCrc = new CRC(); - - private boolean[] inUse = new boolean[256]; - private int nInUse; - - private char[] seqToUnseq = new char[256]; - private char[] unseqToSeq = new char[256]; - - private char[] selector = new char[MAX_SELECTORS]; - private char[] selectorMtf = new char[MAX_SELECTORS]; - - private char[] block; - private int[] quadrant; - private int[] zptr; - private short[] szptr; - private int[] ftab; - - private int nMTF; - - private int[] mtfFreq = new int[MAX_ALPHA_SIZE]; - - /* - * Used when sorting. If too many long comparisons - * happen, we stop sorting, randomise the block - * slightly, and try again. - */ - private int workFactor; - private int workDone; - private int workLimit; - private boolean firstAttempt; - private int nBlocksRandomised; - - private int currentChar = -1; - private int runLength = 0; - - public CBZip2OutputStream(OutputStream inStream) throws IOException { - this(inStream, 9); - } - - public CBZip2OutputStream(OutputStream inStream, int inBlockSize) - throws IOException { - block = null; - quadrant = null; - zptr = null; - ftab = null; - - bsSetStream(inStream); - - workFactor = 50; - if (inBlockSize > 9) { - inBlockSize = 9; - } - if (inBlockSize < 1) { - inBlockSize = 1; - } - blockSize100k = inBlockSize; - allocateCompressStructures(); - initialize(); - initBlock(); - } - - /** - * - * modified by Oliver Merkel, 010128 - * - */ - public void write(int bv) throws IOException { - int b = (256 + bv) % 256; - if (currentChar != -1) { - if (currentChar == b) { - runLength++; - if (runLength > 254) { - writeRun(); - currentChar = -1; - runLength = 0; - } - } else { - writeRun(); - runLength = 1; - currentChar = b; - } - } else { - currentChar = b; - runLength++; - } - } - - private void writeRun() throws IOException { - if (last < allowableBlockSize) { - inUse[currentChar] = true; - for (int i = 0; i < runLength; i++) { - mCrc.updateCRC((char) currentChar); - } - switch (runLength) { - case 1: - last++; - block[last + 1] = (char) currentChar; - break; - case 2: - last++; - block[last + 1] = (char) currentChar; - last++; - block[last + 1] = (char) currentChar; - break; - case 3: - last++; - block[last + 1] = (char) currentChar; - last++; - block[last + 1] = (char) currentChar; - last++; - block[last + 1] = (char) currentChar; - break; - default: - inUse[runLength - 4] = true; - last++; - block[last + 1] = (char) currentChar; - last++; - block[last + 1] = (char) currentChar; - last++; - block[last + 1] = (char) currentChar; - last++; - block[last + 1] = (char) currentChar; - last++; - block[last + 1] = (char) (runLength - 4); - break; - } - } else { - endBlock(); - initBlock(); - writeRun(); - } - } - - boolean closed = false; - - protected void finalize() throws Throwable { - close(); - super.finalize(); - } - - public void close() throws IOException { - if (closed) { - return; - } - - if (runLength > 0) { - writeRun(); - } - currentChar = -1; - endBlock(); - endCompression(); - closed = true; - super.close(); - bsStream.close(); - } - - public void flush() throws IOException { - super.flush(); - bsStream.flush(); - } - - private int blockCRC, combinedCRC; - - private void initialize() throws IOException { - bytesOut = 0; - nBlocksRandomised = 0; - - /* Write `magic' bytes h indicating file-format == huffmanised, - followed by a digit indicating blockSize100k. - */ - bsPutUChar('h'); - bsPutUChar('0' + blockSize100k); - - combinedCRC = 0; - } - - private int allowableBlockSize; - - private void initBlock() { - // blockNo++; - mCrc.initialiseCRC(); - last = -1; - // ch = 0; - - for (int i = 0; i < 256; i++) { - inUse[i] = false; - } - - /* 20 is just a paranoia constant */ - allowableBlockSize = baseBlockSize * blockSize100k - 20; - } - - private void endBlock() throws IOException { - blockCRC = mCrc.getFinalCRC(); - combinedCRC = (combinedCRC << 1) | (combinedCRC >>> 31); - combinedCRC ^= blockCRC; - - /* sort the block and establish posn of original string */ - doReversibleTransformation(); - - /* - A 6-byte block header, the value chosen arbitrarily - as 0x314159265359 :-). A 32 bit value does not really - give a strong enough guarantee that the value will not - appear by chance in the compressed datastream. Worst-case - probability of this event, for a 900k block, is about - 2.0e-3 for 32 bits, 1.0e-5 for 40 bits and 4.0e-8 for 48 bits. - For a compressed file of size 100Gb -- about 100000 blocks -- - only a 48-bit marker will do. NB: normal compression/ - decompression do *not* rely on these statistical properties. - They are only important when trying to recover blocks from - damaged files. - */ - bsPutUChar(0x31); - bsPutUChar(0x41); - bsPutUChar(0x59); - bsPutUChar(0x26); - bsPutUChar(0x53); - bsPutUChar(0x59); - - /* Now the block's CRC, so it is in a known place. */ - bsPutint(blockCRC); - - /* Now a single bit indicating randomisation. */ - if (blockRandomised) { - bsW(1, 1); - nBlocksRandomised++; - } else { - bsW(1, 0); - } - - /* Finally, block's contents proper. */ - moveToFrontCodeAndSend(); - } - - private void endCompression() throws IOException { - /* - Now another magic 48-bit number, 0x177245385090, to - indicate the end of the last block. (sqrt(pi), if - you want to know. I did want to use e, but it contains - too much repetition -- 27 18 28 18 28 46 -- for me - to feel statistically comfortable. Call me paranoid.) - */ - bsPutUChar(0x17); - bsPutUChar(0x72); - bsPutUChar(0x45); - bsPutUChar(0x38); - bsPutUChar(0x50); - bsPutUChar(0x90); - - bsPutint(combinedCRC); - - bsFinishedWithStream(); - } - - private void hbAssignCodes (int[] code, char[] length, int minLen, - int maxLen, int alphaSize) { - int n, vec, i; - - vec = 0; - for (n = minLen; n <= maxLen; n++) { - for (i = 0; i < alphaSize; i++) { - if (length[i] == n) { - code[i] = vec; - vec++; - } - }; - vec <<= 1; - } - } - - private void bsSetStream(OutputStream f) { - bsStream = f; - bsLive = 0; - bsBuff = 0; - bytesOut = 0; - } - - private void bsFinishedWithStream() throws IOException { - while (bsLive > 0) { - int ch = (bsBuff >> 24); - try { - bsStream.write(ch); // write 8-bit - } catch (IOException e) { - throw e; - } - bsBuff <<= 8; - bsLive -= 8; - bytesOut++; - } - } - - private void bsW(int n, int v) throws IOException { - while (bsLive >= 8) { - int ch = (bsBuff >> 24); - try { - bsStream.write(ch); // write 8-bit - } catch (IOException e) { - throw e; - } - bsBuff <<= 8; - bsLive -= 8; - bytesOut++; - } - bsBuff |= (v << (32 - bsLive - n)); - bsLive += n; - } - - private void bsPutUChar(int c) throws IOException { - bsW(8, c); - } - - private void bsPutint(int u) throws IOException { - bsW(8, (u >> 24) & 0xff); - bsW(8, (u >> 16) & 0xff); - bsW(8, (u >> 8) & 0xff); - bsW(8, u & 0xff); - } - - private void bsPutIntVS(int numBits, int c) throws IOException { - bsW(numBits, c); - } - - private void sendMTFValues() throws IOException { - char len[][] = new char[N_GROUPS][MAX_ALPHA_SIZE]; - - int v, t, i, j, gs, ge, totc, bt, bc, iter; - int nSelectors = 0, alphaSize, minLen, maxLen, selCtr; - int nGroups; - - alphaSize = nInUse + 2; - for (t = 0; t < N_GROUPS; t++) { - for (v = 0; v < alphaSize; v++) { - len[t][v] = (char) GREATER_ICOST; - } - } - - /* Decide how many coding tables to use */ - if (nMTF <= 0) { - panic(); - } - - if (nMTF < 200) { - nGroups = 2; - } else if (nMTF < 600) { - nGroups = 3; - } else if (nMTF < 1200) { - nGroups = 4; - } else if (nMTF < 2400) { - nGroups = 5; - } else { - nGroups = 6; - } - - /* Generate an initial set of coding tables */ { - int nPart, remF, tFreq, aFreq; - - nPart = nGroups; - remF = nMTF; - gs = 0; - while (nPart > 0) { - tFreq = remF / nPart; - ge = gs - 1; - aFreq = 0; - while (aFreq < tFreq && ge < alphaSize - 1) { - ge++; - aFreq += mtfFreq[ge]; - } - - if (ge > gs && nPart != nGroups && nPart != 1 - && ((nGroups - nPart) % 2 == 1)) { - aFreq -= mtfFreq[ge]; - ge--; - } - - for (v = 0; v < alphaSize; v++) { - if (v >= gs && v <= ge) { - len[nPart - 1][v] = (char) LESSER_ICOST; - } else { - len[nPart - 1][v] = (char) GREATER_ICOST; - } - } - - nPart--; - gs = ge + 1; - remF -= aFreq; - } - } - - int[][] rfreq = new int[N_GROUPS][MAX_ALPHA_SIZE]; - int[] fave = new int[N_GROUPS]; - short[] cost = new short[N_GROUPS]; - /* - Iterate up to N_ITERS times to improve the tables. - */ - for (iter = 0; iter < N_ITERS; iter++) { - for (t = 0; t < nGroups; t++) { - fave[t] = 0; - } - - for (t = 0; t < nGroups; t++) { - for (v = 0; v < alphaSize; v++) { - rfreq[t][v] = 0; - } - } - - nSelectors = 0; - totc = 0; - gs = 0; - while (true) { - - /* Set group start & end marks. */ - if (gs >= nMTF) { - break; - } - ge = gs + G_SIZE - 1; - if (ge >= nMTF) { - ge = nMTF - 1; - } - - /* - Calculate the cost of this group as coded - by each of the coding tables. - */ - for (t = 0; t < nGroups; t++) { - cost[t] = 0; - } - - if (nGroups == 6) { - short cost0, cost1, cost2, cost3, cost4, cost5; - cost0 = cost1 = cost2 = cost3 = cost4 = cost5 = 0; - for (i = gs; i <= ge; i++) { - short icv = szptr[i]; - cost0 += len[0][icv]; - cost1 += len[1][icv]; - cost2 += len[2][icv]; - cost3 += len[3][icv]; - cost4 += len[4][icv]; - cost5 += len[5][icv]; - } - cost[0] = cost0; - cost[1] = cost1; - cost[2] = cost2; - cost[3] = cost3; - cost[4] = cost4; - cost[5] = cost5; - } else { - for (i = gs; i <= ge; i++) { - short icv = szptr[i]; - for (t = 0; t < nGroups; t++) { - cost[t] += len[t][icv]; - } - } - } - - /* - Find the coding table which is best for this group, - and record its identity in the selector table. - */ - bc = 999999999; - bt = -1; - for (t = 0; t < nGroups; t++) { - if (cost[t] < bc) { - bc = cost[t]; - bt = t; - } - }; - totc += bc; - fave[bt]++; - selector[nSelectors] = (char) bt; - nSelectors++; - - /* - Increment the symbol frequencies for the selected table. - */ - for (i = gs; i <= ge; i++) { - rfreq[bt][szptr[i]]++; - } - - gs = ge + 1; - } - - /* - Recompute the tables based on the accumulated frequencies. - */ - for (t = 0; t < nGroups; t++) { - hbMakeCodeLengths(len[t], rfreq[t], alphaSize, 20); - } - } - - rfreq = null; - fave = null; - cost = null; - - if (!(nGroups < 8)) { - panic(); - } - if (!(nSelectors < 32768 && nSelectors <= (2 + (900000 / G_SIZE)))) { - panic(); - } - - - /* Compute MTF values for the selectors. */ - { - char[] pos = new char[N_GROUPS]; - char ll_i, tmp2, tmp; - for (i = 0; i < nGroups; i++) { - pos[i] = (char) i; - } - for (i = 0; i < nSelectors; i++) { - ll_i = selector[i]; - j = 0; - tmp = pos[j]; - while (ll_i != tmp) { - j++; - tmp2 = tmp; - tmp = pos[j]; - pos[j] = tmp2; - } - pos[0] = tmp; - selectorMtf[i] = (char) j; - } - } - - int[][] code = new int[N_GROUPS][MAX_ALPHA_SIZE]; - - /* Assign actual codes for the tables. */ - for (t = 0; t < nGroups; t++) { - minLen = 32; - maxLen = 0; - for (i = 0; i < alphaSize; i++) { - if (len[t][i] > maxLen) { - maxLen = len[t][i]; - } - if (len[t][i] < minLen) { - minLen = len[t][i]; - } - } - if (maxLen > 20) { - panic(); - } - if (minLen < 1) { - panic(); - } - hbAssignCodes(code[t], len[t], minLen, maxLen, alphaSize); - } - - /* Transmit the mapping table. */ - { - boolean[] inUse16 = new boolean[16]; - for (i = 0; i < 16; i++) { - inUse16[i] = false; - for (j = 0; j < 16; j++) { - if (inUse[i * 16 + j]) { - inUse16[i] = true; - } - } - } - - for (i = 0; i < 16; i++) { - if (inUse16[i]) { - bsW(1, 1); - } else { - bsW(1, 0); - } - } - - for (i = 0; i < 16; i++) { - if (inUse16[i]) { - for (j = 0; j < 16; j++) { - if (inUse[i * 16 + j]) { - bsW(1, 1); - } else { - bsW(1, 0); - } - } - } - } - - } - - /* Now the selectors. */ - bsW (3, nGroups); - bsW (15, nSelectors); - for (i = 0; i < nSelectors; i++) { - for (j = 0; j < selectorMtf[i]; j++) { - bsW(1, 1); - } - bsW(1, 0); - } - - /* Now the coding tables. */ - for (t = 0; t < nGroups; t++) { - int curr = len[t][0]; - bsW(5, curr); - for (i = 0; i < alphaSize; i++) { - while (curr < len[t][i]) { - bsW(2, 2); - curr++; /* 10 */ - } - while (curr > len[t][i]) { - bsW(2, 3); - curr--; /* 11 */ - } - bsW (1, 0); - } - } - - /* And finally, the block data proper */ - selCtr = 0; - gs = 0; - while (true) { - if (gs >= nMTF) { - break; - } - ge = gs + G_SIZE - 1; - if (ge >= nMTF) { - ge = nMTF - 1; - } - for (i = gs; i <= ge; i++) { - bsW(len[selector[selCtr]][szptr[i]], - code[selector[selCtr]][szptr[i]]); - } - - gs = ge + 1; - selCtr++; - } - if (!(selCtr == nSelectors)) { - panic(); - } - } - - private void moveToFrontCodeAndSend () throws IOException { - bsPutIntVS(24, origPtr); - generateMTFValues(); - sendMTFValues(); - } - - private OutputStream bsStream; - - private void simpleSort(int lo, int hi, int d) { - int i, j, h, bigN, hp; - int v; - - bigN = hi - lo + 1; - if (bigN < 2) { - return; - } - - hp = 0; - while (incs[hp] < bigN) { - hp++; - } - hp--; - - for (; hp >= 0; hp--) { - h = incs[hp]; - - i = lo + h; - while (true) { - /* copy 1 */ - if (i > hi) { - break; - } - v = zptr[i]; - j = i; - while (fullGtU(zptr[j - h] + d, v + d)) { - zptr[j] = zptr[j - h]; - j = j - h; - if (j <= (lo + h - 1)) { - break; - } - } - zptr[j] = v; - i++; - - /* copy 2 */ - if (i > hi) { - break; - } - v = zptr[i]; - j = i; - while (fullGtU(zptr[j - h] + d, v + d)) { - zptr[j] = zptr[j - h]; - j = j - h; - if (j <= (lo + h - 1)) { - break; - } - } - zptr[j] = v; - i++; - - /* copy 3 */ - if (i > hi) { - break; - } - v = zptr[i]; - j = i; - while (fullGtU(zptr[j - h] + d, v + d)) { - zptr[j] = zptr[j - h]; - j = j - h; - if (j <= (lo + h - 1)) { - break; - } - } - zptr[j] = v; - i++; - - if (workDone > workLimit && firstAttempt) { - return; - } - } - } - } - - private void vswap(int p1, int p2, int n) { - int temp = 0; - while (n > 0) { - temp = zptr[p1]; - zptr[p1] = zptr[p2]; - zptr[p2] = temp; - p1++; - p2++; - n--; - } - } - - private char med3(char a, char b, char c) { - char t; - if (a > b) { - t = a; - a = b; - b = t; - } - if (b > c) { - t = b; - b = c; - c = t; - } - if (a > b) { - b = a; - } - return b; - } - - private static class StackElem { - int ll; - int hh; - int dd; - } - - private void qSort3(int loSt, int hiSt, int dSt) { - int unLo, unHi, ltLo, gtHi, med, n, m; - int sp, lo, hi, d; - StackElem[] stack = new StackElem[QSORT_STACK_SIZE]; - for (int count = 0; count < QSORT_STACK_SIZE; count++) { - stack[count] = new StackElem(); - } - - sp = 0; - - stack[sp].ll = loSt; - stack[sp].hh = hiSt; - stack[sp].dd = dSt; - sp++; - - while (sp > 0) { - if (sp >= QSORT_STACK_SIZE) { - panic(); - } - - sp--; - lo = stack[sp].ll; - hi = stack[sp].hh; - d = stack[sp].dd; - - if (hi - lo < SMALL_THRESH || d > DEPTH_THRESH) { - simpleSort(lo, hi, d); - if (workDone > workLimit && firstAttempt) { - return; - } - continue; - } - - med = med3(block[zptr[lo] + d + 1], - block[zptr[hi ] + d + 1], - block[zptr[(lo + hi) >> 1] + d + 1]); - - unLo = ltLo = lo; - unHi = gtHi = hi; - - while (true) { - while (true) { - if (unLo > unHi) { - break; - } - n = ((int) block[zptr[unLo] + d + 1]) - med; - if (n == 0) { - int temp = 0; - temp = zptr[unLo]; - zptr[unLo] = zptr[ltLo]; - zptr[ltLo] = temp; - ltLo++; - unLo++; - continue; - }; - if (n > 0) { - break; - } - unLo++; - } - while (true) { - if (unLo > unHi) { - break; - } - n = ((int) block[zptr[unHi] + d + 1]) - med; - if (n == 0) { - int temp = 0; - temp = zptr[unHi]; - zptr[unHi] = zptr[gtHi]; - zptr[gtHi] = temp; - gtHi--; - unHi--; - continue; - }; - if (n < 0) { - break; - } - unHi--; - } - if (unLo > unHi) { - break; - } - int temp = 0; - temp = zptr[unLo]; - zptr[unLo] = zptr[unHi]; - zptr[unHi] = temp; - unLo++; - unHi--; - } - - if (gtHi < ltLo) { - stack[sp].ll = lo; - stack[sp].hh = hi; - stack[sp].dd = d + 1; - sp++; - continue; - } - - n = ((ltLo - lo) < (unLo - ltLo)) ? (ltLo - lo) : (unLo - ltLo); - vswap(lo, unLo - n, n); - m = ((hi - gtHi) < (gtHi - unHi)) ? (hi - gtHi) : (gtHi - unHi); - vswap(unLo, hi - m + 1, m); - - n = lo + unLo - ltLo - 1; - m = hi - (gtHi - unHi) + 1; - - stack[sp].ll = lo; - stack[sp].hh = n; - stack[sp].dd = d; - sp++; - - stack[sp].ll = n + 1; - stack[sp].hh = m - 1; - stack[sp].dd = d + 1; - sp++; - - stack[sp].ll = m; - stack[sp].hh = hi; - stack[sp].dd = d; - sp++; - } - } - - private void mainSort() { - int i, j, ss, sb; - int[] runningOrder = new int[256]; - int[] copy = new int[256]; - boolean[] bigDone = new boolean[256]; - int c1, c2; - int numQSorted; - - /* - In the various block-sized structures, live data runs - from 0 to last+NUM_OVERSHOOT_BYTES inclusive. First, - set up the overshoot area for block. - */ - - // if (verbosity >= 4) fprintf ( stderr, " sort initialise ...\n" ); - for (i = 0; i < NUM_OVERSHOOT_BYTES; i++) { - block[last + i + 2] = block[(i % (last + 1)) + 1]; - } - for (i = 0; i <= last + NUM_OVERSHOOT_BYTES; i++) { - quadrant[i] = 0; - } - - block[0] = (char) (block[last + 1]); - - if (last < 4000) { - /* - Use simpleSort(), since the full sorting mechanism - has quite a large constant overhead. - */ - for (i = 0; i <= last; i++) { - zptr[i] = i; - } - firstAttempt = false; - workDone = workLimit = 0; - simpleSort(0, last, 0); - } else { - numQSorted = 0; - for (i = 0; i <= 255; i++) { - bigDone[i] = false; - } - - for (i = 0; i <= 65536; i++) { - ftab[i] = 0; - } - - c1 = block[0]; - for (i = 0; i <= last; i++) { - c2 = block[i + 1]; - ftab[(c1 << 8) + c2]++; - c1 = c2; - } - - for (i = 1; i <= 65536; i++) { - ftab[i] += ftab[i - 1]; - } - - c1 = block[1]; - for (i = 0; i < last; i++) { - c2 = block[i + 2]; - j = (c1 << 8) + c2; - c1 = c2; - ftab[j]--; - zptr[ftab[j]] = i; - } - - j = ((block[last + 1]) << 8) + (block[1]); - ftab[j]--; - zptr[ftab[j]] = last; - - /* - Now ftab contains the first loc of every small bucket. - Calculate the running order, from smallest to largest - big bucket. - */ - - for (i = 0; i <= 255; i++) { - runningOrder[i] = i; - } - - { - int vv; - int h = 1; - do { - h = 3 * h + 1; - } - while (h <= 256); - do { - h = h / 3; - for (i = h; i <= 255; i++) { - vv = runningOrder[i]; - j = i; - while ((ftab[((runningOrder[j - h]) + 1) << 8] - - ftab[(runningOrder[j - h]) << 8]) > - (ftab[((vv) + 1) << 8] - ftab[(vv) << 8])) { - runningOrder[j] = runningOrder[j - h]; - j = j - h; - if (j <= (h - 1)) { - break; - } - } - runningOrder[j] = vv; - } - } while (h != 1); - } - - /* - The main sorting loop. - */ - for (i = 0; i <= 255; i++) { - - /* - Process big buckets, starting with the least full. - */ - ss = runningOrder[i]; - - /* - Complete the big bucket [ss] by quicksorting - any unsorted small buckets [ss, j]. Hopefully - previous pointer-scanning phases have already - completed many of the small buckets [ss, j], so - we don't have to sort them at all. - */ - for (j = 0; j <= 255; j++) { - sb = (ss << 8) + j; - if (!((ftab[sb] & SETMASK) == SETMASK)) { - int lo = ftab[sb] & CLEARMASK; - int hi = (ftab[sb + 1] & CLEARMASK) - 1; - if (hi > lo) { - qSort3(lo, hi, 2); - numQSorted += (hi - lo + 1); - if (workDone > workLimit && firstAttempt) { - return; - } - } - ftab[sb] |= SETMASK; - } - } - - /* - The ss big bucket is now done. Record this fact, - and update the quadrant descriptors. Remember to - update quadrants in the overshoot area too, if - necessary. The "if (i < 255)" test merely skips - this updating for the last bucket processed, since - updating for the last bucket is pointless. - */ - bigDone[ss] = true; - - if (i < 255) { - int bbStart = ftab[ss << 8] & CLEARMASK; - int bbSize = (ftab[(ss + 1) << 8] & CLEARMASK) - bbStart; - int shifts = 0; - - while ((bbSize >> shifts) > 65534) { - shifts++; - } - - for (j = 0; j < bbSize; j++) { - int a2update = zptr[bbStart + j]; - int qVal = (j >> shifts); - quadrant[a2update] = qVal; - if (a2update < NUM_OVERSHOOT_BYTES) { - quadrant[a2update + last + 1] = qVal; - } - } - - if (!(((bbSize - 1) >> shifts) <= 65535)) { - panic(); - } - } - - /* - Now scan this big bucket so as to synthesise the - sorted order for small buckets [t, ss] for all t != ss. - */ - for (j = 0; j <= 255; j++) { - copy[j] = ftab[(j << 8) + ss] & CLEARMASK; - } - - for (j = ftab[ss << 8] & CLEARMASK; - j < (ftab[(ss + 1) << 8] & CLEARMASK); j++) { - c1 = block[zptr[j]]; - if (!bigDone[c1]) { - zptr[copy[c1]] = zptr[j] == 0 ? last : zptr[j] - 1; - copy[c1]++; - } - } - - for (j = 0; j <= 255; j++) { - ftab[(j << 8) + ss] |= SETMASK; - } - } - } - } - - private void randomiseBlock() { - int i; - int rNToGo = 0; - int rTPos = 0; - for (i = 0; i < 256; i++) { - inUse[i] = false; - } - - for (i = 0; i <= last; i++) { - if (rNToGo == 0) { - rNToGo = (char) rNums[rTPos]; - rTPos++; - if (rTPos == 512) { - rTPos = 0; - } - } - rNToGo--; - block[i + 1] ^= ((rNToGo == 1) ? 1 : 0); - // handle 16 bit signed numbers - block[i + 1] &= 0xFF; - - inUse[block[i + 1]] = true; - } - } - - private void doReversibleTransformation() { - int i; - - workLimit = workFactor * last; - workDone = 0; - blockRandomised = false; - firstAttempt = true; - - mainSort(); - - if (workDone > workLimit && firstAttempt) { - randomiseBlock(); - workLimit = workDone = 0; - blockRandomised = true; - firstAttempt = false; - mainSort(); - } - - origPtr = -1; - for (i = 0; i <= last; i++) { - if (zptr[i] == 0) { - origPtr = i; - break; - } - }; - - if (origPtr == -1) { - panic(); - } - } - - private boolean fullGtU(int i1, int i2) { - int k; - char c1, c2; - int s1, s2; - - c1 = block[i1 + 1]; - c2 = block[i2 + 1]; - if (c1 != c2) { - return (c1 > c2); - } - i1++; - i2++; - - c1 = block[i1 + 1]; - c2 = block[i2 + 1]; - if (c1 != c2) { - return (c1 > c2); - } - i1++; - i2++; - - c1 = block[i1 + 1]; - c2 = block[i2 + 1]; - if (c1 != c2) { - return (c1 > c2); - } - i1++; - i2++; - - c1 = block[i1 + 1]; - c2 = block[i2 + 1]; - if (c1 != c2) { - return (c1 > c2); - } - i1++; - i2++; - - c1 = block[i1 + 1]; - c2 = block[i2 + 1]; - if (c1 != c2) { - return (c1 > c2); - } - i1++; - i2++; - - c1 = block[i1 + 1]; - c2 = block[i2 + 1]; - if (c1 != c2) { - return (c1 > c2); - } - i1++; - i2++; - - k = last + 1; - - do { - c1 = block[i1 + 1]; - c2 = block[i2 + 1]; - if (c1 != c2) { - return (c1 > c2); - } - s1 = quadrant[i1]; - s2 = quadrant[i2]; - if (s1 != s2) { - return (s1 > s2); - } - i1++; - i2++; - - c1 = block[i1 + 1]; - c2 = block[i2 + 1]; - if (c1 != c2) { - return (c1 > c2); - } - s1 = quadrant[i1]; - s2 = quadrant[i2]; - if (s1 != s2) { - return (s1 > s2); - } - i1++; - i2++; - - c1 = block[i1 + 1]; - c2 = block[i2 + 1]; - if (c1 != c2) { - return (c1 > c2); - } - s1 = quadrant[i1]; - s2 = quadrant[i2]; - if (s1 != s2) { - return (s1 > s2); - } - i1++; - i2++; - - c1 = block[i1 + 1]; - c2 = block[i2 + 1]; - if (c1 != c2) { - return (c1 > c2); - } - s1 = quadrant[i1]; - s2 = quadrant[i2]; - if (s1 != s2) { - return (s1 > s2); - } - i1++; - i2++; - - if (i1 > last) { - i1 -= last; - i1--; - }; - if (i2 > last) { - i2 -= last; - i2--; - }; - - k -= 4; - workDone++; - } while (k >= 0); - - return false; - } - - /* - Knuth's increments seem to work better - than Incerpi-Sedgewick here. Possibly - because the number of elems to sort is - usually small, typically <= 20. - */ - private int[] incs = { 1, 4, 13, 40, 121, 364, 1093, 3280, - 9841, 29524, 88573, 265720, - 797161, 2391484 }; - - private void allocateCompressStructures () { - int n = baseBlockSize * blockSize100k; - block = new char[(n + 1 + NUM_OVERSHOOT_BYTES)]; - quadrant = new int[(n + NUM_OVERSHOOT_BYTES)]; - zptr = new int[n]; - ftab = new int[65537]; - - if (block == null || quadrant == null || zptr == null - || ftab == null) { - //int totalDraw = (n + 1 + NUM_OVERSHOOT_BYTES) + (n + NUM_OVERSHOOT_BYTES) + n + 65537; - //compressOutOfMemory ( totalDraw, n ); - } - - /* - The back end needs a place to store the MTF values - whilst it calculates the coding tables. We could - put them in the zptr array. However, these values - will fit in a short, so we overlay szptr at the - start of zptr, in the hope of reducing the number - of cache misses induced by the multiple traversals - of the MTF values when calculating coding tables. - Seems to improve compression speed by about 1%. - */ - // szptr = zptr; - - - szptr = new short[2 * n]; - } - - private void generateMTFValues() { - char[] yy = new char[256]; - int i, j; - char tmp; - char tmp2; - int zPend; - int wr; - int EOB; - - makeMaps(); - EOB = nInUse + 1; - - for (i = 0; i <= EOB; i++) { - mtfFreq[i] = 0; - } - - wr = 0; - zPend = 0; - for (i = 0; i < nInUse; i++) { - yy[i] = (char) i; - } - - - for (i = 0; i <= last; i++) { - char ll_i; - - ll_i = unseqToSeq[block[zptr[i]]]; - - j = 0; - tmp = yy[j]; - while (ll_i != tmp) { - j++; - tmp2 = tmp; - tmp = yy[j]; - yy[j] = tmp2; - }; - yy[0] = tmp; - - if (j == 0) { - zPend++; - } else { - if (zPend > 0) { - zPend--; - while (true) { - switch (zPend % 2) { - case 0: - szptr[wr] = (short) RUNA; - wr++; - mtfFreq[RUNA]++; - break; - case 1: - szptr[wr] = (short) RUNB; - wr++; - mtfFreq[RUNB]++; - break; - }; - if (zPend < 2) { - break; - } - zPend = (zPend - 2) / 2; - }; - zPend = 0; - } - szptr[wr] = (short) (j + 1); - wr++; - mtfFreq[j + 1]++; - } - } - - if (zPend > 0) { - zPend--; - while (true) { - switch (zPend % 2) { - case 0: - szptr[wr] = (short) RUNA; - wr++; - mtfFreq[RUNA]++; - break; - case 1: - szptr[wr] = (short) RUNB; - wr++; - mtfFreq[RUNB]++; - break; - } - if (zPend < 2) { - break; - } - zPend = (zPend - 2) / 2; - } - } - - szptr[wr] = (short) EOB; - wr++; - mtfFreq[EOB]++; - - nMTF = wr; - } -} - - diff --git a/src/main/java/org/apache/tools/bzip2/CRC.java b/src/main/java/org/apache/tools/bzip2/CRC.java deleted file mode 100644 index bc8bc644b..000000000 --- a/src/main/java/org/apache/tools/bzip2/CRC.java +++ /dev/null @@ -1,167 +0,0 @@ -/* - * The Apache Software License, Version 1.1 - * - * Copyright (c) 2001-2002 The Apache Software Foundation. All rights - * reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * - * 3. The end-user documentation included with the redistribution, if - * any, must include the following acknowlegement: - * "This product includes software developed by the - * Apache Software Foundation (http://www.apache.org/)." - * Alternately, this acknowlegement may appear in the software itself, - * if and wherever such third-party acknowlegements normally appear. - * - * 4. The names "Ant" and "Apache Software - * Foundation" must not be used to endorse or promote products derived - * from this software without prior written permission. For written - * permission, please contact apache@apache.org. - * - * 5. Products derived from this software may not be called "Apache" - * nor may "Apache" appear in their names without prior written - * permission of the Apache Group. - * - * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED - * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES - * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR - * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF - * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, - * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT - * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * ==================================================================== - * - * This software consists of voluntary contributions made by many - * individuals on behalf of the Apache Software Foundation. For more - * information on the Apache Software Foundation, please see - * . - */ - -/* - * This package is based on the work done by Keiron Liddle, Aftex Software - * to whom the Ant project is very grateful for his - * great code. - */ - -package org.apache.tools.bzip2; - -/** - * A simple class the hold and calculate the CRC for sanity checking - * of the data. - * - * @author Keiron Liddle - */ -class CRC { - public static int crc32Table[] = { - 0x00000000, 0x04c11db7, 0x09823b6e, 0x0d4326d9, - 0x130476dc, 0x17c56b6b, 0x1a864db2, 0x1e475005, - 0x2608edb8, 0x22c9f00f, 0x2f8ad6d6, 0x2b4bcb61, - 0x350c9b64, 0x31cd86d3, 0x3c8ea00a, 0x384fbdbd, - 0x4c11db70, 0x48d0c6c7, 0x4593e01e, 0x4152fda9, - 0x5f15adac, 0x5bd4b01b, 0x569796c2, 0x52568b75, - 0x6a1936c8, 0x6ed82b7f, 0x639b0da6, 0x675a1011, - 0x791d4014, 0x7ddc5da3, 0x709f7b7a, 0x745e66cd, - 0x9823b6e0, 0x9ce2ab57, 0x91a18d8e, 0x95609039, - 0x8b27c03c, 0x8fe6dd8b, 0x82a5fb52, 0x8664e6e5, - 0xbe2b5b58, 0xbaea46ef, 0xb7a96036, 0xb3687d81, - 0xad2f2d84, 0xa9ee3033, 0xa4ad16ea, 0xa06c0b5d, - 0xd4326d90, 0xd0f37027, 0xddb056fe, 0xd9714b49, - 0xc7361b4c, 0xc3f706fb, 0xceb42022, 0xca753d95, - 0xf23a8028, 0xf6fb9d9f, 0xfbb8bb46, 0xff79a6f1, - 0xe13ef6f4, 0xe5ffeb43, 0xe8bccd9a, 0xec7dd02d, - 0x34867077, 0x30476dc0, 0x3d044b19, 0x39c556ae, - 0x278206ab, 0x23431b1c, 0x2e003dc5, 0x2ac12072, - 0x128e9dcf, 0x164f8078, 0x1b0ca6a1, 0x1fcdbb16, - 0x018aeb13, 0x054bf6a4, 0x0808d07d, 0x0cc9cdca, - 0x7897ab07, 0x7c56b6b0, 0x71159069, 0x75d48dde, - 0x6b93dddb, 0x6f52c06c, 0x6211e6b5, 0x66d0fb02, - 0x5e9f46bf, 0x5a5e5b08, 0x571d7dd1, 0x53dc6066, - 0x4d9b3063, 0x495a2dd4, 0x44190b0d, 0x40d816ba, - 0xaca5c697, 0xa864db20, 0xa527fdf9, 0xa1e6e04e, - 0xbfa1b04b, 0xbb60adfc, 0xb6238b25, 0xb2e29692, - 0x8aad2b2f, 0x8e6c3698, 0x832f1041, 0x87ee0df6, - 0x99a95df3, 0x9d684044, 0x902b669d, 0x94ea7b2a, - 0xe0b41de7, 0xe4750050, 0xe9362689, 0xedf73b3e, - 0xf3b06b3b, 0xf771768c, 0xfa325055, 0xfef34de2, - 0xc6bcf05f, 0xc27dede8, 0xcf3ecb31, 0xcbffd686, - 0xd5b88683, 0xd1799b34, 0xdc3abded, 0xd8fba05a, - 0x690ce0ee, 0x6dcdfd59, 0x608edb80, 0x644fc637, - 0x7a089632, 0x7ec98b85, 0x738aad5c, 0x774bb0eb, - 0x4f040d56, 0x4bc510e1, 0x46863638, 0x42472b8f, - 0x5c007b8a, 0x58c1663d, 0x558240e4, 0x51435d53, - 0x251d3b9e, 0x21dc2629, 0x2c9f00f0, 0x285e1d47, - 0x36194d42, 0x32d850f5, 0x3f9b762c, 0x3b5a6b9b, - 0x0315d626, 0x07d4cb91, 0x0a97ed48, 0x0e56f0ff, - 0x1011a0fa, 0x14d0bd4d, 0x19939b94, 0x1d528623, - 0xf12f560e, 0xf5ee4bb9, 0xf8ad6d60, 0xfc6c70d7, - 0xe22b20d2, 0xe6ea3d65, 0xeba91bbc, 0xef68060b, - 0xd727bbb6, 0xd3e6a601, 0xdea580d8, 0xda649d6f, - 0xc423cd6a, 0xc0e2d0dd, 0xcda1f604, 0xc960ebb3, - 0xbd3e8d7e, 0xb9ff90c9, 0xb4bcb610, 0xb07daba7, - 0xae3afba2, 0xaafbe615, 0xa7b8c0cc, 0xa379dd7b, - 0x9b3660c6, 0x9ff77d71, 0x92b45ba8, 0x9675461f, - 0x8832161a, 0x8cf30bad, 0x81b02d74, 0x857130c3, - 0x5d8a9099, 0x594b8d2e, 0x5408abf7, 0x50c9b640, - 0x4e8ee645, 0x4a4ffbf2, 0x470cdd2b, 0x43cdc09c, - 0x7b827d21, 0x7f436096, 0x7200464f, 0x76c15bf8, - 0x68860bfd, 0x6c47164a, 0x61043093, 0x65c52d24, - 0x119b4be9, 0x155a565e, 0x18197087, 0x1cd86d30, - 0x029f3d35, 0x065e2082, 0x0b1d065b, 0x0fdc1bec, - 0x3793a651, 0x3352bbe6, 0x3e119d3f, 0x3ad08088, - 0x2497d08d, 0x2056cd3a, 0x2d15ebe3, 0x29d4f654, - 0xc5a92679, 0xc1683bce, 0xcc2b1d17, 0xc8ea00a0, - 0xd6ad50a5, 0xd26c4d12, 0xdf2f6bcb, 0xdbee767c, - 0xe3a1cbc1, 0xe760d676, 0xea23f0af, 0xeee2ed18, - 0xf0a5bd1d, 0xf464a0aa, 0xf9278673, 0xfde69bc4, - 0x89b8fd09, 0x8d79e0be, 0x803ac667, 0x84fbdbd0, - 0x9abc8bd5, 0x9e7d9662, 0x933eb0bb, 0x97ffad0c, - 0xafb010b1, 0xab710d06, 0xa6322bdf, 0xa2f33668, - 0xbcb4666d, 0xb8757bda, 0xb5365d03, 0xb1f740b4 - }; - - public CRC() { - initialiseCRC(); - } - - void initialiseCRC() { - globalCrc = 0xffffffff; - } - - int getFinalCRC() { - return ~globalCrc; - } - - int getGlobalCRC() { - return globalCrc; - } - - void setGlobalCRC(int newCrc) { - globalCrc = newCrc; - } - - void updateCRC(int inCh) { - int temp = (globalCrc >> 24) ^ inCh; - if (temp < 0) { - temp = 256 + temp; - } - globalCrc = (globalCrc << 8) ^ CRC.crc32Table[temp]; - } - - int globalCrc; -} -