From 823fe989a42c52ad7d6b8c6d52777c6b6feacd7d Mon Sep 17 00:00:00 2001 From: David Huynh Date: Tue, 28 Sep 2010 00:26:53 +0000 Subject: [PATCH] =?UTF-8?q?Fixed=20Issue=20110:=20Import=20of=20single=20c?= =?UTF-8?q?olumn=20text=20file=20with=20Postal=20Codes=20shows=20only=201?= =?UTF-8?q?=20row=20with=20lots=20of=20=EF=BF=BD=20chars=20(=3F).=20(by=20?= =?UTF-8?q?enforcing=20a=20confidence=20threshold=20on=20the=20encoding=20?= =?UTF-8?q?guessing)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit git-svn-id: http://google-refine.googlecode.com/svn/trunk@1367 7d457c2a-affb-35e4-300a-418c747d4874 --- .../commands/project/CreateProjectCommand.java | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/main/src/com/google/refine/commands/project/CreateProjectCommand.java b/main/src/com/google/refine/commands/project/CreateProjectCommand.java index 8874e169d..061c6ad21 100644 --- a/main/src/com/google/refine/commands/project/CreateProjectCommand.java +++ b/main/src/com/google/refine/commands/project/CreateProjectCommand.java @@ -421,13 +421,16 @@ public class CreateProjectCommand extends Command { CharsetMatch[] charsetMatches = detector.setText(bytes).detectAll(); for (CharsetMatch charsetMatch : charsetMatches) { try { - reader = new InputStreamReader(inputStream, charsetMatch.getName()); - - options.setProperty("encoding", charsetMatch.getName()); - options.setProperty("encoding_confidence", Integer.toString(charsetMatch.getConfidence())); - - logger.info("Best encoding guess: {} [confidence: {}]", charsetMatch.getName(), charsetMatch.getConfidence()); - + int confidence = charsetMatch.getConfidence(); + if (confidence >= 50) { + reader = new InputStreamReader(inputStream, charsetMatch.getName()); + + options.setProperty("encoding", charsetMatch.getName()); + options.setProperty("encoding_confidence", Integer.toString(confidence)); + + logger.info("Best encoding guess: {} [confidence: {}]", charsetMatch.getName(), charsetMatch.getConfidence()); + } + break; } catch (UnsupportedEncodingException e) { // silent