From d5dd04965a7d5b3cb523b633d9519b029261a6e1 Mon Sep 17 00:00:00 2001
From: Tom Morris
Date: Fri, 18 Nov 2011 20:50:55 +0000
Subject: [PATCH] Allow user to optionally override source encoding in reinterpret function so they can fix up bad projects. Interpret empty string as system default encoding.

git-svn-id: http://google-refine.googlecode.com/svn/trunk@2380 7d457c2a-affb-35e4-300a-418c747d4874
---
Review notes (placed after the separator above, so they are not part of the
commit message):

* 3-argument form: args[2] is cast to String without an instanceof check,
  unlike args[1]. A non-null, non-String third argument therefore raises
  ClassCastException instead of returning an EvalError.
* Argument meaning depends on arity: with 2 arguments args[1] is the target
  encoding; with 3 arguments args[1] is the source encoding and args[2] is
  the target encoding.
* The local names read inverted: "decoder" is the charset handed to
  String.getBytes (string to bytes) and "encoder" is the charset handed to
  new String(bytes, ...) (bytes to string).

 .../expr/functions/strings/Reinterpret.java | 52 ++++++++++++++-----
 1 file changed, 39 insertions(+), 13 deletions(-)

diff --git a/main/src/com/google/refine/expr/functions/strings/Reinterpret.java b/main/src/com/google/refine/expr/functions/strings/Reinterpret.java
index 3cda82deb..d7a95f539 100644
--- a/main/src/com/google/refine/expr/functions/strings/Reinterpret.java
+++ b/main/src/com/google/refine/expr/functions/strings/Reinterpret.java
@@ -50,27 +50,53 @@ public class Reinterpret implements Function {
 
     @Override
     public Object call(Properties bindings, Object[] args) {
-        if (args.length == 2) {
+        if (args.length == 2 || args.length == 3) {
             Object o1 = args[0];
             Object o2 = args[1];
             if (o1 != null && o2 != null && o2 instanceof String) {
                 String str = (o1 instanceof String) ? (String) o1 : o1.toString();
-                Project project = (Project) bindings.get("project");
-                ProjectMetadata metadata = ProjectManager.singleton.getProjectMetadata(project.id);
-                String decoder = metadata.getEncoding();
-                String encoder = (String) o2;
-                String reinterpreted = null;
+                String decoder;
+                String encoder;
 
-                try {
-                    reinterpreted = new String(str.getBytes(decoder), encoder);
-                } catch (UnsupportedEncodingException e) {
-                    return new EvalError(ControlFunctionRegistry.getFunctionName(this) + ": encoding '" + encoder + "' is not available or recognized.");
+                if (args.length == 2) {
+                    Project project = (Project) bindings.get("project");
+                    ProjectMetadata metadata = ProjectManager.singleton.getProjectMetadata(project.id);
+                    decoder = metadata.getEncoding(); // can return "" for broken projects
+                    encoder = (String) o2;
+                } else {
+                    decoder = (String) o2;
+                    encoder = (String) args[2];
                 }
-
-                return reinterpreted;
+                return reinterpret(str, decoder, encoder);
             }
         }
-        return new EvalError(ControlFunctionRegistry.getFunctionName(this) + " expects 2 arguments");
+        return new EvalError(ControlFunctionRegistry.getFunctionName(this) + " expects 2 or 3 arguments");
+    }
+
+    private Object reinterpret(String str, String decoder, String encoder) {
+        String result = null;
+
+        byte[] bytes;
+        if (decoder == null || decoder.isEmpty()) {
+            bytes = str.getBytes();
+        } else {
+            try {
+                bytes = str.getBytes(decoder);
+            } catch (UnsupportedEncodingException e) {
+                return new EvalError(ControlFunctionRegistry.getFunctionName(this) + ": source encoding '" + decoder + "' is not available or recognized.");
+            }
+        }
+        try {
+            if (encoder == null || encoder.isEmpty()) {
+                result = new String(bytes); // system default encoding
+            } else {
+                result = new String(bytes, encoder);
+            }
+        } catch (UnsupportedEncodingException e) {
+            return new EvalError(ControlFunctionRegistry.getFunctionName(this) + ": encoding '" + encoder + "' is not available or recognized.");
+        }
+
+        return result;
     }
 
     @Override