diff --git a/benchmark/pom.xml b/benchmark/pom.xml new file mode 100644 index 000000000..6d19ee1ce --- /dev/null +++ b/benchmark/pom.xml @@ -0,0 +1,155 @@ + + + 4.0.0 + + openrefine + org.openrefine + 3.5-SNAPSHOT + + + benchmark + jar + + OpenRefine Java JMH benchmarks + + + + ${project.groupId} + main + ${project.version} + provided + + + javax.servlet + servlet-api + 2.5 + provided + + + org.openjdk.jmh + jmh-core + ${jmh.version} + + + org.openjdk.jmh + jmh-generator-annprocess + ${jmh.version} + provided + + + org.testng + testng + 7.1.0 + test + + + + + UTF-8 + 1.23 + 1.8 + openrefine-benchmarks + + + + + + org.apache.maven.plugins + maven-compiler-plugin + 3.8.0 + + ${javac.target} + ${javac.target} + ${javac.target} + + + + org.apache.maven.plugins + maven-shade-plugin + 3.2.1 + + + package + + shade + + + ${uberjar.name} + + + org.openjdk.jmh.Main + + + + + + + *:* + + META-INF/*.SF + META-INF/*.DSA + META-INF/*.RSA + + + + + + ${project.groupId}:main:*:* + *:* + + + + + + + + + + + maven-clean-plugin + 2.5 + + + maven-deploy-plugin + 2.8.1 + + + maven-install-plugin + 2.5.1 + + + maven-jar-plugin + 2.4 + + + maven-javadoc-plugin + 2.9.1 + + + maven-resources-plugin + 2.6 + + + maven-site-plugin + 3.3 + + + maven-source-plugin + 2.2.1 + + + maven-surefire-plugin + 2.17 + + true + + + + + + + diff --git a/benchmark/src/main/java/org/openrefine/ToNumberBenchmark.java b/benchmark/src/main/java/org/openrefine/ToNumberBenchmark.java new file mode 100644 index 000000000..456986fd9 --- /dev/null +++ b/benchmark/src/main/java/org/openrefine/ToNumberBenchmark.java @@ -0,0 +1,94 @@ +/******************************************************************************* + * Copyright (C) 2020, OpenRefine contributors + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. 
Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ ******************************************************************************/ +package org.openrefine.benchmark; + +import java.util.Properties; +import java.util.Random; +import java.util.concurrent.TimeUnit; + +import org.openjdk.jmh.annotations.Benchmark; +import org.openjdk.jmh.annotations.BenchmarkMode; +import org.openjdk.jmh.annotations.Fork; +import org.openjdk.jmh.annotations.Level; +import org.openjdk.jmh.annotations.Measurement; +import org.openjdk.jmh.annotations.Mode; +import org.openjdk.jmh.annotations.OutputTimeUnit; +import org.openjdk.jmh.annotations.Param; +import org.openjdk.jmh.annotations.Scope; +import org.openjdk.jmh.annotations.Setup; +import org.openjdk.jmh.annotations.State; +import org.openjdk.jmh.annotations.Warmup; +import org.openjdk.jmh.infra.Blackhole; + +import com.google.refine.expr.functions.ToNumber; + +public class ToNumberBenchmark { + + static Properties bindings = new Properties(); + + @State(Scope.Benchmark) + public static class ExecutionPlan { + + @Param({"1000", "10000" }) + public int iterations; + + public ToNumber f; + String[] args = new String[1]; + String testData; + String testDataInt; + Random rnd = new Random(); + + @Setup(Level.Invocation) + public void setUp() { + f = new ToNumber(); + testData = Double.toString(rnd.nextDouble() * 10000); + testDataInt = testData.replace(".", ""); + } + } + + @Benchmark + @BenchmarkMode(Mode.AverageTime) + @OutputTimeUnit(TimeUnit.NANOSECONDS) + @Warmup(iterations = 3, time = 200, timeUnit = TimeUnit.MILLISECONDS) + @Measurement(iterations = 5, time = 200, timeUnit = TimeUnit.MILLISECONDS) + public void toDoubleNew(ExecutionPlan plan, Blackhole blackhole) { + plan.args[0] = plan.testData; + blackhole.consume(plan.f.call(bindings, plan.args)); + } + + @Benchmark + @BenchmarkMode(Mode.AverageTime) + @OutputTimeUnit(TimeUnit.NANOSECONDS) + @Warmup(iterations = 3, time = 200, timeUnit = TimeUnit.MILLISECONDS) + @Measurement(iterations = 5, time = 200, timeUnit = 
TimeUnit.MILLISECONDS) + @Fork(1) + public void toLongNew(ExecutionPlan plan, Blackhole blackhole) { + plan.args[0] = plan.testDataInt; + blackhole.consume(plan.f.call(bindings, plan.args)); + } +} + diff --git a/main/pom.xml b/main/pom.xml index e3a730b35..abfef9497 100644 --- a/main/pom.xml +++ b/main/pom.xml @@ -380,18 +380,18 @@ powermock-module-testng ${powermock.version} test - - + + org.powermock powermock-api-mockito2 ${powermock.version} test - - + + com.squareup.okhttp3 mockwebserver 4.7.2 test - + diff --git a/main/src/com/google/refine/expr/functions/ToNumber.java b/main/src/com/google/refine/expr/functions/ToNumber.java index e4d6c9a80..fb0121387 100644 --- a/main/src/com/google/refine/expr/functions/ToNumber.java +++ b/main/src/com/google/refine/expr/functions/ToNumber.java @@ -41,32 +41,50 @@ import com.google.refine.grel.Function; public class ToNumber implements Function { - @Override + /** + * Converts the single argument to a number. + * + * A Number is returned unchanged. Any other value is converted to a string and trimmed; + * text without a '.' is first parsed as a base-10 Long, and text that contains a '.' or + * fails that parse is parsed as a Double. + * + * @param bindings evaluation bindings; not read by this method + * @param args expected to hold exactly one non-null value + * @return the parsed number, or an EvalError if parsing fails or args does not hold + * exactly one non-null value + */ + @Override public Object call(Properties bindings, Object[] args) { if (args.length == 1 && args[0] != null) { if (args[0] instanceof Number) { return args[0]; } else { - String s = args[0].toString().trim(); - if (s.length() > 0) { - try { - return Long.parseLong(s); - } catch (NumberFormatException e) { - } - try { - return Double.parseDouble(s); - } catch (NumberFormatException e) { - return new EvalError("Unable to parse as number"); - } + String s; + /* Trim before parsing: Long.valueOf rejects surrounding whitespace while Double.valueOf ignores it, so an untrimmed padded integer would come back as a Double instead of a Long. */ + if (args[0] instanceof String) { + s = ((String) args[0]).trim(); } else { - return new EvalError("Unable to parse as number"); + s = args[0].toString().trim(); } + if (s.length() > 0) { + if (!s.contains(".")) { // lightweight test for strings which will definitely fail + try { + return Long.valueOf(s, 10); + } catch (NumberFormatException e) { /* not a long; fall through to the Double parse */ + } + } + try { + return Double.valueOf(s); + } catch (NumberFormatException e) { /* not a number; the error is returned below */ + } + } + return new EvalError("Unable to parse as number"); } } else { return new EvalError(ControlFunctionRegistry.getFunctionName(this) + " expects one non-null argument"); } } - + @Override public String 
getDescription() { return "Returns o converted to a number"; diff --git a/pom.xml b/pom.xml index 74c0ca52a..5e29a6489 100644 --- a/pom.xml +++ b/pom.xml @@ -1,5 +1,5 @@ - + + 4.0.0 org.openrefine @@ -49,6 +49,7 @@ server extensions packaging + benchmark @@ -57,7 +58,7 @@ 3333 /tmp/refine 2.22.2 - + UTF-8 2.11.1 1.7.30 @@ -232,4 +233,3 @@ -