Micro benchmark harness & ToNumber optimizations (#2859)
* Performance optimized version of ToNumber Approximately 5x faster for floats (data dependent) and about the same speed for integers. - Instead of blindly trying to parse as Long, do a quick check for obvious problems (e.g. decimal point). - Don't trim. It's already done by called methods. - Use valueOf() instead of parse() to avoid object creation * Add Java Microbenchmark Harness The shaded JAR is missing the OpenRefine classes, for a reason that I haven't figured out, so requires openrefine-main.jar at runtime. * Remove old implementations of ToNumber * Remove unneeded dependencies from main project * Clean up and reformat
This commit is contained in:
parent
a88aeca304
commit
df8d092132
155
benchmark/pom.xml
Normal file
155
benchmark/pom.xml
Normal file
@ -0,0 +1,155 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<project xmlns="http://maven.apache.org/POM/4.0.0"
|
||||
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
|
||||
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
|
||||
<modelVersion>4.0.0</modelVersion>
|
||||
<parent>
|
||||
<artifactId>openrefine</artifactId>
|
||||
<groupId>org.openrefine</groupId>
|
||||
<version>3.5-SNAPSHOT</version>
|
||||
</parent>
|
||||
|
||||
<artifactId>benchmark</artifactId>
|
||||
<packaging>jar</packaging>
|
||||
|
||||
<name>OpenRefine Java JMH benchmarks</name>
|
||||
|
||||
<dependencies>
|
||||
<dependency>
|
||||
<groupId>${project.groupId}</groupId>
|
||||
<artifactId>main</artifactId>
|
||||
<version>${project.version}</version>
|
||||
<scope>provided</scope>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>javax.servlet</groupId>
|
||||
<artifactId>servlet-api</artifactId>
|
||||
<version>2.5</version>
|
||||
<scope>provided</scope>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.openjdk.jmh</groupId>
|
||||
<artifactId>jmh-core</artifactId>
|
||||
<version>${jmh.version}</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.openjdk.jmh</groupId>
|
||||
<artifactId>jmh-generator-annprocess</artifactId>
|
||||
<version>${jmh.version}</version>
|
||||
<scope>provided</scope>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.testng</groupId>
|
||||
<artifactId>testng</artifactId>
|
||||
<version>7.1.0</version>
|
||||
<scope>test</scope>
|
||||
</dependency>
|
||||
</dependencies>
|
||||
|
||||
<properties>
|
||||
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
|
||||
<jmh.version>1.23</jmh.version>
|
||||
<javac.target>1.8</javac.target>
|
||||
<uberjar.name>openrefine-benchmarks</uberjar.name>
|
||||
</properties>
|
||||
|
||||
<build>
|
||||
<plugins>
|
||||
<plugin>
|
||||
<groupId>org.apache.maven.plugins</groupId>
|
||||
<artifactId>maven-compiler-plugin</artifactId>
|
||||
<version>3.8.0</version>
|
||||
<configuration>
|
||||
<compilerVersion>${javac.target}</compilerVersion>
|
||||
<source>${javac.target}</source>
|
||||
<target>${javac.target}</target>
|
||||
</configuration>
|
||||
</plugin>
|
||||
<plugin>
|
||||
<groupId>org.apache.maven.plugins</groupId>
|
||||
<artifactId>maven-shade-plugin</artifactId>
|
||||
<version>3.2.1</version>
|
||||
<executions>
|
||||
<execution>
|
||||
<phase>package</phase>
|
||||
<goals>
|
||||
<goal>shade</goal>
|
||||
</goals>
|
||||
<configuration>
|
||||
<finalName>${uberjar.name}</finalName>
|
||||
<transformers>
|
||||
<transformer
|
||||
implementation="org.apache.maven.plugins.shade.resource.ManifestResourceTransformer">
|
||||
<mainClass>org.openjdk.jmh.Main</mainClass>
|
||||
</transformer>
|
||||
<transformer
|
||||
implementation="org.apache.maven.plugins.shade.resource.ServicesResourceTransformer" />
|
||||
</transformers>
|
||||
<filters>
|
||||
<filter>
|
||||
<!-- Shading signed JARs will fail without this. http://stackoverflow.com/questions/999489/invalid-signature-file-when-attempting-to-run-a-jar -->
|
||||
<artifact>*:*</artifact>
|
||||
<excludes>
|
||||
<exclude>META-INF/*.SF</exclude>
|
||||
<exclude>META-INF/*.DSA</exclude>
|
||||
<exclude>META-INF/*.RSA</exclude>
|
||||
</excludes>
|
||||
</filter>
|
||||
</filters>
|
||||
<artifactSet>
|
||||
<includes>
|
||||
<include>${project.groupId}:main:*:*</include>
|
||||
<include>*:*</include>
|
||||
</includes>
|
||||
</artifactSet>
|
||||
</configuration>
|
||||
</execution>
|
||||
</executions>
|
||||
</plugin>
|
||||
</plugins>
|
||||
<pluginManagement>
|
||||
<plugins>
|
||||
<plugin>
|
||||
<artifactId>maven-clean-plugin</artifactId>
|
||||
<version>2.5</version>
|
||||
</plugin>
|
||||
<plugin>
|
||||
<artifactId>maven-deploy-plugin</artifactId>
|
||||
<version>2.8.1</version>
|
||||
</plugin>
|
||||
<plugin>
|
||||
<artifactId>maven-install-plugin</artifactId>
|
||||
<version>2.5.1</version>
|
||||
</plugin>
|
||||
<plugin>
|
||||
<artifactId>maven-jar-plugin</artifactId>
|
||||
<version>2.4</version>
|
||||
</plugin>
|
||||
<plugin>
|
||||
<artifactId>maven-javadoc-plugin</artifactId>
|
||||
<version>2.9.1</version>
|
||||
</plugin>
|
||||
<plugin>
|
||||
<artifactId>maven-resources-plugin</artifactId>
|
||||
<version>2.6</version>
|
||||
</plugin>
|
||||
<plugin>
|
||||
<artifactId>maven-site-plugin</artifactId>
|
||||
<version>3.3</version>
|
||||
</plugin>
|
||||
<plugin>
|
||||
<artifactId>maven-source-plugin</artifactId>
|
||||
<version>2.2.1</version>
|
||||
</plugin>
|
||||
<plugin>
|
||||
<artifactId>maven-surefire-plugin</artifactId>
|
||||
<version>2.17</version>
|
||||
<configuration>
|
||||
<skipTests>true</skipTests>
|
||||
</configuration>
|
||||
</plugin>
|
||||
</plugins>
|
||||
</pluginManagement>
|
||||
</build>
|
||||
|
||||
</project>
|
@ -0,0 +1,94 @@
|
||||
/*******************************************************************************
|
||||
* Copyright (C) 2020, OpenRefine contributors
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
*
|
||||
* 2. Redistributions in binary form must reproduce the above copyright notice,
|
||||
* this list of conditions and the following disclaimer in the documentation
|
||||
* and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
******************************************************************************/
|
||||
package org.openrefine.benchmark;
|
||||
|
||||
import java.util.Properties;
|
||||
import java.util.Random;
|
||||
import java.util.concurrent.TimeUnit;
|
||||
|
||||
import org.openjdk.jmh.annotations.Benchmark;
|
||||
import org.openjdk.jmh.annotations.BenchmarkMode;
|
||||
import org.openjdk.jmh.annotations.Fork;
|
||||
import org.openjdk.jmh.annotations.Level;
|
||||
import org.openjdk.jmh.annotations.Measurement;
|
||||
import org.openjdk.jmh.annotations.Mode;
|
||||
import org.openjdk.jmh.annotations.OutputTimeUnit;
|
||||
import org.openjdk.jmh.annotations.Param;
|
||||
import org.openjdk.jmh.annotations.Scope;
|
||||
import org.openjdk.jmh.annotations.Setup;
|
||||
import org.openjdk.jmh.annotations.State;
|
||||
import org.openjdk.jmh.annotations.Warmup;
|
||||
import org.openjdk.jmh.infra.Blackhole;
|
||||
|
||||
import com.google.refine.expr.functions.ToNumber;
|
||||
|
||||
public class ToNumberBenchmark {
|
||||
|
||||
static Properties bindings = new Properties();
|
||||
|
||||
@State(Scope.Benchmark)
|
||||
public static class ExecutionPlan {
|
||||
|
||||
@Param({"1000", "10000" })
|
||||
public int iterations;
|
||||
|
||||
public ToNumber f;
|
||||
String[] args = new String[1];
|
||||
String testData;
|
||||
String testDataInt;
|
||||
Random rnd = new Random();
|
||||
|
||||
@Setup(Level.Invocation)
|
||||
public void setUp() {
|
||||
f = new ToNumber();
|
||||
testData = Double.toString(rnd.nextDouble() * 10000);
|
||||
testDataInt = testData.replace(".", "");
|
||||
}
|
||||
}
|
||||
|
||||
@Benchmark
|
||||
@BenchmarkMode(Mode.AverageTime)
|
||||
@OutputTimeUnit(TimeUnit.NANOSECONDS)
|
||||
@Warmup(iterations = 3, time = 200, timeUnit = TimeUnit.MILLISECONDS)
|
||||
@Measurement(iterations = 5, time = 200, timeUnit = TimeUnit.MILLISECONDS)
|
||||
public void toDoubleNew(ExecutionPlan plan, Blackhole blackhole) {
|
||||
plan.args[0] = plan.testData;
|
||||
blackhole.consume(plan.f.call(bindings, plan.args));
|
||||
}
|
||||
|
||||
@Benchmark
|
||||
@BenchmarkMode(Mode.AverageTime)
|
||||
@OutputTimeUnit(TimeUnit.NANOSECONDS)
|
||||
@Warmup(iterations = 3, time = 200, timeUnit = TimeUnit.MILLISECONDS)
|
||||
@Measurement(iterations = 5, time = 200, timeUnit = TimeUnit.MILLISECONDS)
|
||||
@Fork(1)
|
||||
public void toLongNew(ExecutionPlan plan, Blackhole blackhole) {
|
||||
plan.args[0] = plan.testDataInt;
|
||||
blackhole.consume(plan.f.call(bindings, plan.args));
|
||||
}
|
||||
}
|
||||
|
10
main/pom.xml
10
main/pom.xml
@ -380,18 +380,18 @@
|
||||
<artifactId>powermock-module-testng</artifactId>
|
||||
<version>${powermock.version}</version>
|
||||
<scope>test</scope>
|
||||
</dependency>
|
||||
<dependency>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.powermock</groupId>
|
||||
<artifactId>powermock-api-mockito2</artifactId>
|
||||
<version>${powermock.version}</version>
|
||||
<scope>test</scope>
|
||||
</dependency>
|
||||
<dependency>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>com.squareup.okhttp3</groupId>
|
||||
<artifactId>mockwebserver</artifactId>
|
||||
<version>4.7.2</version>
|
||||
<scope>test</scope>
|
||||
</dependency>
|
||||
</dependency>
|
||||
</dependencies>
|
||||
</project>
|
||||
|
@ -41,32 +41,37 @@ import com.google.refine.grel.Function;
|
||||
|
||||
public class ToNumber implements Function {
|
||||
|
||||
@Override
|
||||
@Override
|
||||
public Object call(Properties bindings, Object[] args) {
|
||||
if (args.length == 1 && args[0] != null) {
|
||||
if (args[0] instanceof Number) {
|
||||
return args[0];
|
||||
} else {
|
||||
String s = args[0].toString().trim();
|
||||
if (s.length() > 0) {
|
||||
try {
|
||||
return Long.parseLong(s);
|
||||
} catch (NumberFormatException e) {
|
||||
}
|
||||
try {
|
||||
return Double.parseDouble(s);
|
||||
} catch (NumberFormatException e) {
|
||||
return new EvalError("Unable to parse as number");
|
||||
}
|
||||
String s;
|
||||
if (args[0] instanceof String) {
|
||||
s = (String)args[0];
|
||||
} else {
|
||||
return new EvalError("Unable to parse as number");
|
||||
s = args[0].toString();
|
||||
}
|
||||
if (s.length() > 0) {
|
||||
if (!s.contains(".")) { // lightweight test for strings which will definitely fail
|
||||
try {
|
||||
return Long.valueOf(s, 10);
|
||||
} catch (NumberFormatException e) {
|
||||
}
|
||||
}
|
||||
try {
|
||||
return Double.valueOf(s);
|
||||
} catch (NumberFormatException e) {
|
||||
}
|
||||
}
|
||||
return new EvalError("Unable to parse as number");
|
||||
}
|
||||
} else {
|
||||
return new EvalError(ControlFunctionRegistry.getFunctionName(this) + " expects one non-null argument");
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public String getDescription() {
|
||||
return "Returns o converted to a number";
|
||||
|
8
pom.xml
8
pom.xml
@ -1,5 +1,5 @@
|
||||
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
|
||||
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
|
||||
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
|
||||
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
|
||||
<modelVersion>4.0.0</modelVersion>
|
||||
|
||||
<groupId>org.openrefine</groupId>
|
||||
@ -49,6 +49,7 @@
|
||||
<module>server</module>
|
||||
<module>extensions</module>
|
||||
<module>packaging</module>
|
||||
<module>benchmark</module>
|
||||
</modules>
|
||||
|
||||
|
||||
@ -57,7 +58,7 @@
|
||||
<jee.port>3333</jee.port>
|
||||
<refine.data>/tmp/refine</refine.data>
|
||||
<surefire.version>2.22.2</surefire.version>
|
||||
<surefireArgs></surefireArgs>
|
||||
<surefireArgs/>
|
||||
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
|
||||
<jackson.version>2.11.1</jackson.version>
|
||||
<slf4j.version>1.7.30</slf4j.version>
|
||||
@ -232,4 +233,3 @@
|
||||
</repositories>
|
||||
|
||||
</project>
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user