Micro benchmark harness & ToNumber optimizations (#2859)

* Performance optimized version of ToNumber

Approximately 5x faster for floats (data dependent)
and about the same speed for integers.

- Instead of blindly trying to parse as Long, do a quick check
  for obvious problems (e.g. decimal point).
- Don't trim. It's already done by called methods.
- Use valueOf() instead of parse() to avoid object creation

* Add Java Microbenchmark Harness

The shaded JAR is missing the OpenRefine classes, for a reason
that I haven't figured out, so requires openrefine-main.jar at runtime.

* Remove old implementations of ToNumber

* Remove unneeded dependencies from main project

* Clean up and reformat
This commit is contained in:
Tom Morris 2020-07-03 15:42:44 -04:00 committed by GitHub
parent a88aeca304
commit df8d092132
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 277 additions and 23 deletions

155
benchmark/pom.xml Normal file
View File

@ -0,0 +1,155 @@
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<parent>
<artifactId>openrefine</artifactId>
<groupId>org.openrefine</groupId>
<version>3.5-SNAPSHOT</version>
</parent>
<artifactId>benchmark</artifactId>
<packaging>jar</packaging>
<name>OpenRefine Java JMH benchmarks</name>
<dependencies>
<dependency>
<groupId>${project.groupId}</groupId>
<artifactId>main</artifactId>
<version>${project.version}</version>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>javax.servlet</groupId>
<artifactId>servlet-api</artifactId>
<version>2.5</version>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>org.openjdk.jmh</groupId>
<artifactId>jmh-core</artifactId>
<version>${jmh.version}</version>
</dependency>
<dependency>
<groupId>org.openjdk.jmh</groupId>
<artifactId>jmh-generator-annprocess</artifactId>
<version>${jmh.version}</version>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>org.testng</groupId>
<artifactId>testng</artifactId>
<version>7.1.0</version>
<scope>test</scope>
</dependency>
</dependencies>
<properties>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
<jmh.version>1.23</jmh.version>
<javac.target>1.8</javac.target>
<uberjar.name>openrefine-benchmarks</uberjar.name>
</properties>
<build>
<plugins>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-compiler-plugin</artifactId>
<version>3.8.0</version>
<configuration>
<compilerVersion>${javac.target}</compilerVersion>
<source>${javac.target}</source>
<target>${javac.target}</target>
</configuration>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-shade-plugin</artifactId>
<version>3.2.1</version>
<executions>
<execution>
<phase>package</phase>
<goals>
<goal>shade</goal>
</goals>
<configuration>
<finalName>${uberjar.name}</finalName>
<transformers>
<transformer
implementation="org.apache.maven.plugins.shade.resource.ManifestResourceTransformer">
<mainClass>org.openjdk.jmh.Main</mainClass>
</transformer>
<transformer
implementation="org.apache.maven.plugins.shade.resource.ServicesResourceTransformer" />
</transformers>
<filters>
<filter>
<!-- Shading signed JARs will fail without this. http://stackoverflow.com/questions/999489/invalid-signature-file-when-attempting-to-run-a-jar -->
<artifact>*:*</artifact>
<excludes>
<exclude>META-INF/*.SF</exclude>
<exclude>META-INF/*.DSA</exclude>
<exclude>META-INF/*.RSA</exclude>
</excludes>
</filter>
</filters>
<artifactSet>
<includes>
<include>${project.groupId}:main:*:*</include>
<include>*:*</include>
</includes>
</artifactSet>
</configuration>
</execution>
</executions>
</plugin>
</plugins>
<pluginManagement>
<plugins>
<plugin>
<artifactId>maven-clean-plugin</artifactId>
<version>2.5</version>
</plugin>
<plugin>
<artifactId>maven-deploy-plugin</artifactId>
<version>2.8.1</version>
</plugin>
<plugin>
<artifactId>maven-install-plugin</artifactId>
<version>2.5.1</version>
</plugin>
<plugin>
<artifactId>maven-jar-plugin</artifactId>
<version>2.4</version>
</plugin>
<plugin>
<artifactId>maven-javadoc-plugin</artifactId>
<version>2.9.1</version>
</plugin>
<plugin>
<artifactId>maven-resources-plugin</artifactId>
<version>2.6</version>
</plugin>
<plugin>
<artifactId>maven-site-plugin</artifactId>
<version>3.3</version>
</plugin>
<plugin>
<artifactId>maven-source-plugin</artifactId>
<version>2.2.1</version>
</plugin>
<plugin>
<artifactId>maven-surefire-plugin</artifactId>
<version>2.17</version>
<configuration>
<skipTests>true</skipTests>
</configuration>
</plugin>
</plugins>
</pluginManagement>
</build>
</project>

View File

@ -0,0 +1,94 @@
/*******************************************************************************
* Copyright (C) 2020, OpenRefine contributors
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
******************************************************************************/
package org.openrefine.benchmark;
import java.util.Properties;
import java.util.Random;
import java.util.concurrent.TimeUnit;
import org.openjdk.jmh.annotations.Benchmark;
import org.openjdk.jmh.annotations.BenchmarkMode;
import org.openjdk.jmh.annotations.Fork;
import org.openjdk.jmh.annotations.Level;
import org.openjdk.jmh.annotations.Measurement;
import org.openjdk.jmh.annotations.Mode;
import org.openjdk.jmh.annotations.OutputTimeUnit;
import org.openjdk.jmh.annotations.Param;
import org.openjdk.jmh.annotations.Scope;
import org.openjdk.jmh.annotations.Setup;
import org.openjdk.jmh.annotations.State;
import org.openjdk.jmh.annotations.Warmup;
import org.openjdk.jmh.infra.Blackhole;
import com.google.refine.expr.functions.ToNumber;
public class ToNumberBenchmark {
static Properties bindings = new Properties();
@State(Scope.Benchmark)
public static class ExecutionPlan {
@Param({"1000", "10000" })
public int iterations;
public ToNumber f;
String[] args = new String[1];
String testData;
String testDataInt;
Random rnd = new Random();
@Setup(Level.Invocation)
public void setUp() {
f = new ToNumber();
testData = Double.toString(rnd.nextDouble() * 10000);
testDataInt = testData.replace(".", "");
}
}
@Benchmark
@BenchmarkMode(Mode.AverageTime)
@OutputTimeUnit(TimeUnit.NANOSECONDS)
@Warmup(iterations = 3, time = 200, timeUnit = TimeUnit.MILLISECONDS)
@Measurement(iterations = 5, time = 200, timeUnit = TimeUnit.MILLISECONDS)
public void toDoubleNew(ExecutionPlan plan, Blackhole blackhole) {
plan.args[0] = plan.testData;
blackhole.consume(plan.f.call(bindings, plan.args));
}
@Benchmark
@BenchmarkMode(Mode.AverageTime)
@OutputTimeUnit(TimeUnit.NANOSECONDS)
@Warmup(iterations = 3, time = 200, timeUnit = TimeUnit.MILLISECONDS)
@Measurement(iterations = 5, time = 200, timeUnit = TimeUnit.MILLISECONDS)
@Fork(1)
public void toLongNew(ExecutionPlan plan, Blackhole blackhole) {
plan.args[0] = plan.testDataInt;
blackhole.consume(plan.f.call(bindings, plan.args));
}
}

View File

@ -380,18 +380,18 @@
<artifactId>powermock-module-testng</artifactId>
<version>${powermock.version}</version>
<scope>test</scope>
</dependency>
<dependency>
</dependency>
<dependency>
<groupId>org.powermock</groupId>
<artifactId>powermock-api-mockito2</artifactId>
<version>${powermock.version}</version>
<scope>test</scope>
</dependency>
<dependency>
</dependency>
<dependency>
<groupId>com.squareup.okhttp3</groupId>
<artifactId>mockwebserver</artifactId>
<version>4.7.2</version>
<scope>test</scope>
</dependency>
</dependency>
</dependencies>
</project>

View File

@ -41,32 +41,37 @@ import com.google.refine.grel.Function;
public class ToNumber implements Function {
@Override
@Override
public Object call(Properties bindings, Object[] args) {
if (args.length == 1 && args[0] != null) {
if (args[0] instanceof Number) {
return args[0];
} else {
String s = args[0].toString().trim();
if (s.length() > 0) {
try {
return Long.parseLong(s);
} catch (NumberFormatException e) {
}
try {
return Double.parseDouble(s);
} catch (NumberFormatException e) {
return new EvalError("Unable to parse as number");
}
String s;
if (args[0] instanceof String) {
s = (String)args[0];
} else {
return new EvalError("Unable to parse as number");
s = args[0].toString();
}
if (s.length() > 0) {
if (!s.contains(".")) { // lightweight test for strings which will definitely fail
try {
return Long.valueOf(s, 10);
} catch (NumberFormatException e) {
}
}
try {
return Double.valueOf(s);
} catch (NumberFormatException e) {
}
}
return new EvalError("Unable to parse as number");
}
} else {
return new EvalError(ControlFunctionRegistry.getFunctionName(this) + " expects one non-null argument");
}
}
@Override
public String getDescription() {
return "Returns o converted to a number";

View File

@ -1,5 +1,5 @@
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
<modelVersion>4.0.0</modelVersion>
<groupId>org.openrefine</groupId>
@ -49,6 +49,7 @@
<module>server</module>
<module>extensions</module>
<module>packaging</module>
<module>benchmark</module>
</modules>
@ -57,7 +58,7 @@
<jee.port>3333</jee.port>
<refine.data>/tmp/refine</refine.data>
<surefire.version>2.22.2</surefire.version>
<surefireArgs></surefireArgs>
<surefireArgs/>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
<jackson.version>2.11.1</jackson.version>
<slf4j.version>1.7.30</slf4j.version>
@ -232,4 +233,3 @@
</repositories>
</project>