Micro benchmark harness & ToNumber optimizations (#2859)
* Performance optimized version of ToNumber Approximately 5x faster for floats (data dependent) and about the same speed for integers. - Instead of blindly trying to parse as Long, do a quick check for obvious problems (e.g. decimal point). - Don't trim. It's already done by called methods. - Use valueOf() instead of parse() to avoid object creation * Add Java Microbenchmark Harness The shaded JAR is missing the OpenRefine classes, for a reason that I haven't figured out, so requires openrefine-main.jar at runtime. * Remove old implementations of ToNumber * Remove unneeded dependencies from main project * Clean up and reformat
This commit is contained in:
parent
a88aeca304
commit
df8d092132
155
benchmark/pom.xml
Normal file
155
benchmark/pom.xml
Normal file
@ -0,0 +1,155 @@
|
|||||||
|
<?xml version="1.0" encoding="UTF-8"?>
|
||||||
|
<project xmlns="http://maven.apache.org/POM/4.0.0"
|
||||||
|
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
|
||||||
|
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
|
||||||
|
<modelVersion>4.0.0</modelVersion>
|
||||||
|
<parent>
|
||||||
|
<artifactId>openrefine</artifactId>
|
||||||
|
<groupId>org.openrefine</groupId>
|
||||||
|
<version>3.5-SNAPSHOT</version>
|
||||||
|
</parent>
|
||||||
|
|
||||||
|
<artifactId>benchmark</artifactId>
|
||||||
|
<packaging>jar</packaging>
|
||||||
|
|
||||||
|
<name>OpenRefine Java JMH benchmarks</name>
|
||||||
|
|
||||||
|
<dependencies>
|
||||||
|
<dependency>
|
||||||
|
<groupId>${project.groupId}</groupId>
|
||||||
|
<artifactId>main</artifactId>
|
||||||
|
<version>${project.version}</version>
|
||||||
|
<scope>provided</scope>
|
||||||
|
</dependency>
|
||||||
|
<dependency>
|
||||||
|
<groupId>javax.servlet</groupId>
|
||||||
|
<artifactId>servlet-api</artifactId>
|
||||||
|
<version>2.5</version>
|
||||||
|
<scope>provided</scope>
|
||||||
|
</dependency>
|
||||||
|
<dependency>
|
||||||
|
<groupId>org.openjdk.jmh</groupId>
|
||||||
|
<artifactId>jmh-core</artifactId>
|
||||||
|
<version>${jmh.version}</version>
|
||||||
|
</dependency>
|
||||||
|
<dependency>
|
||||||
|
<groupId>org.openjdk.jmh</groupId>
|
||||||
|
<artifactId>jmh-generator-annprocess</artifactId>
|
||||||
|
<version>${jmh.version}</version>
|
||||||
|
<scope>provided</scope>
|
||||||
|
</dependency>
|
||||||
|
<dependency>
|
||||||
|
<groupId>org.testng</groupId>
|
||||||
|
<artifactId>testng</artifactId>
|
||||||
|
<version>7.1.0</version>
|
||||||
|
<scope>test</scope>
|
||||||
|
</dependency>
|
||||||
|
</dependencies>
|
||||||
|
|
||||||
|
<properties>
|
||||||
|
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
|
||||||
|
<jmh.version>1.23</jmh.version>
|
||||||
|
<javac.target>1.8</javac.target>
|
||||||
|
<uberjar.name>openrefine-benchmarks</uberjar.name>
|
||||||
|
</properties>
|
||||||
|
|
||||||
|
<build>
|
||||||
|
<plugins>
|
||||||
|
<plugin>
|
||||||
|
<groupId>org.apache.maven.plugins</groupId>
|
||||||
|
<artifactId>maven-compiler-plugin</artifactId>
|
||||||
|
<version>3.8.0</version>
|
||||||
|
<configuration>
|
||||||
|
<compilerVersion>${javac.target}</compilerVersion>
|
||||||
|
<source>${javac.target}</source>
|
||||||
|
<target>${javac.target}</target>
|
||||||
|
</configuration>
|
||||||
|
</plugin>
|
||||||
|
<plugin>
|
||||||
|
<groupId>org.apache.maven.plugins</groupId>
|
||||||
|
<artifactId>maven-shade-plugin</artifactId>
|
||||||
|
<version>3.2.1</version>
|
||||||
|
<executions>
|
||||||
|
<execution>
|
||||||
|
<phase>package</phase>
|
||||||
|
<goals>
|
||||||
|
<goal>shade</goal>
|
||||||
|
</goals>
|
||||||
|
<configuration>
|
||||||
|
<finalName>${uberjar.name}</finalName>
|
||||||
|
<transformers>
|
||||||
|
<transformer
|
||||||
|
implementation="org.apache.maven.plugins.shade.resource.ManifestResourceTransformer">
|
||||||
|
<mainClass>org.openjdk.jmh.Main</mainClass>
|
||||||
|
</transformer>
|
||||||
|
<transformer
|
||||||
|
implementation="org.apache.maven.plugins.shade.resource.ServicesResourceTransformer" />
|
||||||
|
</transformers>
|
||||||
|
<filters>
|
||||||
|
<filter>
|
||||||
|
<!-- Shading signed JARs will fail without this. http://stackoverflow.com/questions/999489/invalid-signature-file-when-attempting-to-run-a-jar -->
|
||||||
|
<artifact>*:*</artifact>
|
||||||
|
<excludes>
|
||||||
|
<exclude>META-INF/*.SF</exclude>
|
||||||
|
<exclude>META-INF/*.DSA</exclude>
|
||||||
|
<exclude>META-INF/*.RSA</exclude>
|
||||||
|
</excludes>
|
||||||
|
</filter>
|
||||||
|
</filters>
|
||||||
|
<artifactSet>
|
||||||
|
<includes>
|
||||||
|
<include>${project.groupId}:main:*:*</include>
|
||||||
|
<include>*:*</include>
|
||||||
|
</includes>
|
||||||
|
</artifactSet>
|
||||||
|
</configuration>
|
||||||
|
</execution>
|
||||||
|
</executions>
|
||||||
|
</plugin>
|
||||||
|
</plugins>
|
||||||
|
<pluginManagement>
|
||||||
|
<plugins>
|
||||||
|
<plugin>
|
||||||
|
<artifactId>maven-clean-plugin</artifactId>
|
||||||
|
<version>2.5</version>
|
||||||
|
</plugin>
|
||||||
|
<plugin>
|
||||||
|
<artifactId>maven-deploy-plugin</artifactId>
|
||||||
|
<version>2.8.1</version>
|
||||||
|
</plugin>
|
||||||
|
<plugin>
|
||||||
|
<artifactId>maven-install-plugin</artifactId>
|
||||||
|
<version>2.5.1</version>
|
||||||
|
</plugin>
|
||||||
|
<plugin>
|
||||||
|
<artifactId>maven-jar-plugin</artifactId>
|
||||||
|
<version>2.4</version>
|
||||||
|
</plugin>
|
||||||
|
<plugin>
|
||||||
|
<artifactId>maven-javadoc-plugin</artifactId>
|
||||||
|
<version>2.9.1</version>
|
||||||
|
</plugin>
|
||||||
|
<plugin>
|
||||||
|
<artifactId>maven-resources-plugin</artifactId>
|
||||||
|
<version>2.6</version>
|
||||||
|
</plugin>
|
||||||
|
<plugin>
|
||||||
|
<artifactId>maven-site-plugin</artifactId>
|
||||||
|
<version>3.3</version>
|
||||||
|
</plugin>
|
||||||
|
<plugin>
|
||||||
|
<artifactId>maven-source-plugin</artifactId>
|
||||||
|
<version>2.2.1</version>
|
||||||
|
</plugin>
|
||||||
|
<plugin>
|
||||||
|
<artifactId>maven-surefire-plugin</artifactId>
|
||||||
|
<version>2.17</version>
|
||||||
|
<configuration>
|
||||||
|
<skipTests>true</skipTests>
|
||||||
|
</configuration>
|
||||||
|
</plugin>
|
||||||
|
</plugins>
|
||||||
|
</pluginManagement>
|
||||||
|
</build>
|
||||||
|
|
||||||
|
</project>
|
@ -0,0 +1,94 @@
|
|||||||
|
/*******************************************************************************
|
||||||
|
* Copyright (C) 2020, OpenRefine contributors
|
||||||
|
* All rights reserved.
|
||||||
|
*
|
||||||
|
* Redistribution and use in source and binary forms, with or without
|
||||||
|
* modification, are permitted provided that the following conditions are met:
|
||||||
|
*
|
||||||
|
* 1. Redistributions of source code must retain the above copyright notice,
|
||||||
|
* this list of conditions and the following disclaimer.
|
||||||
|
*
|
||||||
|
* 2. Redistributions in binary form must reproduce the above copyright notice,
|
||||||
|
* this list of conditions and the following disclaimer in the documentation
|
||||||
|
* and/or other materials provided with the distribution.
|
||||||
|
*
|
||||||
|
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||||
|
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||||
|
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
|
||||||
|
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||||
|
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||||
|
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||||
|
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||||
|
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||||
|
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||||
|
* POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
******************************************************************************/
|
||||||
|
package org.openrefine.benchmark;
|
||||||
|
|
||||||
|
import java.util.Properties;
|
||||||
|
import java.util.Random;
|
||||||
|
import java.util.concurrent.TimeUnit;
|
||||||
|
|
||||||
|
import org.openjdk.jmh.annotations.Benchmark;
|
||||||
|
import org.openjdk.jmh.annotations.BenchmarkMode;
|
||||||
|
import org.openjdk.jmh.annotations.Fork;
|
||||||
|
import org.openjdk.jmh.annotations.Level;
|
||||||
|
import org.openjdk.jmh.annotations.Measurement;
|
||||||
|
import org.openjdk.jmh.annotations.Mode;
|
||||||
|
import org.openjdk.jmh.annotations.OutputTimeUnit;
|
||||||
|
import org.openjdk.jmh.annotations.Param;
|
||||||
|
import org.openjdk.jmh.annotations.Scope;
|
||||||
|
import org.openjdk.jmh.annotations.Setup;
|
||||||
|
import org.openjdk.jmh.annotations.State;
|
||||||
|
import org.openjdk.jmh.annotations.Warmup;
|
||||||
|
import org.openjdk.jmh.infra.Blackhole;
|
||||||
|
|
||||||
|
import com.google.refine.expr.functions.ToNumber;
|
||||||
|
|
||||||
|
public class ToNumberBenchmark {
|
||||||
|
|
||||||
|
static Properties bindings = new Properties();
|
||||||
|
|
||||||
|
@State(Scope.Benchmark)
|
||||||
|
public static class ExecutionPlan {
|
||||||
|
|
||||||
|
@Param({"1000", "10000" })
|
||||||
|
public int iterations;
|
||||||
|
|
||||||
|
public ToNumber f;
|
||||||
|
String[] args = new String[1];
|
||||||
|
String testData;
|
||||||
|
String testDataInt;
|
||||||
|
Random rnd = new Random();
|
||||||
|
|
||||||
|
@Setup(Level.Invocation)
|
||||||
|
public void setUp() {
|
||||||
|
f = new ToNumber();
|
||||||
|
testData = Double.toString(rnd.nextDouble() * 10000);
|
||||||
|
testDataInt = testData.replace(".", "");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Benchmark
|
||||||
|
@BenchmarkMode(Mode.AverageTime)
|
||||||
|
@OutputTimeUnit(TimeUnit.NANOSECONDS)
|
||||||
|
@Warmup(iterations = 3, time = 200, timeUnit = TimeUnit.MILLISECONDS)
|
||||||
|
@Measurement(iterations = 5, time = 200, timeUnit = TimeUnit.MILLISECONDS)
|
||||||
|
public void toDoubleNew(ExecutionPlan plan, Blackhole blackhole) {
|
||||||
|
plan.args[0] = plan.testData;
|
||||||
|
blackhole.consume(plan.f.call(bindings, plan.args));
|
||||||
|
}
|
||||||
|
|
||||||
|
@Benchmark
|
||||||
|
@BenchmarkMode(Mode.AverageTime)
|
||||||
|
@OutputTimeUnit(TimeUnit.NANOSECONDS)
|
||||||
|
@Warmup(iterations = 3, time = 200, timeUnit = TimeUnit.MILLISECONDS)
|
||||||
|
@Measurement(iterations = 5, time = 200, timeUnit = TimeUnit.MILLISECONDS)
|
||||||
|
@Fork(1)
|
||||||
|
public void toLongNew(ExecutionPlan plan, Blackhole blackhole) {
|
||||||
|
plan.args[0] = plan.testDataInt;
|
||||||
|
blackhole.consume(plan.f.call(bindings, plan.args));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
@ -47,20 +47,25 @@ public class ToNumber implements Function {
|
|||||||
if (args[0] instanceof Number) {
|
if (args[0] instanceof Number) {
|
||||||
return args[0];
|
return args[0];
|
||||||
} else {
|
} else {
|
||||||
String s = args[0].toString().trim();
|
String s;
|
||||||
if (s.length() > 0) {
|
if (args[0] instanceof String) {
|
||||||
try {
|
s = (String)args[0];
|
||||||
return Long.parseLong(s);
|
|
||||||
} catch (NumberFormatException e) {
|
|
||||||
}
|
|
||||||
try {
|
|
||||||
return Double.parseDouble(s);
|
|
||||||
} catch (NumberFormatException e) {
|
|
||||||
return new EvalError("Unable to parse as number");
|
|
||||||
}
|
|
||||||
} else {
|
} else {
|
||||||
return new EvalError("Unable to parse as number");
|
s = args[0].toString();
|
||||||
}
|
}
|
||||||
|
if (s.length() > 0) {
|
||||||
|
if (!s.contains(".")) { // lightweight test for strings which will definitely fail
|
||||||
|
try {
|
||||||
|
return Long.valueOf(s, 10);
|
||||||
|
} catch (NumberFormatException e) {
|
||||||
|
}
|
||||||
|
}
|
||||||
|
try {
|
||||||
|
return Double.valueOf(s);
|
||||||
|
} catch (NumberFormatException e) {
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return new EvalError("Unable to parse as number");
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
return new EvalError(ControlFunctionRegistry.getFunctionName(this) + " expects one non-null argument");
|
return new EvalError(ControlFunctionRegistry.getFunctionName(this) + " expects one non-null argument");
|
||||||
|
8
pom.xml
8
pom.xml
@ -1,5 +1,5 @@
|
|||||||
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
|
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
|
||||||
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
|
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
|
||||||
<modelVersion>4.0.0</modelVersion>
|
<modelVersion>4.0.0</modelVersion>
|
||||||
|
|
||||||
<groupId>org.openrefine</groupId>
|
<groupId>org.openrefine</groupId>
|
||||||
@ -49,6 +49,7 @@
|
|||||||
<module>server</module>
|
<module>server</module>
|
||||||
<module>extensions</module>
|
<module>extensions</module>
|
||||||
<module>packaging</module>
|
<module>packaging</module>
|
||||||
|
<module>benchmark</module>
|
||||||
</modules>
|
</modules>
|
||||||
|
|
||||||
|
|
||||||
@ -57,7 +58,7 @@
|
|||||||
<jee.port>3333</jee.port>
|
<jee.port>3333</jee.port>
|
||||||
<refine.data>/tmp/refine</refine.data>
|
<refine.data>/tmp/refine</refine.data>
|
||||||
<surefire.version>2.22.2</surefire.version>
|
<surefire.version>2.22.2</surefire.version>
|
||||||
<surefireArgs></surefireArgs>
|
<surefireArgs/>
|
||||||
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
|
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
|
||||||
<jackson.version>2.11.1</jackson.version>
|
<jackson.version>2.11.1</jackson.version>
|
||||||
<slf4j.version>1.7.30</slf4j.version>
|
<slf4j.version>1.7.30</slf4j.version>
|
||||||
@ -232,4 +233,3 @@
|
|||||||
</repositories>
|
</repositories>
|
||||||
|
|
||||||
</project>
|
</project>
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user