feat: JMH benchmarks added
This commit is contained in:
13
.classpath
13
.classpath
@@ -3,20 +3,27 @@
|
|||||||
<classpathentry kind="src" output="bin/main" path="src/main/java">
|
<classpathentry kind="src" output="bin/main" path="src/main/java">
|
||||||
<attributes>
|
<attributes>
|
||||||
<attribute name="gradle_scope" value="main"/>
|
<attribute name="gradle_scope" value="main"/>
|
||||||
<attribute name="gradle_used_by_scope" value="main,test"/>
|
<attribute name="gradle_used_by_scope" value="main,test,jmh"/>
|
||||||
</attributes>
|
</attributes>
|
||||||
</classpathentry>
|
</classpathentry>
|
||||||
<classpathentry kind="src" output="bin/test" path="src/test/java">
|
<classpathentry kind="src" output="bin/test" path="src/test/java">
|
||||||
<attributes>
|
<attributes>
|
||||||
<attribute name="gradle_scope" value="test"/>
|
<attribute name="gradle_scope" value="test"/>
|
||||||
<attribute name="gradle_used_by_scope" value="test"/>
|
<attribute name="gradle_used_by_scope" value="test,jmh"/>
|
||||||
<attribute name="test" value="true"/>
|
<attribute name="test" value="true"/>
|
||||||
</attributes>
|
</attributes>
|
||||||
</classpathentry>
|
</classpathentry>
|
||||||
<classpathentry kind="src" output="bin/main" path="src/main/resources">
|
<classpathentry kind="src" output="bin/main" path="src/main/resources">
|
||||||
<attributes>
|
<attributes>
|
||||||
<attribute name="gradle_scope" value="main"/>
|
<attribute name="gradle_scope" value="main"/>
|
||||||
<attribute name="gradle_used_by_scope" value="main,test"/>
|
<attribute name="gradle_used_by_scope" value="main,test,jmh"/>
|
||||||
|
</attributes>
|
||||||
|
</classpathentry>
|
||||||
|
<classpathentry kind="src" output="bin/jmh" path="src/jmh/java">
|
||||||
|
<attributes>
|
||||||
|
<attribute name="gradle_scope" value="jmh"/>
|
||||||
|
<attribute name="gradle_used_by_scope" value="jmh"/>
|
||||||
|
<attribute name="test" value="true"/>
|
||||||
</attributes>
|
</attributes>
|
||||||
</classpathentry>
|
</classpathentry>
|
||||||
<classpathentry kind="con" path="org.eclipse.jdt.launching.JRE_CONTAINER/org.eclipse.jdt.internal.debug.ui.launcher.StandardVMType/JavaSE-21/"/>
|
<classpathentry kind="con" path="org.eclipse.jdt.launching.JRE_CONTAINER/org.eclipse.jdt.internal.debug.ui.launcher.StandardVMType/JavaSE-21/"/>
|
||||||
|
|||||||
51
.github/workflows/benchmarks.yml
vendored
Normal file
51
.github/workflows/benchmarks.yml
vendored
Normal file
@@ -0,0 +1,51 @@
|
|||||||
|
# Runs the JMH benchmark suite and publishes the raw reports as a build artifact.
#
# Triggers:
#   * manually (workflow_dispatch),
#   * weekly on Monday at 03:00 UTC,
#   * on pushes to main that touch production code, benchmark code, or the build itself.
name: Benchmarks

on:
  workflow_dispatch:
  schedule:
    - cron: '0 3 * * 1'
  push:
    branches:
      - main
    paths:
      - 'src/main/**'
      - 'src/jmh/**'
      - 'build.gradle'
      # Kept in sync with the build-related path filter of pages.yml.
      - 'settings.gradle'
      - 'gradle/**'
      - 'gradlew'
      - 'gradlew.bat'
      - '.github/workflows/benchmarks.yml'

jobs:
  jmh:
    runs-on: ubuntu-latest
    timeout-minutes: 30

    # The job only reads the repository; results leave the job as an artifact.
    permissions:
      contents: read

    steps:
      - name: Check out sources
        uses: actions/checkout@v4

      - name: Set up JDK 21
        uses: actions/setup-java@v4
        with:
          distribution: temurin
          java-version: '21'
          cache: gradle

      - name: Make Gradle executable
        run: chmod +x ./gradlew

      - name: Run JMH benchmarks
        run: ./gradlew clean jmh --no-daemon

      - name: Upload JMH reports
        uses: actions/upload-artifact@v4
        with:
          name: jmh-reports
          path: |
            build/reports/jmh/**
            build/results/jmh/**
          if-no-files-found: warn
|
||||||
12
.github/workflows/pages.yml
vendored
12
.github/workflows/pages.yml
vendored
@@ -4,6 +4,16 @@ on:
|
|||||||
push:
|
push:
|
||||||
branches:
|
branches:
|
||||||
- main
|
- main
|
||||||
|
paths:
|
||||||
|
- 'src/main/**'
|
||||||
|
- 'src/test/**'
|
||||||
|
- 'src/jmh/**'
|
||||||
|
- 'build.gradle'
|
||||||
|
- 'settings.gradle'
|
||||||
|
- 'gradle/**'
|
||||||
|
- 'gradlew'
|
||||||
|
- 'gradlew.bat'
|
||||||
|
- '.github/workflows/pages.yml'
|
||||||
workflow_dispatch:
|
workflow_dispatch:
|
||||||
|
|
||||||
permissions:
|
permissions:
|
||||||
@@ -37,7 +47,7 @@ jobs:
|
|||||||
uses: gradle/actions/setup-gradle@v4
|
uses: gradle/actions/setup-gradle@v4
|
||||||
|
|
||||||
- name: Build reports for publication
|
- name: Build reports for publication
|
||||||
run: ./gradlew --no-daemon clean build pmdMain javadoc jacocoTestReport pitest
|
run: ./gradlew --no-daemon clean build pmdMain javadoc jacocoTestReport pitest jmh
|
||||||
|
|
||||||
- name: Prepare gh-pages worktree
|
- name: Prepare gh-pages worktree
|
||||||
shell: bash
|
shell: bash
|
||||||
|
|||||||
24
build.gradle
24
build.gradle
@@ -1,15 +1,19 @@
|
|||||||
plugins {
|
plugins {
|
||||||
id 'java'
|
id 'java'
|
||||||
|
id 'eclipse'
|
||||||
id 'application'
|
id 'application'
|
||||||
id 'pmd'
|
id 'pmd'
|
||||||
id 'jacoco'
|
id 'jacoco'
|
||||||
id 'info.solidsoft.pitest' version '1.19.0'
|
id 'info.solidsoft.pitest' version '1.19.0'
|
||||||
|
id 'me.champeau.jmh' version '0.7.2'
|
||||||
id 'com.palantir.git-version' version '4.0.0'
|
id 'com.palantir.git-version' version '4.0.0'
|
||||||
}
|
}
|
||||||
|
|
||||||
group = 'org.egothor.stemmer'
|
group = 'org.egothor.stemmer'
|
||||||
version = gitVersion(prefix:'release@')
|
version = gitVersion(prefix:'release@')
|
||||||
|
|
||||||
|
def benchmarkReportsDirectory = layout.buildDirectory.dir('reports/jmh')
|
||||||
|
|
||||||
configurations {
|
configurations {
|
||||||
mockitoAgent
|
mockitoAgent
|
||||||
}
|
}
|
||||||
@@ -34,6 +38,8 @@ repositories {
|
|||||||
}
|
}
|
||||||
|
|
||||||
dependencies {
|
dependencies {
|
||||||
|
jmhImplementation sourceSets.main.output
|
||||||
|
|
||||||
testImplementation platform(libs.junit.bom)
|
testImplementation platform(libs.junit.bom)
|
||||||
testImplementation libs.junit.jupiter
|
testImplementation libs.junit.jupiter
|
||||||
testRuntimeOnly libs.junit.platform.launcher
|
testRuntimeOnly libs.junit.platform.launcher
|
||||||
@@ -104,6 +110,24 @@ application {
|
|||||||
mainClass = 'org.egothor.stemmer.Compile'
|
mainClass = 'org.egothor.stemmer.Compile'
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// JMH plugin configuration.
//
// Values set here are handed to the JMH runner and take precedence over the
// annotations on the benchmark classes. Warm-up count, measurement count and
// benchmark mode repeat what the classes already declare (@Warmup(iterations = 3),
// @Measurement(iterations = 5), Mode.AverageTime), so they are harmless.
jmh {
    jmhVersion = '1.37'
    warmupIterations = 3
    iterations = 5
    fork = 1
    benchmarkMode = ['avgt']
    // Deliberately no global 'timeUnit': a value here would override the per-class
    // @OutputTimeUnit annotations and force the compilation benchmark (declared in
    // milliseconds) to be reported in nanoseconds like the lookup benchmark.
    resultFormat = 'CSV'
    resultsFile = benchmarkReportsDirectory.map { it.file('jmh-results.csv').asFile }.get()
    humanOutputFile = benchmarkReportsDirectory.map { it.file('jmh-results.txt').asFile }.get()
    duplicateClassesStrategy = DuplicatesStrategy.EXCLUDE
}

// List the benchmark task next to the other verification tasks in `gradle tasks`.
tasks.named('jmh') {
    group = 'verification'
    description = 'Runs JMH benchmarks for the Radixor algorithmic core.'
}
|
||||||
|
|
||||||
javadoc {
|
javadoc {
|
||||||
failOnError = false
|
failOnError = false
|
||||||
|
|
||||||
|
|||||||
@@ -0,0 +1,208 @@
|
|||||||
|
package org.egothor.stemmer.benchmark;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.io.StringReader;
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.LinkedHashSet;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.Locale;
|
||||||
|
import java.util.Objects;
|
||||||
|
import java.util.SplittableRandom;
|
||||||
|
import org.egothor.stemmer.FrequencyTrie;
|
||||||
|
import org.egothor.stemmer.PatchCommandEncoder;
|
||||||
|
import org.egothor.stemmer.ReductionSettings;
|
||||||
|
import org.egothor.stemmer.StemmerDictionaryParser;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Builds deterministic benchmark corpora used by the JMH suite.
|
||||||
|
*
|
||||||
|
* <p>
|
||||||
|
* The generated corpus is intentionally synthetic but morphology-shaped: it
|
||||||
|
* creates a stable base vocabulary and derives common inflectional and
|
||||||
|
* derivational variants from each stem. The corpus also injects a controlled
|
||||||
|
* amount of homograph ambiguity so that {@link FrequencyTrie#getAll(String)} is
|
||||||
|
* measured on keys that really produce multiple candidate patch commands.
|
||||||
|
* </p>
|
||||||
|
*/
|
||||||
|
final class BenchmarkCorpusSupport {
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Prefixes used to synthesize pronounceable stems.
|
||||||
|
*/
|
||||||
|
private static final String[] PREFIXES = {
|
||||||
|
"adapt", "align", "anchor", "answer", "apply", "balance", "build", "capture", "center",
|
||||||
|
"change", "collect", "connect", "convert", "cover", "create", "cycle", "declare", "define",
|
||||||
|
"deliver", "derive", "design", "detect", "develop", "drive", "encode", "extend", "filter",
|
||||||
|
"form", "govern", "handle", "improve", "index", "inform", "inspect", "join", "launch",
|
||||||
|
"limit", "manage", "map", "model", "move", "observe", "operate", "organ", "pattern",
|
||||||
|
"perform", "plan", "predict", "prepare", "process", "project", "protect", "publish", "query",
|
||||||
|
"reduce", "refresh", "render", "repeat", "resolve", "return", "scale", "search", "select",
|
||||||
|
"shape", "signal", "sort", "state", "store", "stream", "structure", "supply", "support",
|
||||||
|
"switch", "trace", "transform", "update", "validate", "value"
|
||||||
|
};
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Suffixes used to diversify stems.
|
||||||
|
*/
|
||||||
|
private static final String[] STEM_SUFFIXES = {
|
||||||
|
"", "er", "or", "al", "ive", "ion", "ent", "ant", "ure", "ment", "ist", "ity"
|
||||||
|
};
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Number of neighboring stems sharing one ambiguous surface form.
|
||||||
|
*/
|
||||||
|
private static final int HOMOGRAPH_GROUP_SIZE = 4;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Utility class.
|
||||||
|
*/
|
||||||
|
private BenchmarkCorpusSupport() {
|
||||||
|
throw new AssertionError("No instances.");
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Creates a deterministic benchmark corpus.
|
||||||
|
*
|
||||||
|
* @param stemCount number of canonical stems to generate
|
||||||
|
* @return immutable benchmark corpus description
|
||||||
|
*/
|
||||||
|
static BenchmarkCorpus createCorpus(final int stemCount) {
|
||||||
|
if (stemCount < 1) {
|
||||||
|
throw new IllegalArgumentException("stemCount must be at least 1.");
|
||||||
|
}
|
||||||
|
|
||||||
|
final StringBuilder dictionaryBuilder = new StringBuilder(stemCount * 120);
|
||||||
|
final LinkedHashSet<String> lookupKeys = new LinkedHashSet<>(stemCount * 8);
|
||||||
|
final LinkedHashSet<String> ambiguousLookupKeys = new LinkedHashSet<>(Math.max(1, stemCount / 4));
|
||||||
|
final SplittableRandom random = new SplittableRandom(20260414L);
|
||||||
|
|
||||||
|
for (int index = 0; index < stemCount; index++) {
|
||||||
|
final String stem = createStem(index);
|
||||||
|
final String[] variants = createVariants(stem, random, index);
|
||||||
|
|
||||||
|
dictionaryBuilder.append(stem);
|
||||||
|
lookupKeys.add(stem);
|
||||||
|
for (String variant : variants) {
|
||||||
|
dictionaryBuilder.append(' ').append(variant);
|
||||||
|
lookupKeys.add(variant);
|
||||||
|
}
|
||||||
|
|
||||||
|
final String homograph = createHomograph(index);
|
||||||
|
dictionaryBuilder.append(' ').append(homograph);
|
||||||
|
lookupKeys.add(homograph);
|
||||||
|
ambiguousLookupKeys.add(homograph);
|
||||||
|
|
||||||
|
dictionaryBuilder.append('\n');
|
||||||
|
}
|
||||||
|
|
||||||
|
return new BenchmarkCorpus(
|
||||||
|
dictionaryBuilder.toString(),
|
||||||
|
lookupKeys.toArray(String[]::new),
|
||||||
|
ambiguousLookupKeys.toArray(String[]::new));
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Builds a compiled trie from benchmark corpus text.
|
||||||
|
*
|
||||||
|
* @param corpusText line-oriented dictionary text
|
||||||
|
* @param reductionSettings reduction settings
|
||||||
|
* @param storeOriginalStem whether the canonical stem itself should also be
|
||||||
|
* inserted with the no-op patch
|
||||||
|
* @return compiled trie containing patch commands
|
||||||
|
* @throws IOException if parsing fails
|
||||||
|
*/
|
||||||
|
static FrequencyTrie<String> compilePatchTrie(
|
||||||
|
final String corpusText,
|
||||||
|
final ReductionSettings reductionSettings,
|
||||||
|
final boolean storeOriginalStem) throws IOException {
|
||||||
|
Objects.requireNonNull(corpusText, "corpusText");
|
||||||
|
Objects.requireNonNull(reductionSettings, "reductionSettings");
|
||||||
|
|
||||||
|
final FrequencyTrie.Builder<String> builder = new FrequencyTrie.Builder<>(String[]::new, reductionSettings);
|
||||||
|
final PatchCommandEncoder encoder = new PatchCommandEncoder();
|
||||||
|
|
||||||
|
StemmerDictionaryParser.parse(
|
||||||
|
new StringReader(corpusText),
|
||||||
|
"benchmark-corpus",
|
||||||
|
(stem, variants, lineNumber) -> {
|
||||||
|
if (storeOriginalStem) {
|
||||||
|
builder.put(stem, encoder.encode(stem, stem));
|
||||||
|
}
|
||||||
|
for (String variant : variants) {
|
||||||
|
builder.put(variant, encoder.encode(variant, stem));
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
|
return builder.build();
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Creates one deterministic stem.
|
||||||
|
*
|
||||||
|
* @param index stem ordinal
|
||||||
|
* @return generated stem
|
||||||
|
*/
|
||||||
|
private static String createStem(final int index) {
|
||||||
|
final String prefix = PREFIXES[index % PREFIXES.length];
|
||||||
|
final String suffix = STEM_SUFFIXES[(index / PREFIXES.length) % STEM_SUFFIXES.length];
|
||||||
|
return (prefix + suffix + base36(index)).toLowerCase(Locale.ROOT);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Creates a set of deterministic variants for one stem.
|
||||||
|
*
|
||||||
|
* @param stem canonical stem
|
||||||
|
* @param random deterministic random source
|
||||||
|
* @param index stem ordinal
|
||||||
|
* @return generated variants in stable order
|
||||||
|
*/
|
||||||
|
private static String[] createVariants(final String stem, final SplittableRandom random, final int index) {
|
||||||
|
final List<String> variants = new ArrayList<>(8);
|
||||||
|
variants.add(stem + "s");
|
||||||
|
variants.add(stem + "ed");
|
||||||
|
variants.add(stem + "ing");
|
||||||
|
variants.add(stem + "er");
|
||||||
|
variants.add(stem + "ers");
|
||||||
|
variants.add("pre" + stem);
|
||||||
|
variants.add(stem + random.nextInt(10));
|
||||||
|
|
||||||
|
if ((index & 1) == 0) {
|
||||||
|
variants.add(stem + "ly");
|
||||||
|
}
|
||||||
|
if (stem.length() > 5) {
|
||||||
|
variants.add(stem.substring(0, stem.length() - 1));
|
||||||
|
}
|
||||||
|
return variants.toArray(String[]::new);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Creates an ambiguous surface form shared by a small group of stems.
|
||||||
|
*
|
||||||
|
* @param index stem ordinal
|
||||||
|
* @return shared homograph form
|
||||||
|
*/
|
||||||
|
private static String createHomograph(final int index) {
|
||||||
|
return "shared" + base36(index / HOMOGRAPH_GROUP_SIZE);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Converts an ordinal into a compact base-36 discriminator.
|
||||||
|
*
|
||||||
|
* @param value numeric value
|
||||||
|
* @return compact discriminator
|
||||||
|
*/
|
||||||
|
private static String base36(final int value) {
|
||||||
|
return Integer.toString(value, Character.MAX_RADIX);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Immutable benchmark corpus.
|
||||||
|
*
|
||||||
|
* @param dictionaryText full line-oriented dictionary text
|
||||||
|
* @param lookupKeys keys used for general lookup measurements
|
||||||
|
* @param ambiguousLookupKeys keys that return multiple patch candidates from
|
||||||
|
* {@code getAll()}
|
||||||
|
*/
|
||||||
|
record BenchmarkCorpus(String dictionaryText, String[] lookupKeys, String[] ambiguousLookupKeys) {
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -0,0 +1,94 @@
|
|||||||
|
package org.egothor.stemmer.benchmark;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.util.concurrent.TimeUnit;
|
||||||
|
import org.egothor.stemmer.ReductionMode;
|
||||||
|
import org.egothor.stemmer.ReductionSettings;
|
||||||
|
import org.openjdk.jmh.annotations.Benchmark;
|
||||||
|
import org.openjdk.jmh.annotations.BenchmarkMode;
|
||||||
|
import org.openjdk.jmh.annotations.Level;
|
||||||
|
import org.openjdk.jmh.annotations.Measurement;
|
||||||
|
import org.openjdk.jmh.annotations.Mode;
|
||||||
|
import org.openjdk.jmh.annotations.OutputTimeUnit;
|
||||||
|
import org.openjdk.jmh.annotations.Param;
|
||||||
|
import org.openjdk.jmh.annotations.Scope;
|
||||||
|
import org.openjdk.jmh.annotations.Setup;
|
||||||
|
import org.openjdk.jmh.annotations.State;
|
||||||
|
import org.openjdk.jmh.annotations.Warmup;
|
||||||
|
import org.openjdk.jmh.infra.Blackhole;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Benchmarks end-to-end dictionary compilation for different reduction modes.
|
||||||
|
*
|
||||||
|
* <p>
|
||||||
|
* This benchmark measures the offline path that matters for dictionary build
|
||||||
|
* workflows: dictionary parsing, patch-command generation, mutable trie
|
||||||
|
* population, subtree reduction, and freezing into the compiled read-only trie.
|
||||||
|
* </p>
|
||||||
|
*/
|
||||||
|
@BenchmarkMode(Mode.AverageTime)
|
||||||
|
@OutputTimeUnit(TimeUnit.MILLISECONDS)
|
||||||
|
@Warmup(iterations = 3, time = 1)
|
||||||
|
@Measurement(iterations = 5, time = 1)
|
||||||
|
public class FrequencyTrieCompilationBenchmark {
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Shared benchmark state for compilation scenarios.
|
||||||
|
*/
|
||||||
|
@State(Scope.Benchmark)
|
||||||
|
public static class CompilationState {
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Number of canonical stems to generate.
|
||||||
|
*/
|
||||||
|
@Param({ "2000", "10000" })
|
||||||
|
public int stemCount;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Reduction mode used during trie compilation.
|
||||||
|
*/
|
||||||
|
@Param({
|
||||||
|
"MERGE_SUBTREES_WITH_EQUIVALENT_RANKED_GET_ALL_RESULTS",
|
||||||
|
"MERGE_SUBTREES_WITH_EQUIVALENT_UNORDERED_GET_ALL_RESULTS",
|
||||||
|
"MERGE_SUBTREES_WITH_EQUIVALENT_DOMINANT_GET_RESULTS"
|
||||||
|
})
|
||||||
|
public String reductionMode;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Whether to store the stem itself using the canonical no-op patch.
|
||||||
|
*/
|
||||||
|
@Param({ "true", "false" })
|
||||||
|
public boolean storeOriginalStem;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Full dictionary text used as the benchmark input.
|
||||||
|
*/
|
||||||
|
private String dictionaryText;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Initializes the benchmark state.
|
||||||
|
*/
|
||||||
|
@Setup(Level.Trial)
|
||||||
|
public void setUp() {
|
||||||
|
this.dictionaryText = BenchmarkCorpusSupport.createCorpus(this.stemCount).dictionaryText();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Measures end-to-end patch trie compilation latency.
|
||||||
|
*
|
||||||
|
* @param state prepared compilation state
|
||||||
|
* @param blackhole sink preventing dead-code elimination
|
||||||
|
* @throws IOException if dictionary parsing fails
|
||||||
|
*/
|
||||||
|
@Benchmark
|
||||||
|
public void compilePatchTrie(final CompilationState state, final Blackhole blackhole) throws IOException {
|
||||||
|
final ReductionSettings settings =
|
||||||
|
ReductionSettings.withDefaults(ReductionMode.valueOf(state.reductionMode));
|
||||||
|
blackhole.consume(
|
||||||
|
BenchmarkCorpusSupport.compilePatchTrie(
|
||||||
|
state.dictionaryText,
|
||||||
|
settings,
|
||||||
|
state.storeOriginalStem));
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -0,0 +1,160 @@
|
|||||||
|
package org.egothor.stemmer.benchmark;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.util.concurrent.TimeUnit;
|
||||||
|
import org.egothor.stemmer.FrequencyTrie;
|
||||||
|
import org.egothor.stemmer.PatchCommandEncoder;
|
||||||
|
import org.egothor.stemmer.ReductionMode;
|
||||||
|
import org.egothor.stemmer.ReductionSettings;
|
||||||
|
import org.openjdk.jmh.annotations.Benchmark;
|
||||||
|
import org.openjdk.jmh.annotations.BenchmarkMode;
|
||||||
|
import org.openjdk.jmh.annotations.Level;
|
||||||
|
import org.openjdk.jmh.annotations.Measurement;
|
||||||
|
import org.openjdk.jmh.annotations.Mode;
|
||||||
|
import org.openjdk.jmh.annotations.OutputTimeUnit;
|
||||||
|
import org.openjdk.jmh.annotations.Param;
|
||||||
|
import org.openjdk.jmh.annotations.Scope;
|
||||||
|
import org.openjdk.jmh.annotations.Setup;
|
||||||
|
import org.openjdk.jmh.annotations.State;
|
||||||
|
import org.openjdk.jmh.annotations.Warmup;
|
||||||
|
import org.openjdk.jmh.infra.Blackhole;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Benchmarks lookup-oriented operations on compiled Radixor tries.
|
||||||
|
*
|
||||||
|
* <p>
|
||||||
|
* The benchmark uses a deterministic morphology-shaped corpus and measures the
|
||||||
|
* latency of the hot-path lookup operations that are relevant at runtime:
|
||||||
|
* retrieving the preferred patch command, retrieving all candidate patch
|
||||||
|
* commands, and reconstructing stems from the returned patch values.
|
||||||
|
* </p>
|
||||||
|
*/
|
||||||
|
@BenchmarkMode(Mode.AverageTime)
|
||||||
|
@OutputTimeUnit(TimeUnit.NANOSECONDS)
|
||||||
|
@Warmup(iterations = 3, time = 1)
|
||||||
|
@Measurement(iterations = 5, time = 1)
|
||||||
|
public class FrequencyTrieLookupBenchmark {
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Shared benchmark state for lookup scenarios.
|
||||||
|
*/
|
||||||
|
@State(Scope.Benchmark)
|
||||||
|
public static class LookupState {
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Number of canonical stems to generate.
|
||||||
|
*/
|
||||||
|
@Param({ "2000", "10000" })
|
||||||
|
public int stemCount;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Reduction mode used to compile the lookup trie.
|
||||||
|
*/
|
||||||
|
@Param({
|
||||||
|
"MERGE_SUBTREES_WITH_EQUIVALENT_RANKED_GET_ALL_RESULTS",
|
||||||
|
"MERGE_SUBTREES_WITH_EQUIVALENT_UNORDERED_GET_ALL_RESULTS",
|
||||||
|
"MERGE_SUBTREES_WITH_EQUIVALENT_DOMINANT_GET_RESULTS"
|
||||||
|
})
|
||||||
|
public String reductionMode;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Compiled trie under test.
|
||||||
|
*/
|
||||||
|
private FrequencyTrie<String> trie;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Deterministic lookup keys.
|
||||||
|
*/
|
||||||
|
private String[] lookupKeys;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Keys that are known to return multiple patch candidates from
|
||||||
|
* {@code getAll()}.
|
||||||
|
*/
|
||||||
|
private String[] ambiguousLookupKeys;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Initializes the benchmark state.
|
||||||
|
*
|
||||||
|
* @throws IOException if corpus compilation fails
|
||||||
|
*/
|
||||||
|
@Setup(Level.Trial)
|
||||||
|
public void setUp() throws IOException {
|
||||||
|
final BenchmarkCorpusSupport.BenchmarkCorpus corpus = BenchmarkCorpusSupport.createCorpus(this.stemCount);
|
||||||
|
final ReductionSettings settings =
|
||||||
|
ReductionSettings.withDefaults(ReductionMode.valueOf(this.reductionMode));
|
||||||
|
this.trie = BenchmarkCorpusSupport.compilePatchTrie(corpus.dictionaryText(), settings, true);
|
||||||
|
this.lookupKeys = corpus.lookupKeys();
|
||||||
|
this.ambiguousLookupKeys = corpus.ambiguousLookupKeys();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Measures preferred patch lookup latency.
|
||||||
|
*
|
||||||
|
* @param state prepared lookup state
|
||||||
|
* @param blackhole sink preventing dead-code elimination
|
||||||
|
*/
|
||||||
|
@Benchmark
|
||||||
|
public void lookupPreferredPatch(final LookupState state, final Blackhole blackhole) {
|
||||||
|
final String[] keys = state.lookupKeys;
|
||||||
|
for (String key : keys) {
|
||||||
|
final String patch = state.trie.get(key);
|
||||||
|
if (patch == null) {
|
||||||
|
throw new IllegalStateException("Missing preferred patch for key " + key + '.');
|
||||||
|
}
|
||||||
|
blackhole.consume(patch);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Measures retrieval of all patch candidates on ambiguous forms.
|
||||||
|
*
|
||||||
|
* @param state prepared lookup state
|
||||||
|
* @param blackhole sink preventing dead-code elimination
|
||||||
|
*/
|
||||||
|
@Benchmark
|
||||||
|
public void lookupAllPatches(final LookupState state, final Blackhole blackhole) {
|
||||||
|
final String[] keys = state.ambiguousLookupKeys;
|
||||||
|
for (String key : keys) {
|
||||||
|
final String[] patches = state.trie.getAll(key);
|
||||||
|
if (patches.length < 2) {
|
||||||
|
throw new IllegalStateException("Expected multiple patches for key " + key + '.');
|
||||||
|
}
|
||||||
|
blackhole.consume(patches);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Measures end-to-end preferred stemming from lookup plus patch application.
|
||||||
|
*
|
||||||
|
* @param state prepared lookup state
|
||||||
|
* @param blackhole sink preventing dead-code elimination
|
||||||
|
*/
|
||||||
|
@Benchmark
|
||||||
|
public void stemPreferredVariant(final LookupState state, final Blackhole blackhole) {
|
||||||
|
final String[] keys = state.lookupKeys;
|
||||||
|
for (String key : keys) {
|
||||||
|
final String patch = state.trie.get(key);
|
||||||
|
blackhole.consume(PatchCommandEncoder.apply(key, patch));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Measures end-to-end full candidate stemming from {@code getAll()} plus
|
||||||
|
* patch application.
|
||||||
|
*
|
||||||
|
* @param state prepared lookup state
|
||||||
|
* @param blackhole sink preventing dead-code elimination
|
||||||
|
*/
|
||||||
|
@Benchmark
|
||||||
|
public void stemAllVariants(final LookupState state, final Blackhole blackhole) {
|
||||||
|
final String[] keys = state.ambiguousLookupKeys;
|
||||||
|
for (String key : keys) {
|
||||||
|
final String[] patches = state.trie.getAll(key);
|
||||||
|
for (String patch : patches) {
|
||||||
|
blackhole.consume(PatchCommandEncoder.apply(key, patch));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
10
src/jmh/java/org/egothor/stemmer/benchmark/package-info.java
Normal file
10
src/jmh/java/org/egothor/stemmer/benchmark/package-info.java
Normal file
@@ -0,0 +1,10 @@
|
|||||||
|
/**
|
||||||
|
* JMH benchmarks for the Radixor algorithmic core.
|
||||||
|
*
|
||||||
|
* <p>
|
||||||
|
* The benchmarks in this package focus on trie lookup latency, retrieval of all
|
||||||
|
* candidate patch commands, and end-to-end dictionary compilation with
|
||||||
|
* different reduction modes.
|
||||||
|
* </p>
|
||||||
|
*/
|
||||||
|
package org.egothor.stemmer.benchmark;
|
||||||
Reference in New Issue
Block a user