feat: JMH benchmarks added
This commit is contained in:
13
.classpath
13
.classpath
@@ -3,20 +3,27 @@
|
||||
<classpathentry kind="src" output="bin/main" path="src/main/java">
|
||||
<attributes>
|
||||
<attribute name="gradle_scope" value="main"/>
|
||||
<attribute name="gradle_used_by_scope" value="main,test"/>
|
||||
<attribute name="gradle_used_by_scope" value="main,test,jmh"/>
|
||||
</attributes>
|
||||
</classpathentry>
|
||||
<classpathentry kind="src" output="bin/test" path="src/test/java">
|
||||
<attributes>
|
||||
<attribute name="gradle_scope" value="test"/>
|
||||
<attribute name="gradle_used_by_scope" value="test"/>
|
||||
<attribute name="gradle_used_by_scope" value="test,jmh"/>
|
||||
<attribute name="test" value="true"/>
|
||||
</attributes>
|
||||
</classpathentry>
|
||||
<classpathentry kind="src" output="bin/main" path="src/main/resources">
|
||||
<attributes>
|
||||
<attribute name="gradle_scope" value="main"/>
|
||||
<attribute name="gradle_used_by_scope" value="main,test"/>
|
||||
<attribute name="gradle_used_by_scope" value="main,test,jmh"/>
|
||||
</attributes>
|
||||
</classpathentry>
|
||||
<classpathentry kind="src" output="bin/jmh" path="src/jmh/java">
|
||||
<attributes>
|
||||
<attribute name="gradle_scope" value="jmh"/>
|
||||
<attribute name="gradle_used_by_scope" value="jmh"/>
|
||||
<attribute name="test" value="true"/>
|
||||
</attributes>
|
||||
</classpathentry>
|
||||
<classpathentry kind="con" path="org.eclipse.jdt.launching.JRE_CONTAINER/org.eclipse.jdt.internal.debug.ui.launcher.StandardVMType/JavaSE-21/"/>
|
||||
|
||||
51
.github/workflows/benchmarks.yml
vendored
Normal file
51
.github/workflows/benchmarks.yml
vendored
Normal file
@@ -0,0 +1,51 @@
|
||||
# Runs the JMH benchmark suite manually, weekly, and on pushes to main that
# touch production code, benchmark code, or the build itself.
name: Benchmarks

on:
  workflow_dispatch:
  schedule:
    # Every Monday at 03:00 UTC.
    - cron: '0 3 * * 1'
  push:
    branches:
      - main
    paths:
      - 'src/main/**'
      - 'src/jmh/**'
      - 'build.gradle'
      # Kept in line with the path filter of the pages workflow.
      - 'settings.gradle'
      - 'gradle/**'
      - 'gradlew'
      - 'gradlew.bat'
      - '.github/workflows/benchmarks.yml'

jobs:
  jmh:
    runs-on: ubuntu-latest
    timeout-minutes: 30

    permissions:
      contents: read

    steps:
      - name: Check out sources
        uses: actions/checkout@v4

      - name: Set up JDK 21
        uses: actions/setup-java@v4
        with:
          distribution: temurin
          java-version: '21'
          cache: gradle

      - name: Make Gradle executable
        run: chmod +x ./gradlew

      - name: Run JMH benchmarks
        run: ./gradlew clean jmh --no-daemon

      - name: Upload JMH reports
        # Still publish whatever was written when the benchmark step fails,
        # so partial results remain available for diagnosis.
        if: ${{ !cancelled() }}
        uses: actions/upload-artifact@v4
        with:
          name: jmh-reports
          path: |
            build/reports/jmh/**
            build/results/jmh/**
          if-no-files-found: warn
|
||||
12
.github/workflows/pages.yml
vendored
12
.github/workflows/pages.yml
vendored
@@ -4,6 +4,16 @@ on:
|
||||
push:
|
||||
branches:
|
||||
- main
|
||||
paths:
|
||||
- 'src/main/**'
|
||||
- 'src/test/**'
|
||||
- 'src/jmh/**'
|
||||
- 'build.gradle'
|
||||
- 'settings.gradle'
|
||||
- 'gradle/**'
|
||||
- 'gradlew'
|
||||
- 'gradlew.bat'
|
||||
- '.github/workflows/pages.yml'
|
||||
workflow_dispatch:
|
||||
|
||||
permissions:
|
||||
@@ -37,7 +47,7 @@ jobs:
|
||||
uses: gradle/actions/setup-gradle@v4
|
||||
|
||||
- name: Build reports for publication
|
||||
run: ./gradlew --no-daemon clean build pmdMain javadoc jacocoTestReport pitest
|
||||
run: ./gradlew --no-daemon clean build pmdMain javadoc jacocoTestReport pitest jmh
|
||||
|
||||
- name: Prepare gh-pages worktree
|
||||
shell: bash
|
||||
|
||||
24
build.gradle
24
build.gradle
@@ -1,15 +1,19 @@
|
||||
plugins {
|
||||
id 'java'
|
||||
id 'eclipse'
|
||||
id 'application'
|
||||
id 'pmd'
|
||||
id 'jacoco'
|
||||
id 'info.solidsoft.pitest' version '1.19.0'
|
||||
id 'me.champeau.jmh' version '0.7.2'
|
||||
id 'com.palantir.git-version' version '4.0.0'
|
||||
}
|
||||
|
||||
group = 'org.egothor.stemmer'
|
||||
version = gitVersion(prefix:'release@')
|
||||
|
||||
def benchmarkReportsDirectory = layout.buildDirectory.dir('reports/jmh')
|
||||
|
||||
configurations {
|
||||
mockitoAgent
|
||||
}
|
||||
@@ -34,6 +38,8 @@ repositories {
|
||||
}
|
||||
|
||||
dependencies {
|
||||
jmhImplementation sourceSets.main.output
|
||||
|
||||
testImplementation platform(libs.junit.bom)
|
||||
testImplementation libs.junit.jupiter
|
||||
testRuntimeOnly libs.junit.platform.launcher
|
||||
@@ -104,6 +110,24 @@ application {
|
||||
mainClass = 'org.egothor.stemmer.Compile'
|
||||
}
|
||||
|
||||
// Project-wide defaults for the JMH Gradle plugin.
//
// Values configured here are passed to the JMH runner as explicit options, and
// explicit runner options take precedence over the annotations declared on the
// benchmark classes. The output time unit is therefore intentionally not set
// here: the lookup benchmarks declare nanoseconds and the compilation benchmark
// declares milliseconds through @OutputTimeUnit, and a global 'ns' setting
// would override the latter.
jmh {
    jmhVersion = '1.37'
    warmupIterations = 3
    iterations = 5
    fork = 1
    benchmarkMode = ['avgt']
    resultFormat = 'CSV'
    // Machine-readable and human-readable results both land in build/reports/jmh.
    resultsFile = benchmarkReportsDirectory.map { it.file('jmh-results.csv').asFile }.get()
    humanOutputFile = benchmarkReportsDirectory.map { it.file('jmh-results.txt').asFile }.get()
    duplicateClassesStrategy = DuplicatesStrategy.EXCLUDE
}
|
||||
|
||||
tasks.named('jmh') {
|
||||
group = 'verification'
|
||||
description = 'Runs JMH benchmarks for the Radixor algorithmic core.'
|
||||
}
|
||||
|
||||
javadoc {
|
||||
failOnError = false
|
||||
|
||||
|
||||
@@ -0,0 +1,208 @@
|
||||
package org.egothor.stemmer.benchmark;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.StringReader;
|
||||
import java.util.ArrayList;
|
||||
import java.util.LinkedHashSet;
|
||||
import java.util.List;
|
||||
import java.util.Locale;
|
||||
import java.util.Objects;
|
||||
import java.util.SplittableRandom;
|
||||
import org.egothor.stemmer.FrequencyTrie;
|
||||
import org.egothor.stemmer.PatchCommandEncoder;
|
||||
import org.egothor.stemmer.ReductionSettings;
|
||||
import org.egothor.stemmer.StemmerDictionaryParser;
|
||||
|
||||
/**
|
||||
* Builds deterministic benchmark corpora used by the JMH suite.
|
||||
*
|
||||
* <p>
|
||||
* The generated corpus is intentionally synthetic but morphology-shaped: it
|
||||
* creates a stable base vocabulary and derives common inflectional and
|
||||
* derivational variants from each stem. The corpus also injects a controlled
|
||||
* amount of homograph ambiguity so that {@link FrequencyTrie#getAll(String)} is
|
||||
* measured on keys that really produce multiple candidate patch commands.
|
||||
* </p>
|
||||
*/
|
||||
final class BenchmarkCorpusSupport {
|
||||
|
||||
/**
|
||||
* Prefixes used to synthesize pronounceable stems.
|
||||
*/
|
||||
private static final String[] PREFIXES = {
|
||||
"adapt", "align", "anchor", "answer", "apply", "balance", "build", "capture", "center",
|
||||
"change", "collect", "connect", "convert", "cover", "create", "cycle", "declare", "define",
|
||||
"deliver", "derive", "design", "detect", "develop", "drive", "encode", "extend", "filter",
|
||||
"form", "govern", "handle", "improve", "index", "inform", "inspect", "join", "launch",
|
||||
"limit", "manage", "map", "model", "move", "observe", "operate", "organ", "pattern",
|
||||
"perform", "plan", "predict", "prepare", "process", "project", "protect", "publish", "query",
|
||||
"reduce", "refresh", "render", "repeat", "resolve", "return", "scale", "search", "select",
|
||||
"shape", "signal", "sort", "state", "store", "stream", "structure", "supply", "support",
|
||||
"switch", "trace", "transform", "update", "validate", "value"
|
||||
};
|
||||
|
||||
/**
|
||||
* Suffixes used to diversify stems.
|
||||
*/
|
||||
private static final String[] STEM_SUFFIXES = {
|
||||
"", "er", "or", "al", "ive", "ion", "ent", "ant", "ure", "ment", "ist", "ity"
|
||||
};
|
||||
|
||||
/**
|
||||
* Number of neighboring stems sharing one ambiguous surface form.
|
||||
*/
|
||||
private static final int HOMOGRAPH_GROUP_SIZE = 4;
|
||||
|
||||
/**
|
||||
* Utility class.
|
||||
*/
|
||||
private BenchmarkCorpusSupport() {
|
||||
throw new AssertionError("No instances.");
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates a deterministic benchmark corpus.
|
||||
*
|
||||
* @param stemCount number of canonical stems to generate
|
||||
* @return immutable benchmark corpus description
|
||||
*/
|
||||
static BenchmarkCorpus createCorpus(final int stemCount) {
|
||||
if (stemCount < 1) {
|
||||
throw new IllegalArgumentException("stemCount must be at least 1.");
|
||||
}
|
||||
|
||||
final StringBuilder dictionaryBuilder = new StringBuilder(stemCount * 120);
|
||||
final LinkedHashSet<String> lookupKeys = new LinkedHashSet<>(stemCount * 8);
|
||||
final LinkedHashSet<String> ambiguousLookupKeys = new LinkedHashSet<>(Math.max(1, stemCount / 4));
|
||||
final SplittableRandom random = new SplittableRandom(20260414L);
|
||||
|
||||
for (int index = 0; index < stemCount; index++) {
|
||||
final String stem = createStem(index);
|
||||
final String[] variants = createVariants(stem, random, index);
|
||||
|
||||
dictionaryBuilder.append(stem);
|
||||
lookupKeys.add(stem);
|
||||
for (String variant : variants) {
|
||||
dictionaryBuilder.append(' ').append(variant);
|
||||
lookupKeys.add(variant);
|
||||
}
|
||||
|
||||
final String homograph = createHomograph(index);
|
||||
dictionaryBuilder.append(' ').append(homograph);
|
||||
lookupKeys.add(homograph);
|
||||
ambiguousLookupKeys.add(homograph);
|
||||
|
||||
dictionaryBuilder.append('\n');
|
||||
}
|
||||
|
||||
return new BenchmarkCorpus(
|
||||
dictionaryBuilder.toString(),
|
||||
lookupKeys.toArray(String[]::new),
|
||||
ambiguousLookupKeys.toArray(String[]::new));
|
||||
}
|
||||
|
||||
/**
|
||||
* Builds a compiled trie from benchmark corpus text.
|
||||
*
|
||||
* @param corpusText line-oriented dictionary text
|
||||
* @param reductionSettings reduction settings
|
||||
* @param storeOriginalStem whether the canonical stem itself should also be
|
||||
* inserted with the no-op patch
|
||||
* @return compiled trie containing patch commands
|
||||
* @throws IOException if parsing fails
|
||||
*/
|
||||
static FrequencyTrie<String> compilePatchTrie(
|
||||
final String corpusText,
|
||||
final ReductionSettings reductionSettings,
|
||||
final boolean storeOriginalStem) throws IOException {
|
||||
Objects.requireNonNull(corpusText, "corpusText");
|
||||
Objects.requireNonNull(reductionSettings, "reductionSettings");
|
||||
|
||||
final FrequencyTrie.Builder<String> builder = new FrequencyTrie.Builder<>(String[]::new, reductionSettings);
|
||||
final PatchCommandEncoder encoder = new PatchCommandEncoder();
|
||||
|
||||
StemmerDictionaryParser.parse(
|
||||
new StringReader(corpusText),
|
||||
"benchmark-corpus",
|
||||
(stem, variants, lineNumber) -> {
|
||||
if (storeOriginalStem) {
|
||||
builder.put(stem, encoder.encode(stem, stem));
|
||||
}
|
||||
for (String variant : variants) {
|
||||
builder.put(variant, encoder.encode(variant, stem));
|
||||
}
|
||||
});
|
||||
|
||||
return builder.build();
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates one deterministic stem.
|
||||
*
|
||||
* @param index stem ordinal
|
||||
* @return generated stem
|
||||
*/
|
||||
private static String createStem(final int index) {
|
||||
final String prefix = PREFIXES[index % PREFIXES.length];
|
||||
final String suffix = STEM_SUFFIXES[(index / PREFIXES.length) % STEM_SUFFIXES.length];
|
||||
return (prefix + suffix + base36(index)).toLowerCase(Locale.ROOT);
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates a set of deterministic variants for one stem.
|
||||
*
|
||||
* @param stem canonical stem
|
||||
* @param random deterministic random source
|
||||
* @param index stem ordinal
|
||||
* @return generated variants in stable order
|
||||
*/
|
||||
private static String[] createVariants(final String stem, final SplittableRandom random, final int index) {
|
||||
final List<String> variants = new ArrayList<>(8);
|
||||
variants.add(stem + "s");
|
||||
variants.add(stem + "ed");
|
||||
variants.add(stem + "ing");
|
||||
variants.add(stem + "er");
|
||||
variants.add(stem + "ers");
|
||||
variants.add("pre" + stem);
|
||||
variants.add(stem + random.nextInt(10));
|
||||
|
||||
if ((index & 1) == 0) {
|
||||
variants.add(stem + "ly");
|
||||
}
|
||||
if (stem.length() > 5) {
|
||||
variants.add(stem.substring(0, stem.length() - 1));
|
||||
}
|
||||
return variants.toArray(String[]::new);
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates an ambiguous surface form shared by a small group of stems.
|
||||
*
|
||||
* @param index stem ordinal
|
||||
* @return shared homograph form
|
||||
*/
|
||||
private static String createHomograph(final int index) {
|
||||
return "shared" + base36(index / HOMOGRAPH_GROUP_SIZE);
|
||||
}
|
||||
|
||||
/**
|
||||
* Converts an ordinal into a compact base-36 discriminator.
|
||||
*
|
||||
* @param value numeric value
|
||||
* @return compact discriminator
|
||||
*/
|
||||
private static String base36(final int value) {
|
||||
return Integer.toString(value, Character.MAX_RADIX);
|
||||
}
|
||||
|
||||
/**
|
||||
* Immutable benchmark corpus.
|
||||
*
|
||||
* @param dictionaryText full line-oriented dictionary text
|
||||
* @param lookupKeys keys used for general lookup measurements
|
||||
* @param ambiguousLookupKeys keys that return multiple patch candidates from
|
||||
* {@code getAll()}
|
||||
*/
|
||||
record BenchmarkCorpus(String dictionaryText, String[] lookupKeys, String[] ambiguousLookupKeys) {
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,94 @@
|
||||
package org.egothor.stemmer.benchmark;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.concurrent.TimeUnit;
|
||||
import org.egothor.stemmer.ReductionMode;
|
||||
import org.egothor.stemmer.ReductionSettings;
|
||||
import org.openjdk.jmh.annotations.Benchmark;
|
||||
import org.openjdk.jmh.annotations.BenchmarkMode;
|
||||
import org.openjdk.jmh.annotations.Level;
|
||||
import org.openjdk.jmh.annotations.Measurement;
|
||||
import org.openjdk.jmh.annotations.Mode;
|
||||
import org.openjdk.jmh.annotations.OutputTimeUnit;
|
||||
import org.openjdk.jmh.annotations.Param;
|
||||
import org.openjdk.jmh.annotations.Scope;
|
||||
import org.openjdk.jmh.annotations.Setup;
|
||||
import org.openjdk.jmh.annotations.State;
|
||||
import org.openjdk.jmh.annotations.Warmup;
|
||||
import org.openjdk.jmh.infra.Blackhole;
|
||||
|
||||
/**
|
||||
* Benchmarks end-to-end dictionary compilation for different reduction modes.
|
||||
*
|
||||
* <p>
|
||||
* This benchmark measures the offline path that matters for dictionary build
|
||||
* workflows: dictionary parsing, patch-command generation, mutable trie
|
||||
* population, subtree reduction, and freezing into the compiled read-only trie.
|
||||
* </p>
|
||||
*/
|
||||
@BenchmarkMode(Mode.AverageTime)
|
||||
@OutputTimeUnit(TimeUnit.MILLISECONDS)
|
||||
@Warmup(iterations = 3, time = 1)
|
||||
@Measurement(iterations = 5, time = 1)
|
||||
public class FrequencyTrieCompilationBenchmark {
|
||||
|
||||
/**
|
||||
* Shared benchmark state for compilation scenarios.
|
||||
*/
|
||||
@State(Scope.Benchmark)
|
||||
public static class CompilationState {
|
||||
|
||||
/**
|
||||
* Number of canonical stems to generate.
|
||||
*/
|
||||
@Param({ "2000", "10000" })
|
||||
public int stemCount;
|
||||
|
||||
/**
|
||||
* Reduction mode used during trie compilation.
|
||||
*/
|
||||
@Param({
|
||||
"MERGE_SUBTREES_WITH_EQUIVALENT_RANKED_GET_ALL_RESULTS",
|
||||
"MERGE_SUBTREES_WITH_EQUIVALENT_UNORDERED_GET_ALL_RESULTS",
|
||||
"MERGE_SUBTREES_WITH_EQUIVALENT_DOMINANT_GET_RESULTS"
|
||||
})
|
||||
public String reductionMode;
|
||||
|
||||
/**
|
||||
* Whether to store the stem itself using the canonical no-op patch.
|
||||
*/
|
||||
@Param({ "true", "false" })
|
||||
public boolean storeOriginalStem;
|
||||
|
||||
/**
|
||||
* Full dictionary text used as the benchmark input.
|
||||
*/
|
||||
private String dictionaryText;
|
||||
|
||||
/**
|
||||
* Initializes the benchmark state.
|
||||
*/
|
||||
@Setup(Level.Trial)
|
||||
public void setUp() {
|
||||
this.dictionaryText = BenchmarkCorpusSupport.createCorpus(this.stemCount).dictionaryText();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Measures end-to-end patch trie compilation latency.
|
||||
*
|
||||
* @param state prepared compilation state
|
||||
* @param blackhole sink preventing dead-code elimination
|
||||
* @throws IOException if dictionary parsing fails
|
||||
*/
|
||||
@Benchmark
|
||||
public void compilePatchTrie(final CompilationState state, final Blackhole blackhole) throws IOException {
|
||||
final ReductionSettings settings =
|
||||
ReductionSettings.withDefaults(ReductionMode.valueOf(state.reductionMode));
|
||||
blackhole.consume(
|
||||
BenchmarkCorpusSupport.compilePatchTrie(
|
||||
state.dictionaryText,
|
||||
settings,
|
||||
state.storeOriginalStem));
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,160 @@
|
||||
package org.egothor.stemmer.benchmark;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.concurrent.TimeUnit;
|
||||
import org.egothor.stemmer.FrequencyTrie;
|
||||
import org.egothor.stemmer.PatchCommandEncoder;
|
||||
import org.egothor.stemmer.ReductionMode;
|
||||
import org.egothor.stemmer.ReductionSettings;
|
||||
import org.openjdk.jmh.annotations.Benchmark;
|
||||
import org.openjdk.jmh.annotations.BenchmarkMode;
|
||||
import org.openjdk.jmh.annotations.Level;
|
||||
import org.openjdk.jmh.annotations.Measurement;
|
||||
import org.openjdk.jmh.annotations.Mode;
|
||||
import org.openjdk.jmh.annotations.OutputTimeUnit;
|
||||
import org.openjdk.jmh.annotations.Param;
|
||||
import org.openjdk.jmh.annotations.Scope;
|
||||
import org.openjdk.jmh.annotations.Setup;
|
||||
import org.openjdk.jmh.annotations.State;
|
||||
import org.openjdk.jmh.annotations.Warmup;
|
||||
import org.openjdk.jmh.infra.Blackhole;
|
||||
|
||||
/**
|
||||
* Benchmarks lookup-oriented operations on compiled Radixor tries.
|
||||
*
|
||||
* <p>
|
||||
* The benchmark uses a deterministic morphology-shaped corpus and measures the
|
||||
* latency of the hot-path lookup operations that are relevant at runtime:
|
||||
* retrieving the preferred patch command, retrieving all candidate patch
|
||||
* commands, and reconstructing stems from the returned patch values.
|
||||
* </p>
|
||||
*/
|
||||
@BenchmarkMode(Mode.AverageTime)
|
||||
@OutputTimeUnit(TimeUnit.NANOSECONDS)
|
||||
@Warmup(iterations = 3, time = 1)
|
||||
@Measurement(iterations = 5, time = 1)
|
||||
public class FrequencyTrieLookupBenchmark {
|
||||
|
||||
/**
|
||||
* Shared benchmark state for lookup scenarios.
|
||||
*/
|
||||
@State(Scope.Benchmark)
|
||||
public static class LookupState {
|
||||
|
||||
/**
|
||||
* Number of canonical stems to generate.
|
||||
*/
|
||||
@Param({ "2000", "10000" })
|
||||
public int stemCount;
|
||||
|
||||
/**
|
||||
* Reduction mode used to compile the lookup trie.
|
||||
*/
|
||||
@Param({
|
||||
"MERGE_SUBTREES_WITH_EQUIVALENT_RANKED_GET_ALL_RESULTS",
|
||||
"MERGE_SUBTREES_WITH_EQUIVALENT_UNORDERED_GET_ALL_RESULTS",
|
||||
"MERGE_SUBTREES_WITH_EQUIVALENT_DOMINANT_GET_RESULTS"
|
||||
})
|
||||
public String reductionMode;
|
||||
|
||||
/**
|
||||
* Compiled trie under test.
|
||||
*/
|
||||
private FrequencyTrie<String> trie;
|
||||
|
||||
/**
|
||||
* Deterministic lookup keys.
|
||||
*/
|
||||
private String[] lookupKeys;
|
||||
|
||||
/**
|
||||
* Keys that are known to return multiple patch candidates from
|
||||
* {@code getAll()}.
|
||||
*/
|
||||
private String[] ambiguousLookupKeys;
|
||||
|
||||
/**
|
||||
* Initializes the benchmark state.
|
||||
*
|
||||
* @throws IOException if corpus compilation fails
|
||||
*/
|
||||
@Setup(Level.Trial)
|
||||
public void setUp() throws IOException {
|
||||
final BenchmarkCorpusSupport.BenchmarkCorpus corpus = BenchmarkCorpusSupport.createCorpus(this.stemCount);
|
||||
final ReductionSettings settings =
|
||||
ReductionSettings.withDefaults(ReductionMode.valueOf(this.reductionMode));
|
||||
this.trie = BenchmarkCorpusSupport.compilePatchTrie(corpus.dictionaryText(), settings, true);
|
||||
this.lookupKeys = corpus.lookupKeys();
|
||||
this.ambiguousLookupKeys = corpus.ambiguousLookupKeys();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Measures preferred patch lookup latency.
|
||||
*
|
||||
* @param state prepared lookup state
|
||||
* @param blackhole sink preventing dead-code elimination
|
||||
*/
|
||||
@Benchmark
|
||||
public void lookupPreferredPatch(final LookupState state, final Blackhole blackhole) {
|
||||
final String[] keys = state.lookupKeys;
|
||||
for (String key : keys) {
|
||||
final String patch = state.trie.get(key);
|
||||
if (patch == null) {
|
||||
throw new IllegalStateException("Missing preferred patch for key " + key + '.');
|
||||
}
|
||||
blackhole.consume(patch);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Measures retrieval of all patch candidates on ambiguous forms.
|
||||
*
|
||||
* @param state prepared lookup state
|
||||
* @param blackhole sink preventing dead-code elimination
|
||||
*/
|
||||
@Benchmark
|
||||
public void lookupAllPatches(final LookupState state, final Blackhole blackhole) {
|
||||
final String[] keys = state.ambiguousLookupKeys;
|
||||
for (String key : keys) {
|
||||
final String[] patches = state.trie.getAll(key);
|
||||
if (patches.length < 2) {
|
||||
throw new IllegalStateException("Expected multiple patches for key " + key + '.');
|
||||
}
|
||||
blackhole.consume(patches);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Measures end-to-end preferred stemming from lookup plus patch application.
|
||||
*
|
||||
* @param state prepared lookup state
|
||||
* @param blackhole sink preventing dead-code elimination
|
||||
*/
|
||||
@Benchmark
|
||||
public void stemPreferredVariant(final LookupState state, final Blackhole blackhole) {
|
||||
final String[] keys = state.lookupKeys;
|
||||
for (String key : keys) {
|
||||
final String patch = state.trie.get(key);
|
||||
blackhole.consume(PatchCommandEncoder.apply(key, patch));
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Measures end-to-end full candidate stemming from {@code getAll()} plus
|
||||
* patch application.
|
||||
*
|
||||
* @param state prepared lookup state
|
||||
* @param blackhole sink preventing dead-code elimination
|
||||
*/
|
||||
@Benchmark
|
||||
public void stemAllVariants(final LookupState state, final Blackhole blackhole) {
|
||||
final String[] keys = state.ambiguousLookupKeys;
|
||||
for (String key : keys) {
|
||||
final String[] patches = state.trie.getAll(key);
|
||||
for (String patch : patches) {
|
||||
blackhole.consume(PatchCommandEncoder.apply(key, patch));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
10
src/jmh/java/org/egothor/stemmer/benchmark/package-info.java
Normal file
10
src/jmh/java/org/egothor/stemmer/benchmark/package-info.java
Normal file
@@ -0,0 +1,10 @@
|
||||
/**
|
||||
* JMH benchmarks for the Radixor algorithmic core.
|
||||
*
|
||||
* <p>
|
||||
* The benchmarks in this package focus on trie lookup latency, retrieval of all
|
||||
* candidate patch commands, and end-to-end dictionary compilation with
|
||||
* different reduction modes.
|
||||
* </p>
|
||||
*/
|
||||
package org.egothor.stemmer.benchmark;
|
||||
Reference in New Issue
Block a user