From 50c3ab34323306b4ae2eab30753889e3ffd68440 Mon Sep 17 00:00:00 2001 From: Leo Galambos Date: Fri, 15 May 2026 18:35:11 +0200 Subject: [PATCH] fix: Performance fixes --- .project | 29 +++-- build.gradle | 19 ++- .../org/egothor/stemmer/FrequencyTrie.java | 94 +++++++++++---- .../egothor/stemmer/PatchCommandEncoder.java | 112 ++++++++++++++++++ .../egothor/stemmer/trie/CompiledNode.java | 18 +++ 5 files changed, 225 insertions(+), 47 deletions(-) diff --git a/.project b/.project index 633dc14..5da9344 100644 --- a/.project +++ b/.project @@ -1,23 +1,22 @@ Radixor - Project Radixor created by Buildship. - - - - - org.eclipse.jdt.core.javabuilder - - - - - org.eclipse.buildship.core.gradleprojectbuilder - - - - + + org.eclipse.jdt.core.javanature org.eclipse.buildship.core.gradleprojectnature + + + org.eclipse.jdt.core.javabuilder + + + + org.eclipse.buildship.core.gradleprojectbuilder + + + + + diff --git a/build.gradle b/build.gradle index da89edf..3ac336d 100644 --- a/build.gradle +++ b/build.gradle @@ -33,6 +33,9 @@ configurations { java { withSourcesJar() withJavadocJar() + + sourceCompatibility = JavaVersion.VERSION_21 + targetCompatibility = JavaVersion.VERSION_21 } tasks.withType(AbstractArchiveTask).configureEach { @@ -51,18 +54,14 @@ pmd { ruleSetFiles = files(rootProject.file(".ruleset")) } -tasks.withType(JavaCompile).configureEach { - options.release = 21 -} - dependencyLocking { lockAllConfigurations() - lockMode = LockMode.STRICT + lockMode = LockMode.STRICT } dependencies { - jmhImplementation sourceSets.main.output + jmhImplementation sourceSets.main.output testImplementation platform(libs.junit.bom) testImplementation libs.junit.jupiter @@ -71,7 +70,7 @@ dependencies { testImplementation libs.mockito.core testImplementation libs.mockito.junit.jupiter testImplementation libs.jqwik - + mockitoAgent(libs.mockito.core) { transitive = false } @@ -102,12 +101,12 @@ dependencyCheck { delay = nvdApiKey != null ? 3500 : 8000 validForHours = 4 } - + if (dependencyCheckSuppressionFile.exists()) { suppressionFile = dependencyCheckSuppressionFile.absolutePath failBuildOnUnusedSuppressionRule = true } -} +} tasks.withType(Test).configureEach { useJUnitPlatform() @@ -245,7 +244,7 @@ distributions { into 'docs' include '**/*.md' } - + from(layout.buildDirectory.dir('generated/release-notes')) { into '' include 'CHANGELOG.md' diff --git a/src/main/java/org/egothor/stemmer/FrequencyTrie.java b/src/main/java/org/egothor/stemmer/FrequencyTrie.java index 53aa3cf..4f2330e 100644 --- a/src/main/java/org/egothor/stemmer/FrequencyTrie.java +++ b/src/main/java/org/egothor/stemmer/FrequencyTrie.java @@ -95,11 +95,6 @@ public final class FrequencyTrie { */ private static final Logger LOGGER = Logger.getLogger(FrequencyTrie.class.getName()); - /** - * Factory used to create correctly typed arrays for {@link #getAll(String)}. - */ - private final IntFunction arrayFactory; - /** * Root node of the compiled read-only trie. */ @@ -110,6 +105,26 @@ public final class FrequencyTrie { */ private final TrieMetadata metadata; + /** + * Cached traversal direction used for key lookup. + */ + private final WordTraversalDirection lookupTraversalDirection; + + /** + * Whether lookups require lowercase normalization. + */ + private final boolean lowercasesLookupKeys; + + /** + * Whether lookups require diacritic stripping. + */ + private final boolean removeDiacritics; + + /** + * Shared empty array instance for empty lookup results from {@link #getAll(String)}. + */ + private final V[] emptyValues; + /** * Binary format magic header. */ @@ -145,9 +160,12 @@ public final class FrequencyTrie { */ private FrequencyTrie(final IntFunction arrayFactory, final CompiledNode root, final TrieMetadata metadata) { - this.arrayFactory = Objects.requireNonNull(arrayFactory, "arrayFactory"); this.root = Objects.requireNonNull(root, "root"); this.metadata = Objects.requireNonNull(metadata, "metadata"); + this.lookupTraversalDirection = metadata.traversalDirection(); + this.lowercasesLookupKeys = metadata.caseProcessingMode() == CaseProcessingMode.LOWERCASE_WITH_LOCALE_ROOT; + this.removeDiacritics = metadata.diacriticProcessingMode() == DiacriticProcessingMode.REMOVE; + this.emptyValues = arrayFactory.apply(0); } /** @@ -172,10 +190,14 @@ public final class FrequencyTrie { public V get(final String key) { Objects.requireNonNull(key, "key"); final CompiledNode node = findNode(normalizeLookupKey(key)); - if (node == null || node.orderedValues().length == 0) { + if (node == null) { return null; } - return node.orderedValues()[0]; + final V[] orderedValues = node.orderedValues(); + if (orderedValues.length == 0) { + return null; + } + return orderedValues[0]; } /** @@ -201,13 +223,18 @@ public final class FrequencyTrie { * value is stored at the addressed node * @throws NullPointerException if {@code key} is {@code null} */ + @SuppressWarnings("PMD.MethodReturnsInternalArray") public V[] getAll(final String key) { Objects.requireNonNull(key, "key"); final CompiledNode node = findNode(normalizeLookupKey(key)); - if (node == null || node.orderedValues().length == 0) { - return this.arrayFactory.apply(0); + if (node == null) { + return this.emptyValues; } - return Arrays.copyOf(node.orderedValues(), node.orderedValues().length); + final V[] orderedValues = node.orderedValues(); + if (orderedValues.length == 0) { + return this.emptyValues; + } + return Arrays.copyOf(orderedValues, orderedValues.length); } /** @@ -232,16 +259,28 @@ public final class FrequencyTrie { * if the key does not exist or no value is stored at the addressed node * @throws NullPointerException if {@code key} is {@code null} */ + @SuppressWarnings("PMD.AvoidLiteralsInIfCondition") public List> getEntries(final String key) { Objects.requireNonNull(key, "key"); final CompiledNode node = findNode(normalizeLookupKey(key)); - if (node == null || node.orderedValues().length == 0) { + if (node == null) { return List.of(); } - final List> entries = new ArrayList<>(node.orderedValues().length); - for (int index = 0; index < node.orderedValues().length; index++) { - entries.add(new ValueCount<>(node.orderedValues()[index], node.orderedCounts()[index])); + final V[] orderedValues = node.orderedValues(); + final int valueCount = orderedValues.length; + if (valueCount == 0) { + return List.of(); + } + + if (valueCount == 1) { + return List.of(new ValueCount<>(orderedValues[0], node.orderedCounts()[0])); + } + + final int[] orderedCounts = node.orderedCounts(); + final List> entries = new ArrayList<>(valueCount); + for (int index = 0; index < valueCount; index++) { + entries.add(new ValueCount<>(orderedValues[index], orderedCounts[index])); } return Collections.unmodifiableList(entries); } @@ -644,9 +683,18 @@ public final class FrequencyTrie { */ private CompiledNode findNode(final String key) { CompiledNode current = this.root; + if (this.lookupTraversalDirection == WordTraversalDirection.BACKWARD) { + for (int traversalOffset = key.length() - 1; traversalOffset >= 0; traversalOffset--) { + current = current.findChild(key.charAt(traversalOffset)); + if (current == null) { + return null; + } + } + return current; + } + for (int traversalOffset = 0; traversalOffset < key.length(); traversalOffset++) { - current = current.findChild( - key.charAt(this.metadata.traversalDirection().logicalIndex(key.length(), traversalOffset))); + current = current.findChild(key.charAt(traversalOffset)); if (current == null) { return null; } @@ -661,13 +709,15 @@ public final class FrequencyTrie { * @return normalized key for trie traversal */ private String normalizeLookupKey(final String key) { - String normalized = key; - - if (this.metadata.caseProcessingMode() == CaseProcessingMode.LOWERCASE_WITH_LOCALE_ROOT) { - normalized = normalized.toLowerCase(Locale.ROOT); + if (!this.lowercasesLookupKeys && !this.removeDiacritics) { + return key; } - if (this.metadata.diacriticProcessingMode() == DiacriticProcessingMode.REMOVE) { + String normalized = key; + if (this.lowercasesLookupKeys) { + normalized = normalized.toLowerCase(Locale.ROOT); + } + if (this.removeDiacritics) { normalized = DiacriticStripper.strip(normalized); } else if (this.metadata.diacriticProcessingMode() == DiacriticProcessingMode.AS_IS_AND_STRIPPED_FALLBACK) { throw new UnsupportedOperationException( diff --git a/src/main/java/org/egothor/stemmer/PatchCommandEncoder.java b/src/main/java/org/egothor/stemmer/PatchCommandEncoder.java index 117f8ea..a5c1eb0 100644 --- a/src/main/java/org/egothor/stemmer/PatchCommandEncoder.java +++ b/src/main/java/org/egothor/stemmer/PatchCommandEncoder.java @@ -121,6 +121,16 @@ public final class PatchCommandEncoder { */ /* default */ static final String NOOP_PATCH = String.valueOf(new char[] { NOOP_OPCODE, NOOP_ARGUMENT }); + /** + * Prefix used in unsupported NOOP patch argument exceptions. + */ + private static final String MSG_NOOP = "Unsupported NOOP patch argument: "; + + /** + * Prefix used in unsupported patch opcode exceptions. + */ + private static final String MSG_OPCODE = "Unsupported patch opcode: "; + /** * Safety penalty used to prevent a mismatch from being selected as a match. */ @@ -413,6 +423,9 @@ public final class PatchCommandEncoder { if ((patchCommand.length() & 1) != 0) { return source; } + if (patchCommand.length() == 2) { + return applySingleBackwardInstruction(source, patchCommand.charAt(0), patchCommand.charAt(1)); + } final StringBuilder result = new StringBuilder(source); if (result.isEmpty()) { @@ -494,6 +507,9 @@ public final class PatchCommandEncoder { if ((patchCommand.length() & 1) != 0) { return source; } + if (patchCommand.length() == 2) { + return applySingleForwardInstruction(source, patchCommand.charAt(0), patchCommand.charAt(1)); + } final StringBuilder result = new StringBuilder(source); if (result.isEmpty()) { @@ -552,6 +568,102 @@ public final class PatchCommandEncoder { return result.toString(); } + /** + * Applies a single backward-direction patch instruction. + * + * @param source original source word + * @param opcode patch opcode + * @param argument encoded patch argument + * @return transformed source after one instruction + */ + private static String applySingleBackwardInstruction(final String source, final char opcode, final char argument) { + final int sourceLength = source.length(); + final int encodedValue; + + switch (opcode) { + case DELETE_OPCODE: + encodedValue = decodeEncodedCount(argument); + if (encodedValue < 1 || encodedValue > sourceLength) { + return source; + } + return source.substring(0, sourceLength - encodedValue); + + case INSERT_OPCODE: + final char[] insertTarget = new char[sourceLength + 1]; + source.getChars(0, sourceLength, insertTarget, 0); + insertTarget[sourceLength] = argument; + return new String(insertTarget); + + case REPLACE_OPCODE: + if (sourceLength == 0) { + return source; + } + final char[] replaceTarget = source.toCharArray(); + replaceTarget[sourceLength - 1] = argument; + return new String(replaceTarget); + + case SKIP_OPCODE: + return source; + + case NOOP_OPCODE: + if (argument != NOOP_ARGUMENT) { + throw new IllegalArgumentException(MSG_NOOP + argument); + } + return source; + + default: + throw new IllegalArgumentException(MSG_OPCODE + opcode); + } + } + + /** + * Applies a single forward-direction patch instruction. + * + * @param source original source word + * @param opcode patch opcode + * @param argument encoded patch argument + * @return transformed source after one instruction + */ + private static String applySingleForwardInstruction(final String source, final char opcode, final char argument) { + final int sourceLength = source.length(); + final int encodedValue; + + switch (opcode) { + case DELETE_OPCODE: + encodedValue = decodeEncodedCount(argument); + if (encodedValue < 1 || encodedValue > sourceLength) { + return source; + } + return source.substring(encodedValue); + + case INSERT_OPCODE: + final char[] insertTarget = new char[sourceLength + 1]; + insertTarget[0] = argument; + source.getChars(0, sourceLength, insertTarget, 1); + return new String(insertTarget); + + case REPLACE_OPCODE: + if (sourceLength == 0) { + return source; + } + final char[] replaceTarget = source.toCharArray(); + replaceTarget[0] = argument; + return new String(replaceTarget); + + case SKIP_OPCODE: + return source; + + case NOOP_OPCODE: + if (argument != NOOP_ARGUMENT) { + throw new IllegalArgumentException(MSG_NOOP + argument); + } + return source; + + default: + throw new IllegalArgumentException(MSG_OPCODE + opcode); + } + } + /** * Applies a backward patch command to an empty source word. * diff --git a/src/main/java/org/egothor/stemmer/trie/CompiledNode.java b/src/main/java/org/egothor/stemmer/trie/CompiledNode.java index eb5d567..faff909 100644 --- a/src/main/java/org/egothor/stemmer/trie/CompiledNode.java +++ b/src/main/java/org/egothor/stemmer/trie/CompiledNode.java @@ -52,6 +52,11 @@ import java.util.Objects; @SuppressWarnings("PMD.DataClass") public record CompiledNode(char[] edgeLabels, CompiledNode[] children, V[] orderedValues, int... orderedCounts) { + /** + * Number of child edges where linear scan is cheaper than binary search. + */ + private static final int LINEAR_CHILD_COUNT_THRESHOLD = 4; + /** * Creates one validated compiled node. * @@ -140,6 +145,19 @@ public record CompiledNode(char[] edgeLabels, CompiledNode[] children, V[] * @return child node, or {@code null} if absent */ public CompiledNode findChild(final char edge) { + final int childCount = this.edgeLabels.length; + if (childCount == 0) { + return null; + } + if (childCount <= LINEAR_CHILD_COUNT_THRESHOLD) { + for (int index = 0; index < childCount; index++) { + if (this.edgeLabels[index] == edge) { + return this.children[index]; + } + } + return null; + } + final int index = Arrays.binarySearch(this.edgeLabels, edge); if (index < 0) { return null;