diff --git a/.project b/.project
index 633dc14..5da9344 100644
--- a/.project
+++ b/.project
@@ -1,23 +1,22 @@
Radixor
- Project Radixor created by Buildship.
-
-
-
-
- org.eclipse.jdt.core.javabuilder
-
-
-
-
- org.eclipse.buildship.core.gradleprojectbuilder
-
-
-
-
+
+
org.eclipse.jdt.core.javanature
org.eclipse.buildship.core.gradleprojectnature
+
+
+ org.eclipse.jdt.core.javabuilder
+
+
+
+ org.eclipse.buildship.core.gradleprojectbuilder
+
+
+
+
+
diff --git a/build.gradle b/build.gradle
index da89edf..3ac336d 100644
--- a/build.gradle
+++ b/build.gradle
@@ -33,6 +33,9 @@ configurations {
java {
withSourcesJar()
withJavadocJar()
+
+ sourceCompatibility = JavaVersion.VERSION_21
+ targetCompatibility = JavaVersion.VERSION_21
}
tasks.withType(AbstractArchiveTask).configureEach {
@@ -51,18 +54,14 @@ pmd {
ruleSetFiles = files(rootProject.file(".ruleset"))
}
-tasks.withType(JavaCompile).configureEach {
- options.release = 21
-}
-
dependencyLocking {
lockAllConfigurations()
- lockMode = LockMode.STRICT
+ lockMode = LockMode.STRICT
}
dependencies {
- jmhImplementation sourceSets.main.output
+ jmhImplementation sourceSets.main.output
testImplementation platform(libs.junit.bom)
testImplementation libs.junit.jupiter
@@ -71,7 +70,7 @@ dependencies {
testImplementation libs.mockito.core
testImplementation libs.mockito.junit.jupiter
testImplementation libs.jqwik
-
+
mockitoAgent(libs.mockito.core) {
transitive = false
}
@@ -102,12 +101,12 @@ dependencyCheck {
delay = nvdApiKey != null ? 3500 : 8000
validForHours = 4
}
-
+
if (dependencyCheckSuppressionFile.exists()) {
suppressionFile = dependencyCheckSuppressionFile.absolutePath
failBuildOnUnusedSuppressionRule = true
}
-}
+}
tasks.withType(Test).configureEach {
useJUnitPlatform()
@@ -245,7 +244,7 @@ distributions {
into 'docs'
include '**/*.md'
}
-
+
from(layout.buildDirectory.dir('generated/release-notes')) {
into ''
include 'CHANGELOG.md'
diff --git a/src/main/java/org/egothor/stemmer/FrequencyTrie.java b/src/main/java/org/egothor/stemmer/FrequencyTrie.java
index 53aa3cf..4f2330e 100644
--- a/src/main/java/org/egothor/stemmer/FrequencyTrie.java
+++ b/src/main/java/org/egothor/stemmer/FrequencyTrie.java
@@ -95,11 +95,6 @@ public final class FrequencyTrie {
*/
private static final Logger LOGGER = Logger.getLogger(FrequencyTrie.class.getName());
- /**
- * Factory used to create correctly typed arrays for {@link #getAll(String)}.
- */
- private final IntFunction arrayFactory;
-
/**
* Root node of the compiled read-only trie.
*/
@@ -110,6 +105,26 @@ public final class FrequencyTrie {
*/
private final TrieMetadata metadata;
+ /**
+ * Cached traversal direction used for key lookup.
+ */
+ private final WordTraversalDirection lookupTraversalDirection;
+
+ /**
+ * Whether lookups require lowercase normalization.
+ */
+ private final boolean lowercasesLookupKeys;
+
+ /**
+ * Whether lookups require diacritic stripping.
+ */
+ private final boolean removeDiacritics;
+
+ /**
+ * Shared empty array instance for empty lookup results from {@link #getAll(String)}.
+ */
+ private final V[] emptyValues;
+
/**
* Binary format magic header.
*/
@@ -145,9 +160,12 @@ public final class FrequencyTrie {
*/
private FrequencyTrie(final IntFunction arrayFactory, final CompiledNode root,
final TrieMetadata metadata) {
- this.arrayFactory = Objects.requireNonNull(arrayFactory, "arrayFactory");
this.root = Objects.requireNonNull(root, "root");
this.metadata = Objects.requireNonNull(metadata, "metadata");
+ this.lookupTraversalDirection = metadata.traversalDirection();
+ this.lowercasesLookupKeys = metadata.caseProcessingMode() == CaseProcessingMode.LOWERCASE_WITH_LOCALE_ROOT;
+ this.removeDiacritics = metadata.diacriticProcessingMode() == DiacriticProcessingMode.REMOVE;
+ this.emptyValues = Objects.requireNonNull(arrayFactory, "arrayFactory").apply(0);
}
/**
@@ -172,10 +190,14 @@ public final class FrequencyTrie {
public V get(final String key) {
Objects.requireNonNull(key, "key");
final CompiledNode node = findNode(normalizeLookupKey(key));
- if (node == null || node.orderedValues().length == 0) {
+ if (node == null) {
return null;
}
- return node.orderedValues()[0];
+ final V[] orderedValues = node.orderedValues();
+ if (orderedValues.length == 0) {
+ return null;
+ }
+ return orderedValues[0];
}
/**
@@ -201,13 +223,18 @@ public final class FrequencyTrie {
* value is stored at the addressed node
* @throws NullPointerException if {@code key} is {@code null}
*/
+ @SuppressWarnings("PMD.MethodReturnsInternalArray")
public V[] getAll(final String key) {
Objects.requireNonNull(key, "key");
final CompiledNode node = findNode(normalizeLookupKey(key));
- if (node == null || node.orderedValues().length == 0) {
- return this.arrayFactory.apply(0);
+ if (node == null) {
+ return this.emptyValues;
}
- return Arrays.copyOf(node.orderedValues(), node.orderedValues().length);
+ final V[] orderedValues = node.orderedValues();
+ if (orderedValues.length == 0) {
+ return this.emptyValues;
+ }
+ return Arrays.copyOf(orderedValues, orderedValues.length);
}
/**
@@ -232,16 +259,28 @@ public final class FrequencyTrie {
* if the key does not exist or no value is stored at the addressed node
* @throws NullPointerException if {@code key} is {@code null}
*/
+ @SuppressWarnings("PMD.AvoidLiteralsInIfCondition")
public List> getEntries(final String key) {
Objects.requireNonNull(key, "key");
final CompiledNode node = findNode(normalizeLookupKey(key));
- if (node == null || node.orderedValues().length == 0) {
+ if (node == null) {
return List.of();
}
- final List> entries = new ArrayList<>(node.orderedValues().length);
- for (int index = 0; index < node.orderedValues().length; index++) {
- entries.add(new ValueCount<>(node.orderedValues()[index], node.orderedCounts()[index]));
+ final V[] orderedValues = node.orderedValues();
+ final int valueCount = orderedValues.length;
+ if (valueCount == 0) {
+ return List.of();
+ }
+
+ if (valueCount == 1) {
+ return List.of(new ValueCount<>(orderedValues[0], node.orderedCounts()[0]));
+ }
+
+ final int[] orderedCounts = node.orderedCounts();
+ final List> entries = new ArrayList<>(valueCount);
+ for (int index = 0; index < valueCount; index++) {
+ entries.add(new ValueCount<>(orderedValues[index], orderedCounts[index]));
}
return Collections.unmodifiableList(entries);
}
@@ -644,9 +683,18 @@ public final class FrequencyTrie {
*/
private CompiledNode findNode(final String key) {
CompiledNode current = this.root;
+ if (this.lookupTraversalDirection == WordTraversalDirection.BACKWARD) {
+ for (int traversalOffset = key.length() - 1; traversalOffset >= 0; traversalOffset--) {
+ current = current.findChild(key.charAt(traversalOffset));
+ if (current == null) {
+ return null;
+ }
+ }
+ return current;
+ }
+
for (int traversalOffset = 0; traversalOffset < key.length(); traversalOffset++) {
- current = current.findChild(
- key.charAt(this.metadata.traversalDirection().logicalIndex(key.length(), traversalOffset)));
+ current = current.findChild(key.charAt(traversalOffset));
if (current == null) {
return null;
}
@@ -661,13 +709,15 @@ public final class FrequencyTrie {
* @return normalized key for trie traversal
*/
private String normalizeLookupKey(final String key) {
- String normalized = key;
-
- if (this.metadata.caseProcessingMode() == CaseProcessingMode.LOWERCASE_WITH_LOCALE_ROOT) {
- normalized = normalized.toLowerCase(Locale.ROOT);
+ if (!this.lowercasesLookupKeys && !this.removeDiacritics && this.metadata.diacriticProcessingMode() != DiacriticProcessingMode.AS_IS_AND_STRIPPED_FALLBACK) {
+ return key; // nothing to normalize; the unsupported fallback mode is excluded so it still reaches the throw below
}
- if (this.metadata.diacriticProcessingMode() == DiacriticProcessingMode.REMOVE) {
+ String normalized = key;
+ if (this.lowercasesLookupKeys) {
+ normalized = normalized.toLowerCase(Locale.ROOT);
+ }
+ if (this.removeDiacritics) {
normalized = DiacriticStripper.strip(normalized);
} else if (this.metadata.diacriticProcessingMode() == DiacriticProcessingMode.AS_IS_AND_STRIPPED_FALLBACK) {
throw new UnsupportedOperationException(
diff --git a/src/main/java/org/egothor/stemmer/PatchCommandEncoder.java b/src/main/java/org/egothor/stemmer/PatchCommandEncoder.java
index 117f8ea..a5c1eb0 100644
--- a/src/main/java/org/egothor/stemmer/PatchCommandEncoder.java
+++ b/src/main/java/org/egothor/stemmer/PatchCommandEncoder.java
@@ -121,6 +121,16 @@ public final class PatchCommandEncoder {
*/
/* default */ static final String NOOP_PATCH = String.valueOf(new char[] { NOOP_OPCODE, NOOP_ARGUMENT });
+ /**
+ * Prefix used in unsupported NOOP patch argument exceptions.
+ */
+ private static final String MSG_NOOP = "Unsupported NOOP patch argument: ";
+
+ /**
+ * Prefix used in unsupported patch opcode exceptions.
+ */
+ private static final String MSG_OPCODE = "Unsupported patch opcode: ";
+
/**
* Safety penalty used to prevent a mismatch from being selected as a match.
*/
@@ -413,6 +423,9 @@ public final class PatchCommandEncoder {
if ((patchCommand.length() & 1) != 0) {
return source;
}
+ if (patchCommand.length() == 2) {
+ return applySingleBackwardInstruction(source, patchCommand.charAt(0), patchCommand.charAt(1));
+ }
final StringBuilder result = new StringBuilder(source);
if (result.isEmpty()) {
@@ -494,6 +507,9 @@ public final class PatchCommandEncoder {
if ((patchCommand.length() & 1) != 0) {
return source;
}
+ if (patchCommand.length() == 2) {
+ return applySingleForwardInstruction(source, patchCommand.charAt(0), patchCommand.charAt(1));
+ }
final StringBuilder result = new StringBuilder(source);
if (result.isEmpty()) {
@@ -552,6 +568,110 @@ public final class PatchCommandEncoder {
return result.toString();
}
+    /**
+     * Fast path that applies a backward patch consisting of exactly one instruction.
+     *
+     * <p>A delete whose decoded count lies outside {@code 1..source.length()}, a
+     * replace on an empty word, a skip and a well-formed NOOP all return
+     * {@code source} unchanged.
+     *
+     * @param source   original source word
+     * @param opcode   patch opcode
+     * @param argument encoded patch argument
+     * @return patched word, or {@code source} itself when nothing was changed
+     * @throws IllegalArgumentException if the opcode is unknown or a NOOP carries
+     *                                  an unexpected argument
+     */
+    private static String applySingleBackwardInstruction(final String source, final char opcode, final char argument) {
+        final int length = source.length();
+        switch (opcode) {
+            case DELETE_OPCODE -> {
+                final int deleteCount = decodeEncodedCount(argument);
+                if (deleteCount >= 1 && deleteCount <= length) {
+                    return source.substring(0, length - deleteCount);
+                }
+                return source;
+            }
+            case INSERT_OPCODE -> {
+                // The inserted character becomes the last character of the word.
+                final char[] extended = new char[length + 1];
+                source.getChars(0, length, extended, 0);
+                extended[length] = argument;
+                return new String(extended);
+            }
+            case REPLACE_OPCODE -> {
+                if (length == 0) {
+                    return source;
+                }
+                final char[] patched = source.toCharArray();
+                patched[length - 1] = argument;
+                return new String(patched);
+            }
+            case SKIP_OPCODE -> {
+                return source;
+            }
+            case NOOP_OPCODE -> {
+                if (argument == NOOP_ARGUMENT) {
+                    return source;
+                }
+                throw new IllegalArgumentException(MSG_NOOP + argument);
+            }
+            default -> throw new IllegalArgumentException(MSG_OPCODE + opcode);
+        }
+    }
+
+    /**
+     * Fast path that applies a forward patch consisting of exactly one instruction.
+     *
+     * <p>A delete whose decoded count lies outside {@code 1..source.length()}, a
+     * replace on an empty word, a skip and a well-formed NOOP all return
+     * {@code source} unchanged.
+     *
+     * @param source   original source word
+     * @param opcode   patch opcode
+     * @param argument encoded patch argument
+     * @return patched word, or {@code source} itself when nothing was changed
+     * @throws IllegalArgumentException if the opcode is unknown or a NOOP carries
+     *                                  an unexpected argument
+     */
+    private static String applySingleForwardInstruction(final String source, final char opcode, final char argument) {
+        final int length = source.length();
+        switch (opcode) {
+            case DELETE_OPCODE -> {
+                final int deleteCount = decodeEncodedCount(argument);
+                if (deleteCount >= 1 && deleteCount <= length) {
+                    return source.substring(deleteCount);
+                }
+                return source;
+            }
+            case INSERT_OPCODE -> {
+                // The inserted character becomes the first character of the word.
+                final char[] extended = new char[length + 1];
+                extended[0] = argument;
+                source.getChars(0, length, extended, 1);
+                return new String(extended);
+            }
+            case REPLACE_OPCODE -> {
+                if (length == 0) {
+                    return source;
+                }
+                final char[] patched = source.toCharArray();
+                patched[0] = argument;
+                return new String(patched);
+            }
+            case SKIP_OPCODE -> {
+                return source;
+            }
+            case NOOP_OPCODE -> {
+                if (argument == NOOP_ARGUMENT) {
+                    return source;
+                }
+                throw new IllegalArgumentException(MSG_NOOP + argument);
+            }
+            default -> throw new IllegalArgumentException(MSG_OPCODE + opcode);
+        }
+    }
+
/**
* Applies a backward patch command to an empty source word.
*
diff --git a/src/main/java/org/egothor/stemmer/trie/CompiledNode.java b/src/main/java/org/egothor/stemmer/trie/CompiledNode.java
index eb5d567..faff909 100644
--- a/src/main/java/org/egothor/stemmer/trie/CompiledNode.java
+++ b/src/main/java/org/egothor/stemmer/trie/CompiledNode.java
@@ -52,6 +52,11 @@ import java.util.Objects;
@SuppressWarnings("PMD.DataClass")
public record CompiledNode(char[] edgeLabels, CompiledNode[] children, V[] orderedValues, int... orderedCounts) {
+ /**
+ * Largest child count (inclusive) for which {@link #findChild(char)} uses a linear scan instead of binary search.
+ */
+ private static final int LINEAR_CHILD_COUNT_THRESHOLD = 4;
+
/**
* Creates one validated compiled node.
*
@@ -140,6 +145,19 @@ public record CompiledNode(char[] edgeLabels, CompiledNode[] children, V[]
* @return child node, or {@code null} if absent
*/
public CompiledNode findChild(final char edge) {
+ final int childCount = this.edgeLabels.length;
+ if (childCount == 0) {
+ return null;
+ }
+ if (childCount <= LINEAR_CHILD_COUNT_THRESHOLD) {
+ for (int index = 0; index < childCount; index++) {
+ if (this.edgeLabels[index] == edge) {
+ return this.children[index];
+ }
+ }
+ return null;
+ }
+
final int index = Arrays.binarySearch(this.edgeLabels, edge);
if (index < 0) {
return null;