feat: Prepare TrieMetadata and new stemmer data integration

2026-04-23 20:21:46 +02:00
parent a9d15fa3ae
commit 4d939f5b6e
77 changed files with 3024 additions and 179778 deletions
--- a/src/jmh/java/org/egothor/stemmer/benchmark/EnglishStemmerComparisonBenchmark.java
+++ b/src/jmh/java/org/egothor/stemmer/benchmark/EnglishStemmerComparisonBenchmark.java
@@ -59,8 +59,7 @@ import org.tartarus.snowball.ext.porterStemmer;
 * The benchmark processes the same deterministic token array with:
 * </p>
 * <ul>
- * <li>Radixor using bundled
- * {@link StemmerPatchTrieLoader.Language#US_UK_PROFI}</li>
+ * <li>Radixor using bundled {@link StemmerPatchTrieLoader.Language#US_UK}</li>
 * <li>Snowball original Porter stemmer</li>
 * <li>Snowball English stemmer, commonly referred to as Porter2</li>
 * </ul>
@@ -106,7 +105,7 @@ public class EnglishStemmerComparisonBenchmark {
        @Setup(Level.Trial)
        public void setUp() throws IOException {
            this.tokens = EnglishComparisonCorpus.createTokens(this.familyCount);
-            this.radixorTrie = StemmerPatchTrieLoader.load(StemmerPatchTrieLoader.Language.US_UK_PROFI, true,
+            this.radixorTrie = StemmerPatchTrieLoader.load(StemmerPatchTrieLoader.Language.US_UK, true,
                    ReductionMode.MERGE_SUBTREES_WITH_EQUIVALENT_RANKED_GET_ALL_RESULTS);
        }
    }
--- a/src/main/java/org/egothor/stemmer/Compile.java
+++ b/src/main/java/org/egothor/stemmer/Compile.java
@@ -149,8 +149,10 @@ public final class Compile {
        final ReductionSettings reductionSettings = new ReductionSettings(arguments.reductionMode(),
                arguments.dominantWinnerMinPercent(), arguments.dominantWinnerOverSecondRatio());

+        final WordTraversalDirection traversalDirection = arguments.rightToLeft() ? WordTraversalDirection.FORWARD
+                : WordTraversalDirection.BACKWARD;
        final FrequencyTrie<String> trie = StemmerPatchTrieLoader.load(arguments.inputFile(), arguments.storeOriginal(),
-                reductionSettings);
+                reductionSettings, traversalDirection);

        final Path outputFile = arguments.outputFile();
        final Path parent = outputFile.toAbsolutePath().getParent();
@@ -166,11 +168,11 @@ public final class Compile {

        if (LOGGER.isLoggable(Level.INFO)) {
            LOGGER.log(Level.INFO,
-                    "Compiled dictionary {0} to {1} using mode {2}, storeOriginal={3}, dominantWinnerMinPercent={4}, dominantWinnerOverSecondRatio={5}.",
+                    "Compiled dictionary {0} to {1} using mode {2}, storeOriginal={3}, rightToLeft={4}, dominantWinnerMinPercent={5}, dominantWinnerOverSecondRatio={6}.",
                    new Object[] { arguments.inputFile().toAbsolutePath().toString(),
                            arguments.outputFile().toAbsolutePath().toString(), arguments.reductionMode().name(),
-                            arguments.storeOriginal(), arguments.dominantWinnerMinPercent(),
-                            arguments.dominantWinnerOverSecondRatio() });
+                            arguments.storeOriginal(), arguments.rightToLeft(),
+                            arguments.dominantWinnerMinPercent(), arguments.dominantWinnerOverSecondRatio() });
        }
    }

@@ -188,6 +190,16 @@ public final class Compile {
        System.err.println("      [--dominant-winner-over-second-ratio <1..n>] \\");
        System.err.println("      [--overwrite]");
        System.err.println();
+        System.err.println("Options:");
+        System.err.println("  --store-original");
+        System.err.println("      Inserts each canonical stem itself using the no-operation patch.");
+        System.err.println("  --right-to-left");
+        System.err.println("      Uses forward word traversal for right-to-left languages.");
+        System.err.println("      In this mode, trie keys are constructed from the logical beginning");
+        System.err.println("      of the stored word form and patch commands are encoded likewise.");
+        System.err.println("  --overwrite");
+        System.err.println("      Replaces the target file when it already exists.");
+        System.err.println();
        System.err.println("Supported reduction modes:");
        for (ReductionMode mode : ReductionMode.values()) {
            System.err.println("  " + mode.name());
@@ -240,6 +252,8 @@ public final class Compile {
     * @param outputFile                    output compressed trie file
     * @param reductionMode                 subtree reduction mode
     * @param storeOriginal                 whether original stems are stored
+     * @param rightToLeft                   whether dictionary compilation should use
+     *                                      forward traversal on stored word forms
     * @param dominantWinnerMinPercent      dominant winner minimum percent
     * @param dominantWinnerOverSecondRatio dominant winner over second ratio
     * @param overwrite                     whether an existing output may be
@@ -248,7 +262,8 @@ public final class Compile {
     */
    @SuppressWarnings("PMD.LongVariable")
    private record Arguments(Path inputFile, Path outputFile, ReductionMode reductionMode, boolean storeOriginal,
-            int dominantWinnerMinPercent, int dominantWinnerOverSecondRatio, boolean overwrite, boolean help) {
+            boolean rightToLeft, int dominantWinnerMinPercent, int dominantWinnerOverSecondRatio, boolean overwrite,
+            boolean help) {

        /**
         * Parses raw command-line arguments.
@@ -264,6 +279,7 @@ public final class Compile {
            Path outputFile = null;
            ReductionMode reductionMode = null;
            boolean storeOriginal = false;
+            boolean rightToLeft = false;
            boolean overwrite = false;
            boolean help = false;
            int dominantWinnerMinPercent = ReductionSettings.DEFAULT_DOMINANT_WINNER_MIN_PERCENT;
@@ -286,6 +302,10 @@ public final class Compile {
                        overwrite = true;
                        break;

+                    case "--right-to-left":
+                        rightToLeft = true;
+                        break;
+
                    case "--input":
                        inputFile = Path.of(requireValue(arguments, ++index, "--input"));
                        break;
@@ -317,8 +337,8 @@ public final class Compile {
            }

            if (help) {
-                return new Arguments(inputFile, outputFile, reductionMode, storeOriginal, dominantWinnerMinPercent,
-                        dominantWinnerOverSecondRatio, overwrite, true);
+                return new Arguments(inputFile, outputFile, reductionMode, storeOriginal, rightToLeft,
+                        dominantWinnerMinPercent, dominantWinnerOverSecondRatio, overwrite, true);
            }

            if (inputFile == null) {
@@ -331,8 +351,8 @@ public final class Compile {
                throw new IllegalArgumentException("Missing required argument --reduction-mode.");
            }

-            return new Arguments(inputFile, outputFile, reductionMode, storeOriginal, dominantWinnerMinPercent,
-                    dominantWinnerOverSecondRatio, overwrite, false);
+            return new Arguments(inputFile, outputFile, reductionMode, storeOriginal, rightToLeft,
+                    dominantWinnerMinPercent, dominantWinnerOverSecondRatio, overwrite, false);
        }

        /**
--- a/src/main/java/org/egothor/stemmer/DiacriticProcessingMode.java
+++ b/src/main/java/org/egothor/stemmer/DiacriticProcessingMode.java
@@ -0,0 +1,61 @@
+/*******************************************************************************
+ * Copyright (C) 2026, Leo Galambos
+ * All rights reserved.
+ * 
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ * 
+ * 1. Redistributions of source code must retain the above copyright notice,
+ *    this list of conditions and the following disclaimer.
+ * 
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ *    this list of conditions and the following disclaimer in the documentation
+ *    and/or other materials provided with the distribution.
+ * 
+ * 3. Neither the name of the copyright holder nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ * 
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ ******************************************************************************/
+package org.egothor.stemmer;
+
+/**
+ * Defines how dictionary loading and trie traversal should treat diacritics.
+ *
+ * <p>
+ * The current implementation preserves the original stored form only, but the
+ * enum is intentionally modeled as persisted metadata so that future compiled
+ * trie artifacts can explicitly declare whether they were built with exact
+ * diacritic matching, normalized matching, or a dual-path fallback strategy.
+ * </p>
+ */
+public enum DiacriticProcessingMode {
+
+    /**
+     * Preserves the original stored form exactly as provided by the source
+     * dictionary.
+     */
+    AS_IS,
+
+    /**
+     * Indicates that diacritics were removed before trie construction.
+     */
+    REMOVE,
+
+    /**
+     * Indicates that lookup may continue along both the original diacritic edge and
+     * a normalized non-diacritic alternative.
+     */
+    AS_IS_AND_STRIPPED_FALLBACK
+}
--- a/src/main/java/org/egothor/stemmer/FrequencyTrie.java
+++ b/src/main/java/org/egothor/stemmer/FrequencyTrie.java
@@ -101,7 +101,7 @@ public final class FrequencyTrie<V> {
    /**
     * Binary format version.
     */
-    private static final int STREAM_VERSION = 1;
+    private static final int STREAM_VERSION = 3;

    /**
     * Factory used to create correctly typed arrays for {@link #getAll(String)}.
@@ -113,16 +113,24 @@ public final class FrequencyTrie<V> {
     */
    private final CompiledNode<V> root;

+    /**
+     * Metadata persisted together with this trie.
+     */
+    private final TrieMetadata metadata;
+
    /**
     * Creates a new compiled trie instance.
     *
-     * @param arrayFactory array factory
-     * @param root         compiled root node
+     * @param arrayFactory       array factory
+     * @param root               compiled root node
+     * @param traversalDirection logical key traversal direction
     * @throws NullPointerException if any argument is {@code null}
     */
-    private FrequencyTrie(final IntFunction<V[]> arrayFactory, final CompiledNode<V> root) {
+    private FrequencyTrie(final IntFunction<V[]> arrayFactory, final CompiledNode<V> root,
+            final TrieMetadata metadata) {
        this.arrayFactory = Objects.requireNonNull(arrayFactory, "arrayFactory");
        this.root = Objects.requireNonNull(root, "root");
+        this.metadata = Objects.requireNonNull(metadata, "metadata");
    }

    /**
@@ -213,6 +221,29 @@ public final class FrequencyTrie<V> {
        return Collections.unmodifiableList(entries);
    }

+    /**
+     * Returns the logical key traversal direction used by this trie.
+     *
+     * <p>
+     * The same direction must be used when reconstructing mutable builders or when
+     * applying patch commands that were generated against keys stored in this trie.
+     * </p>
+     *
+     * @return logical key traversal direction
+     */
+    public WordTraversalDirection traversalDirection() {
+        return this.metadata.traversalDirection();
+    }
+
+    /**
+     * Returns immutable persisted metadata associated with this trie.
+     *
+     * @return trie metadata
+     */
+    public TrieMetadata metadata() {
+        return this.metadata;
+    }
+
    /**
     * Returns the root node mainly for diagnostics and tests within the package.
     *
@@ -262,6 +293,7 @@ public final class FrequencyTrie<V> {
        dataOutput.writeInt(STREAM_VERSION);
        dataOutput.writeInt(orderedNodes.size());
        dataOutput.writeInt(nodeIds.get(this.root));
+        writeMetadata(dataOutput, this.metadata);

        for (CompiledNode<V> node : orderedNodes) {
            writeNode(dataOutput, valueCodec, node, nodeIds);
@@ -304,7 +336,7 @@ public final class FrequencyTrie<V> {
        }

        final int version = dataInput.readInt();
-        if (version != STREAM_VERSION) {
+        if (version != 1 && version != STREAM_VERSION) {
            throw new IOException("Unsupported trie stream version: " + version);
        }

@@ -318,6 +350,8 @@ public final class FrequencyTrie<V> {
            throw new IOException("Invalid root node id: " + rootNodeId);
        }

+        final TrieMetadata metadata = readMetadata(dataInput, version);
+
        final CompiledNode<V>[] nodes = readNodes(dataInput, arrayFactory, valueCodec, nodeCount);
        final CompiledNode<V> rootNode = nodes[rootNodeId];

@@ -325,7 +359,70 @@ public final class FrequencyTrie<V> {
            LOGGER.log(Level.FINE, "Read compiled trie with {0} canonical nodes.", nodeCount);
        }

-        return new FrequencyTrie<>(arrayFactory, rootNode);
+        return new FrequencyTrie<>(arrayFactory, rootNode, metadata);
+    }
+
+    /**
+     * Writes persisted trie metadata.
+     *
+     * @param dataOutput output stream
+     * @param metadata   metadata to serialize
+     * @throws IOException if writing fails
+     */
+    private static void writeMetadata(final DataOutputStream dataOutput, final TrieMetadata metadata)
+            throws IOException {
+        dataOutput.writeInt(metadata.traversalDirection().ordinal());
+        dataOutput.writeInt(metadata.reductionSettings().reductionMode().ordinal());
+        dataOutput.writeInt(metadata.reductionSettings().dominantWinnerMinPercent());
+        dataOutput.writeInt(metadata.reductionSettings().dominantWinnerOverSecondRatio());
+        dataOutput.writeInt(metadata.diacriticProcessingMode().ordinal());
+    }
+
+    /**
+     * Reads persisted trie metadata while remaining backward compatible with
+     * earlier stream versions.
+     *
+     * @param dataInput input stream
+     * @param version   persisted stream version
+     * @return deserialized metadata
+     * @throws IOException if the metadata section is invalid
+     */
+    private static TrieMetadata readMetadata(final DataInputStream dataInput, final int version) throws IOException {
+        final WordTraversalDirection traversalDirection;
+        if (version >= 2) { // NOPMD
+            final int traversalDirectionOrdinal = dataInput.readInt();
+            final WordTraversalDirection[] traversalDirections = WordTraversalDirection.values();
+            if (traversalDirectionOrdinal < 0 || traversalDirectionOrdinal >= traversalDirections.length) {
+                throw new IOException("Invalid traversal direction ordinal: " + traversalDirectionOrdinal);
+            }
+            traversalDirection = traversalDirections[traversalDirectionOrdinal];
+        } else {
+            traversalDirection = WordTraversalDirection.BACKWARD;
+        }
+
+        if (version < 3) { // NOPMD
+            return TrieMetadata.legacy(version, traversalDirection);
+        }
+
+        final ReductionMode[] reductionModes = ReductionMode.values();
+        final int reductionModeOrdinal = dataInput.readInt();
+        if (reductionModeOrdinal < 0 || reductionModeOrdinal >= reductionModes.length) {
+            throw new IOException("Invalid reduction mode ordinal: " + reductionModeOrdinal);
+        }
+
+        final int dominantWinnerMinPercent = dataInput.readInt();
+        final int dominantWinnerOverSecondRatio = dataInput.readInt(); // NOPMD
+
+        final DiacriticProcessingMode[] diacriticProcessingModes = DiacriticProcessingMode.values();
+        final int diacriticProcessingModeOrdinal = dataInput.readInt(); // NOPMD
+        if (diacriticProcessingModeOrdinal < 0 || diacriticProcessingModeOrdinal >= diacriticProcessingModes.length) {
+            throw new IOException("Invalid diacritic processing mode ordinal: " + diacriticProcessingModeOrdinal);
+        }
+
+        return new TrieMetadata(
+                version, traversalDirection, new ReductionSettings(reductionModes[reductionModeOrdinal],
+                        dominantWinnerMinPercent, dominantWinnerOverSecondRatio),
+                diacriticProcessingModes[diacriticProcessingModeOrdinal]);
    }

    /**
@@ -506,8 +603,9 @@ public final class FrequencyTrie<V> {
     */
    private CompiledNode<V> findNode(final String key) {
        CompiledNode<V> current = this.root;
-        for (int index = 0; index < key.length(); index++) {
-            current = current.findChild(key.charAt(index));
+        for (int traversalOffset = 0; traversalOffset < key.length(); traversalOffset++) {
+            current = current.findChild(
+                    key.charAt(this.metadata.traversalDirection().logicalIndex(key.length(), traversalOffset)));
            if (current == null) {
                return null;
            }
@@ -544,6 +642,11 @@ public final class FrequencyTrie<V> {
         */
        private final ReductionSettings reductionSettings;

+        /**
+         * Logical key traversal direction used by this builder.
+         */
+        private final WordTraversalDirection traversalDirection;
+
        /**
         * Mutable root node.
         */
@@ -552,13 +655,33 @@ public final class FrequencyTrie<V> {
        /**
         * Creates a new builder with the provided settings.
         *
+         * <p>
+         * This constructor preserves the historical Egothor behavior and therefore
+         * traverses logical keys from their end toward their beginning.
+         * </p>
+         *
         * @param arrayFactory      array factory
         * @param reductionSettings reduction configuration
         * @throws NullPointerException if any argument is {@code null}
         */
        public Builder(final IntFunction<V[]> arrayFactory, final ReductionSettings reductionSettings) {
+            this(arrayFactory, reductionSettings, WordTraversalDirection.BACKWARD);
+        }
+
+        /**
+         * Creates a new builder with the provided settings and explicit traversal
+         * direction.
+         *
+         * @param arrayFactory       array factory
+         * @param reductionSettings  reduction configuration
+         * @param traversalDirection logical key traversal direction
+         * @throws NullPointerException if any argument is {@code null}
+         */
+        public Builder(final IntFunction<V[]> arrayFactory, final ReductionSettings reductionSettings,
+                final WordTraversalDirection traversalDirection) {
            this.arrayFactory = Objects.requireNonNull(arrayFactory, "arrayFactory");
            this.reductionSettings = Objects.requireNonNull(reductionSettings, "reductionSettings");
+            this.traversalDirection = Objects.requireNonNull(traversalDirection, "traversalDirection");
            this.root = new MutableNode<>();
        }

@@ -566,12 +689,31 @@ public final class FrequencyTrie<V> {
         * Creates a new builder using default thresholds for the supplied reduction
         * mode.
         *
+         * <p>
+         * This constructor preserves the historical Egothor behavior and therefore
+         * traverses logical keys from their end toward their beginning.
+         * </p>
+         *
         * @param arrayFactory  array factory
         * @param reductionMode reduction mode
         * @throws NullPointerException if any argument is {@code null}
         */
        public Builder(final IntFunction<V[]> arrayFactory, final ReductionMode reductionMode) {
-            this(arrayFactory, ReductionSettings.withDefaults(reductionMode));
+            this(arrayFactory, ReductionSettings.withDefaults(reductionMode), WordTraversalDirection.BACKWARD);
+        }
+
+        /**
+         * Creates a new builder using default thresholds for the supplied reduction
+         * mode and explicit traversal direction.
+         *
+         * @param arrayFactory       array factory
+         * @param reductionMode      reduction mode
+         * @param traversalDirection logical key traversal direction
+         * @throws NullPointerException if any argument is {@code null}
+         */
+        public Builder(final IntFunction<V[]> arrayFactory, final ReductionMode reductionMode,
+                final WordTraversalDirection traversalDirection) {
+            this(arrayFactory, ReductionSettings.withDefaults(reductionMode), traversalDirection);
        }

        /**
@@ -611,7 +753,9 @@ public final class FrequencyTrie<V> {
                        reductionContext.canonicalNodeCount());
            }

-            return new FrequencyTrie<>(this.arrayFactory, compiledRoot);
+            final TrieMetadata metadata = TrieMetadata.current(STREAM_VERSION, this.traversalDirection,
+                    this.reductionSettings);
+            return new FrequencyTrie<>(this.arrayFactory, compiledRoot, metadata);
        }

        /**
@@ -646,8 +790,8 @@ public final class FrequencyTrie<V> {
            }

            MutableNode<V> current = this.root;
-            for (int index = 0; index < key.length(); index++) {
-                final Character edge = key.charAt(index);
+            for (int traversalOffset = 0; traversalOffset < key.length(); traversalOffset++) {
+                final Character edge = key.charAt(this.traversalDirection.logicalIndex(key.length(), traversalOffset));
                MutableNode<V> child = current.children().get(edge);
                if (child == null) {
                    child = new MutableNode<>(); // NOPMD
@@ -679,6 +823,15 @@ public final class FrequencyTrie<V> {
            return countMutableNodes(this.root);
        }

+        /**
+         * Returns the logical key traversal direction used by this builder.
+         *
+         * @return logical key traversal direction
+         */
+        /* default */ WordTraversalDirection traversalDirection() {
+            return this.traversalDirection;
+        }
+
        /**
         * Counts mutable nodes recursively.
         *
--- a/src/main/java/org/egothor/stemmer/FrequencyTrieBuilders.java
+++ b/src/main/java/org/egothor/stemmer/FrequencyTrieBuilders.java
@@ -87,10 +87,11 @@ public final class FrequencyTrieBuilders {
        Objects.requireNonNull(arrayFactory, "arrayFactory");
        Objects.requireNonNull(reductionSettings, "reductionSettings");

-        final FrequencyTrie.Builder<V> builder = new FrequencyTrie.Builder<>(arrayFactory, reductionSettings);
+        final FrequencyTrie.Builder<V> builder = new FrequencyTrie.Builder<>(arrayFactory, reductionSettings,
+                source.traversalDirection());
        final StringBuilder keyBuilder = new StringBuilder(64);

-        copyNode(source.root(), keyBuilder, builder);
+        copyNode(source.root(), keyBuilder, builder, source.traversalDirection());

        LOGGER.log(Level.FINE, "Reconstructed writable builder from compiled trie.");
        return builder;
@@ -119,18 +120,20 @@ public final class FrequencyTrieBuilders {
     *
     * @param node       current compiled node
     * @param keyBuilder current key builder
-     * @param builder    target mutable builder
-     * @param <V>        value type
+     * @param builder            target mutable builder
+     * @param traversalDirection logical key traversal direction used by the source
+     * @param <V>                 value type
     */
    private static <V> void copyNode(final CompiledNode<V> node, final StringBuilder keyBuilder,
-            final FrequencyTrie.Builder<V> builder) {
+            final FrequencyTrie.Builder<V> builder, final WordTraversalDirection traversalDirection) {
+        final String logicalKey = traversalDirection.traversalPathToLogicalKey(keyBuilder);
        for (int valueIndex = 0; valueIndex < node.orderedValues().length; valueIndex++) {
-            builder.put(keyBuilder.toString(), node.orderedValues()[valueIndex], node.orderedCounts()[valueIndex]);
+            builder.put(logicalKey, node.orderedValues()[valueIndex], node.orderedCounts()[valueIndex]);
        }

        for (int childIndex = 0; childIndex < node.edgeLabels().length; childIndex++) {
            keyBuilder.append(node.edgeLabels()[childIndex]);
-            copyNode(node.children()[childIndex], keyBuilder, builder);
+            copyNode(node.children()[childIndex], keyBuilder, builder, traversalDirection);
            keyBuilder.setLength(keyBuilder.length() - 1);
        }
    }
--- a/src/main/java/org/egothor/stemmer/PatchCommandEncoder.java
+++ b/src/main/java/org/egothor/stemmer/PatchCommandEncoder.java
@@ -30,6 +30,7 @@
 ******************************************************************************/
 package org.egothor.stemmer;

+import java.util.Objects;
 import java.util.concurrent.locks.ReentrantLock;

 /**
@@ -37,10 +38,19 @@ import java.util.concurrent.locks.ReentrantLock;
 * and applies such commands back to source words.
 *
 * <p>
- * The generated patch command follows the historical Egothor convention:
- * instructions are serialized so that they are applied from the end of the
- * source word toward its beginning. This keeps the command stream compact and
- * matches the behavior expected by existing stemming data.
+ * The historical Egothor patch language is defined for backward traversal, that
+ * is, from the logical end of a word toward its beginning. This implementation
+ * preserves that proven opcode semantics as the single internal representation.
+ * Forward traversal is implemented by translating source and target words to
+ * the equivalent reversed logical form at the API boundary and then delegating
+ * to the same backward encoder and decoder.
+ * </p>
+ *
+ * <p>
+ * This design keeps the patch language stable, avoids maintaining two distinct
+ * opcode interpreters, and guarantees that forward traversal is semantically
+ * equivalent to running the historical algorithm on the reversed logical word
+ * form.
 * </p>
 *
 * <p>
@@ -57,6 +67,7 @@ import java.util.concurrent.locks.ReentrantLock;
 * instance can still be used safely when needed.
 * </p>
 */
+@SuppressWarnings("PMD.CyclomaticComplexity")
 public final class PatchCommandEncoder {

    /**
@@ -87,12 +98,6 @@ public final class PatchCommandEncoder {

    /**
     * Serialized opcode for a canonical no-operation patch.
-     *
-     * <p>
-     * This opcode represents an identity transform of the whole source word. It is
-     * used to ensure that equal source and target words always produce the same
-     * serialized patch command.
-     * </p>
     */
    private static final char NOOP_OPCODE = 'N';

@@ -103,11 +108,6 @@ public final class PatchCommandEncoder {

    /**
     * Canonical serialized no-operation patch.
-     *
-     * <p>
-     * This constant is returned by {@link #encode(String, String)} whenever source
-     * and target are equal.
-     * </p>
     */
    /* default */ static final String NOOP_PATCH = String.valueOf(new char[] { NOOP_OPCODE, NOOP_ARGUMENT });

@@ -118,13 +118,6 @@ public final class PatchCommandEncoder {

    /**
     * Extra matrix headroom reserved beyond the immediately required dimensions.
-     *
-     * <p>
-     * A small fixed margin reduces repeated reallocation when a caller encodes many
-     * similarly sized terms in sequence. The value is intentionally modest: large
-     * enough to absorb minor size fluctuations, yet small enough to avoid
-     * materially over-allocating the reused dynamic-programming matrices.
-     * </p>
     */
    private static final int CAPACITY_MARGIN = 8;

@@ -148,6 +141,12 @@ public final class PatchCommandEncoder {
     */
    private final int matchCost;

+    /**
+     * Direction in which words are traversed during both patch serialization and
+     * patch application.
+     */
+    private final WordTraversalDirection traversalDirection;
+
    /**
     * Currently allocated source dimension of reusable matrices.
     */
@@ -178,24 +177,16 @@ public final class PatchCommandEncoder {
     */
    private enum Trace {

-        /**
-         * Deletes one character from the source sequence.
-         */
+        /** Deletes one character from the source sequence. */
        DELETE,

-        /**
-         * Inserts one character from the target sequence.
-         */
+        /** Inserts one character from the target sequence. */
        INSERT,

-        /**
-         * Replaces one source character with one target character.
-         */
+        /** Replaces one source character with one target character. */
        REPLACE,

-        /**
-         * Keeps one matching character unchanged.
-         */
+        /** Keeps one matching character unchanged. */
        MATCH
    }

@@ -204,7 +195,17 @@ public final class PatchCommandEncoder {
     * delete = 1, replace = 1, match = 0.
     */
    public PatchCommandEncoder() {
-        this(1, 1, 1, 0);
+        this(WordTraversalDirection.BACKWARD, 1, 1, 1, 0);
+    }
+
+    /**
+     * Creates an encoder with the traditional Egothor cost model and explicit
+     * traversal direction.
+     *
+     * @param traversalDirection traversal direction
+     */
+    public PatchCommandEncoder(final WordTraversalDirection traversalDirection) {
+        this(traversalDirection, 1, 1, 1, 0);
    }

    /**
@@ -215,7 +216,22 @@ public final class PatchCommandEncoder {
     * @param replaceCost cost of replacing one character
     * @param matchCost   cost of keeping one equal character unchanged
     */
-    public PatchCommandEncoder(int insertCost, int deleteCost, int replaceCost, int matchCost) {
+    public PatchCommandEncoder(final int insertCost, final int deleteCost, final int replaceCost, final int matchCost) {
+        this(WordTraversalDirection.BACKWARD, insertCost, deleteCost, replaceCost, matchCost);
+    }
+
+    /**
+     * Creates an encoder with explicit operation costs and traversal direction.
+     *
+     * @param traversalDirection traversal direction
+     * @param insertCost         cost of inserting one character
+     * @param deleteCost         cost of deleting one character
+     * @param replaceCost        cost of replacing one character
+     * @param matchCost          cost of keeping one equal character unchanged
+     */
+    public PatchCommandEncoder(final WordTraversalDirection traversalDirection, final int insertCost,
+            final int deleteCost, final int replaceCost, final int matchCost) {
+        this.traversalDirection = Objects.requireNonNull(traversalDirection, "traversalDirection");
        if (insertCost < 0) {
            throw new IllegalArgumentException("insertCost must be non-negative.");
        }
@@ -248,25 +264,78 @@ public final class PatchCommandEncoder {
     * @return compact patch command, or {@code null} when any argument is
     *         {@code null}
     */
-    public String encode(String source, String target) {
+    public String encode(final String source, final String target) {
        if (source == null || target == null) {
            return null;
        }
-
        if (source.equals(target)) {
            return NOOP_PATCH;
        }

-        int sourceLength = source.length();
-        int targetLength = target.length();
+        final String effectiveSource = toLegacyWordForm(source, this.traversalDirection);
+        final String effectiveTarget = toLegacyWordForm(target, this.traversalDirection);
+        return encodeBackward(effectiveSource, effectiveTarget);
+    }
+
+    /**
+     * Applies a compact patch command to the supplied source word using the
+     * historical backward traversal direction.
+     *
+     * @param source       original source word
+     * @param patchCommand compact patch command
+     * @return transformed word, or {@code null} when {@code source} is {@code null}
+     */
+    public static String apply(final String source, final String patchCommand) {
+        return apply(source, patchCommand, WordTraversalDirection.BACKWARD);
+    }
+
+    /**
+     * Applies a compact patch command to the supplied source word using the
+     * specified traversal direction.
+     *
+     * <p>
+     * Forward traversal is implemented by transforming the source word to the
+     * equivalent legacy backward form, applying the proven historical decoder, and
+     * reversing the transformed result back to the logical word form.
+     * </p>
+     *
+     * @param source             original source word
+     * @param patchCommand       compact patch command
+     * @param traversalDirection traversal direction used by the patch command
+     * @return transformed word, or {@code null} when {@code source} is {@code null}
+     */
+    public static String apply(final String source, final String patchCommand,
+            final WordTraversalDirection traversalDirection) {
+        Objects.requireNonNull(traversalDirection, "traversalDirection");
+        if (source == null) {
+            return null;
+        }
+        if (traversalDirection == WordTraversalDirection.BACKWARD) {
+            return applyBackward(source, patchCommand);
+        }
+        final String transformedSource = reverse(source);
+        final String transformedResult = applyBackward(transformedSource, patchCommand);
+        return reverse(transformedResult);
+    }
+
+    /**
+     * Encodes a patch command using the historical backward Egothor semantics.
+     *
+     * @param source source word form in legacy backward logical space
+     * @param target target word form in legacy backward logical space
+     * @return compact patch command
+     */
+    private String encodeBackward(final String source, final String target) {
+        final int sourceLength = source.length();
+        final int targetLength = target.length();

        lock.lock();
        try {
            ensureCapacity(sourceLength + 1, targetLength + 1);
            initializeBoundaryConditions(sourceLength, targetLength);

-            char[] sourceCharacters = source.toCharArray();
-            char[] targetCharacters = target.toCharArray();
+            final char[] sourceCharacters = source.toCharArray();
+            final char[] targetCharacters = target.toCharArray();

            fillMatrices(sourceCharacters, targetCharacters, sourceLength, targetLength);

@@ -277,26 +346,14 @@ public final class PatchCommandEncoder {
    }

    /**
-     * Applies a compact patch command to the supplied source word.
+     * Applies a patch command using the historical backward Egothor semantics.
     *
-     * <p>
-     * This method operates directly on serialized opcodes rather than mapping them
-     * to another representation. That keeps the hot path small and avoids
-     * unnecessary indirection during patch application.
-     * </p>
-     *
-     * <p>
-     * For compatibility with the historical behavior, malformed patch input that
-     * causes index failures results in the original source word being returned
-     * unchanged.
-     * </p>
-     *
-     * @param source       original source word
+     * @param source       original source word in legacy backward logical space
     * @param patchCommand compact patch command
     * @return transformed word, or {@code null} when {@code source} is {@code null}
     */
    @SuppressWarnings({ "PMD.CyclomaticComplexity", "PMD.AvoidLiteralsInIfCondition" })
-    public static String apply(String source, String patchCommand) {
+    private static String applyBackward(final String source, final String patchCommand) {
        if (source == null) {
            return null;
        }
@@ -306,24 +363,21 @@ public final class PatchCommandEncoder {
        if (NOOP_PATCH.equals(patchCommand)) {
            return source;
        }
-
        if ((patchCommand.length() & 1) != 0) {
            return source;
        }

-        StringBuilder result = new StringBuilder(source);
-
+        final StringBuilder result = new StringBuilder(source);
        if (result.isEmpty()) {
-            return applyToEmptySource(result, patchCommand);
+            return applyBackwardToEmptySource(result, patchCommand);
        }

        int position = result.length() - 1;

        try {
            for (int patchIndex = 0, patchLength = patchCommand.length(); patchIndex < patchLength; patchIndex += 2) { // NOPMD
-
-                char opcode = patchCommand.charAt(patchIndex);
-                char argument = patchCommand.charAt(patchIndex + 1);
+                final char opcode = patchCommand.charAt(patchIndex);
+                final char argument = patchCommand.charAt(patchIndex + 1);

                switch (opcode) {
                    case SKIP_OPCODE:
@@ -343,7 +397,7 @@ public final class PatchCommandEncoder {
                        if (deleteCount < 1) {
                            return source;
                        }
-                        int deleteEndExclusive = position + 1;
+                        final int deleteEndExclusive = position + 1;
                        position -= deleteCount - 1;
                        result.delete(position, deleteEndExclusive);
                        break;
@@ -373,27 +427,7 @@ public final class PatchCommandEncoder {
    }

    /**
-     * Decodes a compact count argument used by skip and delete instructions.
-     *
-     * <p>
-     * Valid encoded counts start at {@code 'a'} for one affected character. Values
-     * below {@code 'a'} are malformed and are reported to callers via the
-     * compatibility fallback path rather than by throwing a dedicated exception.
-     * </p>
-     *
-     * @param argument serialized count argument
-     * @return decoded positive count, or {@code -1} when the argument is malformed
-     */
-    @SuppressWarnings("PMD.AvoidLiteralsInIfCondition")
-    private static int decodeEncodedCount(final char argument) {
-        if (argument < 'a') {
-            return -1;
-        }
-        return argument - 'a' + 1;
-    }
-
-    /**
-     * Applies a patch command to an empty source word.
+     * Applies a backward patch command to an empty source word.
     *
     * <p>
     * Only insertion instructions are meaningful for an empty source. Skip,
@@ -407,12 +441,11 @@ public final class PatchCommandEncoder {
     * @return transformed word, or the original empty word when the patch is
     *         malformed
     */
-    private static String applyToEmptySource(StringBuilder result, String patchCommand) {
+    private static String applyBackwardToEmptySource(final StringBuilder result, final String patchCommand) {
        try {
            for (int patchIndex = 0, patchLength = patchCommand.length(); patchIndex < patchLength; patchIndex += 2) { // NOPMD
-
-                char opcode = patchCommand.charAt(patchIndex);
-                char argument = patchCommand.charAt(patchIndex + 1);
+                final char opcode = patchCommand.charAt(patchIndex);
+                final char argument = patchCommand.charAt(patchIndex + 1);

                switch (opcode) {
                    case INSERT_OPCODE:
@@ -441,6 +474,42 @@ public final class PatchCommandEncoder {
        return result.toString();
    }

+    /**
+     * Converts a logical word to the equivalent word form expected by the legacy
+     * backward encoder.
+     *
+     * @param word               logical word form
+     * @param traversalDirection requested traversal direction
+     * @return word form suitable for the legacy backward algorithm
+     */
+    private static String toLegacyWordForm(final String word, final WordTraversalDirection traversalDirection) {
+        return traversalDirection == WordTraversalDirection.BACKWARD ? word : reverse(word);
+    }
+
+    /**
+     * Reverses the supplied word.
+     *
+     * @param word source word
+     * @return reversed word
+     */
+    private static String reverse(final String word) {
+        return new StringBuilder(word).reverse().toString();
+    }
+
+    /**
+     * Decodes a compact count argument used by skip and delete instructions.
+     *
+     * @param argument serialized count argument
+     * @return decoded positive count, or {@code -1} when the argument is malformed
+     */
+    @SuppressWarnings("PMD.AvoidLiteralsInIfCondition")
+    private static int decodeEncodedCount(final char argument) {
+        if (argument < 'a') {
+            return -1;
+        }
+        return argument - 'a' + 1;
+    }
+
    /**
     * Ensures that internal matrices are large enough for the requested input
     * dimensions.
@@ -448,16 +517,16 @@ public final class PatchCommandEncoder {
     * @param requiredSourceCapacity required source dimension
     * @param requiredTargetCapacity required target dimension
     */
-    private void ensureCapacity(int requiredSourceCapacity, int requiredTargetCapacity) {
-        if (requiredSourceCapacity <= sourceCapacity && requiredTargetCapacity <= targetCapacity) {
+    private void ensureCapacity(final int requiredSourceCapacity, final int requiredTargetCapacity) {
+        if (requiredSourceCapacity <= this.sourceCapacity && requiredTargetCapacity <= this.targetCapacity) {
            return;
        }

-        sourceCapacity = Math.max(sourceCapacity, requiredSourceCapacity) + CAPACITY_MARGIN;
-        targetCapacity = Math.max(targetCapacity, requiredTargetCapacity) + CAPACITY_MARGIN;
+        this.sourceCapacity = Math.max(this.sourceCapacity, requiredSourceCapacity) + CAPACITY_MARGIN;
+        this.targetCapacity = Math.max(this.targetCapacity, requiredTargetCapacity) + CAPACITY_MARGIN;

-        costMatrix = new int[sourceCapacity][targetCapacity];
-        traceMatrix = new Trace[sourceCapacity][targetCapacity];
+        this.costMatrix = new int[this.sourceCapacity][this.targetCapacity];
+        this.traceMatrix = new Trace[this.sourceCapacity][this.targetCapacity];
    }

    /**
@@ -467,18 +536,18 @@ public final class PatchCommandEncoder {
     * @param sourceLength length of the source word
     * @param targetLength length of the target word
     */
-    private void initializeBoundaryConditions(int sourceLength, int targetLength) {
-        costMatrix[0][0] = 0;
-        traceMatrix[0][0] = Trace.MATCH;
+    private void initializeBoundaryConditions(final int sourceLength, final int targetLength) {
+        this.costMatrix[0][0] = 0;
+        this.traceMatrix[0][0] = Trace.MATCH;

        for (int sourceIndex = 1; sourceIndex <= sourceLength; sourceIndex++) {
-            costMatrix[sourceIndex][0] = sourceIndex * deleteCost;
-            traceMatrix[sourceIndex][0] = Trace.DELETE;
+            this.costMatrix[sourceIndex][0] = sourceIndex * this.deleteCost;
+            this.traceMatrix[sourceIndex][0] = Trace.DELETE;
        }

        for (int targetIndex = 1; targetIndex <= targetLength; targetIndex++) {
-            costMatrix[0][targetIndex] = targetIndex * insertCost;
-            traceMatrix[0][targetIndex] = Trace.INSERT;
+            this.costMatrix[0][targetIndex] = targetIndex * this.insertCost;
+            this.traceMatrix[0][targetIndex] = Trace.INSERT;
        }
    }

@@ -491,19 +560,20 @@ public final class PatchCommandEncoder {
     * @param sourceLength     source length
     * @param targetLength     target length
     */
-    private void fillMatrices(char[] sourceCharacters, char[] targetCharacters, int sourceLength, int targetLength) {
+    private void fillMatrices(final char[] sourceCharacters, final char[] targetCharacters, final int sourceLength,
+            final int targetLength) {

        for (int sourceIndex = 1; sourceIndex <= sourceLength; sourceIndex++) {
-            char sourceCharacter = sourceCharacters[sourceIndex - 1];
+            final char sourceCharacter = sourceCharacters[sourceIndex - 1];

            for (int targetIndex = 1; targetIndex <= targetLength; targetIndex++) {
-                char targetCharacter = targetCharacters[targetIndex - 1];
+                final char targetCharacter = targetCharacters[targetIndex - 1];

-                int deleteCandidate = costMatrix[sourceIndex - 1][targetIndex] + deleteCost;
-                int insertCandidate = costMatrix[sourceIndex][targetIndex - 1] + insertCost;
-                int replaceCandidate = costMatrix[sourceIndex - 1][targetIndex - 1] + replaceCost;
-                int matchCandidate = costMatrix[sourceIndex - 1][targetIndex - 1]
-                        + (sourceCharacter == targetCharacter ? matchCost : MISMATCH_PENALTY);
+                final int deleteCandidate = this.costMatrix[sourceIndex - 1][targetIndex] + this.deleteCost;
+                final int insertCandidate = this.costMatrix[sourceIndex][targetIndex - 1] + this.insertCost;
+                final int replaceCandidate = this.costMatrix[sourceIndex - 1][targetIndex - 1] + this.replaceCost;
+                final int matchCandidate = this.costMatrix[sourceIndex - 1][targetIndex - 1]
+                        + (sourceCharacter == targetCharacter ? this.matchCost : MISMATCH_PENALTY);

                int bestCost = matchCandidate;
                Trace bestTrace = Trace.MATCH;
@@ -521,8 +591,8 @@ public final class PatchCommandEncoder {
                    bestTrace = Trace.REPLACE;
                }

-                costMatrix[sourceIndex][targetIndex] = bestCost;
-                traceMatrix[sourceIndex][targetIndex] = bestTrace;
+                this.costMatrix[sourceIndex][targetIndex] = bestCost;
+                this.traceMatrix[sourceIndex][targetIndex] = bestTrace;
            }
        }
    }
@@ -536,9 +606,8 @@ public final class PatchCommandEncoder {
     * @param targetLength     target length
     * @return compact patch command
     */
-    private String buildPatchCommand(char[] targetCharacters, int sourceLength, int targetLength) {
-
-        StringBuilder patchBuilder = new StringBuilder(sourceLength + targetLength);
+    private String buildPatchCommand(final char[] targetCharacters, final int sourceLength, final int targetLength) {
+        final StringBuilder patchBuilder = new StringBuilder(sourceLength + targetLength);

        char pendingDeletes = COUNT_SENTINEL;
        char pendingSkips = COUNT_SENTINEL;
@@ -547,7 +616,7 @@ public final class PatchCommandEncoder {
        int targetIndex = targetLength;

        while (sourceIndex != 0 || targetIndex != 0) {
-            Trace trace = traceMatrix[sourceIndex][targetIndex];
+            final Trace trace = this.traceMatrix[sourceIndex][targetIndex];

            switch (trace) {
                case DELETE:
@@ -612,7 +681,7 @@ public final class PatchCommandEncoder {
     * @param opcode       single-character instruction opcode
     * @param argument     encoded instruction argument
     */
-    private static void appendInstruction(StringBuilder patchBuilder, char opcode, char argument) {
+    private static void appendInstruction(final StringBuilder patchBuilder, final char opcode, final char argument) {
        patchBuilder.append(opcode).append(argument);
    }
 }
--- a/src/main/java/org/egothor/stemmer/StemmerDictionaryParser.java
+++ b/src/main/java/org/egothor/stemmer/StemmerDictionaryParser.java
@@ -36,9 +36,10 @@ import java.io.Reader;
 import java.nio.charset.StandardCharsets;
 import java.nio.file.Files;
 import java.nio.file.Path;
+import java.util.ArrayList;
+import java.util.List;
 import java.util.Locale;
 import java.util.Objects;
-import java.util.StringTokenizer;
 import java.util.logging.Level;
 import java.util.logging.Logger;

@@ -46,14 +47,14 @@ import java.util.logging.Logger;
 * Parser of line-oriented stemmer dictionary files.
 *
 * <p>
- * Each non-empty logical line consists of a stem followed by zero or more known
- * word variants separated by whitespace. The first token is interpreted as the
- * canonical stem, and every following token on the same line is interpreted as
- * a variant belonging to that stem.
+ * Each non-empty logical line uses a tab-separated values layout. The first
+ * column is interpreted as the canonical stem, and every following
+ * tab-separated column on the same line is interpreted as a variant belonging
+ * to that stem.
 *
 * <p>
 * Input lines are normalized to lower case using {@link Locale#ROOT}. Leading
- * and trailing whitespace is ignored.
+ * and trailing whitespace around each column is ignored.
 *
 * <p>
 * The parser supports line remarks and trailing remarks. The remark markers
@@ -61,6 +62,13 @@ import java.util.logging.Logger;
 * remainder of that line is ignored.
 *
 * <p>
+ * Dictionary items containing any Unicode whitespace character are currently
+ * not supported. Such items are ignored and reported through a single
+ * {@link Level#WARNING warning}-level log entry per physical line together with
+ * the source line number, the normalized stem column, and the list of ignored
+ * items from that line.
+ *
+ * <p>
 * This class is intentionally stateless and allocation-light so it can be used
 * both by runtime loading and by offline compilation tooling.
 */
@@ -159,20 +167,50 @@ public final class StemmerDictionaryParser {
                continue;
            }

-            final StringTokenizer tokenizer = new StringTokenizer(normalizedLine); // NOPMD
-            if (!tokenizer.hasMoreTokens()) {
+            final String[] rawColumns = normalizedLine.split("\t", -1);
+            if (rawColumns.length == 0) {
                ignoredLineCount++;
                continue;
            }

-            final String stem = tokenizer.nextToken();
-            final String[] variants = new String[tokenizer.countTokens()]; // NOPMD
+            final String stem = rawColumns[0].strip();
+            final List<String> acceptedVariants = new ArrayList<String>(Math.max(0, rawColumns.length - 1)); // NOPMD

-            for (int index = 0; index < variants.length; index++) {
-                variants[index] = tokenizer.nextToken();
+            if (stem.isEmpty()) {
+                ignoredLineCount++;
+                continue;
            }

-            entryHandler.onEntry(stem, variants, lineNumber);
+            if (containsWhitespaceCharacter(stem)) {
+                if (LOGGER.isLoggable(Level.WARNING)) {
+                    LOGGER.log(Level.WARNING,
+                            "Ignoring dictionary line containing whitespace in source {0} at line {1}, stem {2}.",
+                            new Object[] { sourceDescription, lineNumber, stem }); // NOPMD
+                }
+                continue;
+            }
+
+            int ignored = 0;
+
+            for (int index = 1; index < rawColumns.length; index++) {
+                final String variant = rawColumns[index].strip();
+                if (variant.isEmpty()) {
+                    continue;
+                }
+                if (containsWhitespaceCharacter(variant)) {
+                    ignored++;
+                    continue;
+                }
+                acceptedVariants.add(variant);
+            }
+
+            if (ignored > 0 && LOGGER.isLoggable(Level.WARNING)) {
+                LOGGER.log(Level.WARNING,
+                        "Ignoring dictionary items containing whitespace in source {0} at line {1}, stem {2}, ignored {3}:{4}.",
+                        new Object[] { sourceDescription, lineNumber, stem, ignored, rawColumns.length }); // NOPMD
+            }
+
+            entryHandler.onEntry(stem, acceptedVariants.toArray(String[]::new), lineNumber);
            logicalEntryCount++;
        }

@@ -188,6 +226,22 @@ public final class StemmerDictionaryParser {
        return statistics;
    }

+    /**
+     * Determines whether one dictionary item contains any Unicode whitespace
+     * character.
+     *
+     * @param item dictionary item to inspect
+     * @return {@code true} when the item contains at least one whitespace character
+     */
+    private static boolean containsWhitespaceCharacter(final String item) {
+        for (int index = 0; index < item.length(); index++) {
+            if (Character.isWhitespace(item.charAt(index))) {
+                return true;
+            }
+        }
+        return false;
+    }
+
    /**
     * Removes a trailing remark from one physical line.
     *
--- a/src/main/java/org/egothor/stemmer/StemmerKnowledgeExperiment.java
+++ b/src/main/java/org/egothor/stemmer/StemmerKnowledgeExperiment.java
@@ -0,0 +1,758 @@
+/*******************************************************************************
+ * Copyright (C) 2026, Leo Galambos
+ * All rights reserved.
+ * 
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ * 
+ * 1. Redistributions of source code must retain the above copyright notice,
+ *    this list of conditions and the following disclaimer.
+ * 
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ *    this list of conditions and the following disclaimer in the documentation
+ *    and/or other materials provided with the distribution.
+ * 
+ * 3. Neither the name of the copyright holder nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ * 
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ ******************************************************************************/
+package org.egothor.stemmer;
+
+import java.io.BufferedReader;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.InputStreamReader;
+import java.io.Reader;
+import java.nio.charset.StandardCharsets;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Locale;
+import java.util.Objects;
+import java.util.SplittableRandom;
+import java.util.logging.Level;
+import java.util.logging.Logger;
+
+/**
+ * Evaluates how stemming quality degrades when the compiled trie is built from
+ * only a deterministic subset of the available dictionary knowledge.
+ *
+ * <p>
+ * The experiment operates on whole dictionary entries. For a chosen knowledge
+ * percentage, each parsed dictionary line is deterministically included or
+ * excluded from the training subset using a seeded {@link SplittableRandom}.
+ * The resulting subset is compiled into a {@link FrequencyTrie}, while the
+ * evaluation is performed against all word forms from the original dictionary.
+ * </p>
+ *
+ * <p>
+ * Two lookup APIs are evaluated:
+ * </p>
+ * <ul>
+ * <li>{@link FrequencyTrie#get(String)} through top-1 accuracy</li>
+ * <li>{@link FrequencyTrie#getAll(String)} through global precision, recall,
+ * and F1</li>
+ * </ul>
+ */
+public final class StemmerKnowledgeExperiment {
+
+    /**
+     * Logger of this class.
+     */
+    private static final Logger LOGGER = Logger.getLogger(StemmerKnowledgeExperiment.class.getName());
+
+    /**
+     * Minimum supported knowledge percentage.
+     */
+    public static final int MINIMUM_KNOWLEDGE_PERCENT = 10;
+
+    /**
+     * Maximum supported knowledge percentage.
+     */
+    public static final int MAXIMUM_KNOWLEDGE_PERCENT = 100;
+
+    /**
+     * Step between adjacent evaluated knowledge percentages.
+     */
+    public static final int KNOWLEDGE_PERCENT_STEP = 10;
+
+    /**
+     * Canonical no-op patch command.
+     */
+    private static final String NOOP_PATCH_COMMAND = PatchCommandEncoder.NOOP_PATCH;
+
+    /**
+     * Shared patch encoder reused for subset compilation.
+     */
+    private final PatchCommandEncoder patchCommandEncoder;
+
+    /**
+     * Creates a new experiment harness.
+     */
+    public StemmerKnowledgeExperiment() {
+        this.patchCommandEncoder = new PatchCommandEncoder();
+    }
+
+    /**
+     * Evaluates all supported bundled dictionaries using the supplied seed.
+     *
+     * @param seed deterministic sampling seed
+     * @return immutable ordered list of experiment rows
+     * @throws IOException if reading a bundled dictionary fails
+     */
+    public List<ResultRow> evaluateAllBundledLanguages(final long seed) throws IOException {
+        final List<ResultRow> rows = new ArrayList<>();
+        for (StemmerPatchTrieLoader.Language language : StemmerPatchTrieLoader.Language.values()) {
+            rows.addAll(evaluateBundledLanguage(language, seed));
+        }
+        return List.copyOf(rows);
+    }
+
+    /**
+     * Evaluates one bundled dictionary across all supported experiment
+     * configurations.
+     *
+     * @param language bundled language dictionary
+     * @param seed     deterministic sampling seed
+     * @return immutable ordered list of experiment rows
+     * @throws NullPointerException if {@code language} is {@code null}
+     * @throws IOException          if reading the bundled dictionary fails
+     */
+    public List<ResultRow> evaluateBundledLanguage(final StemmerPatchTrieLoader.Language language, final long seed)
+            throws IOException {
+        Objects.requireNonNull(language, "language");
+        final String resourcePath = language.resourcePath();
+        try (InputStream inputStream = StemmerPatchTrieLoader.openBundledResource(resourcePath)) {
+            try (BufferedReader reader = new BufferedReader(
+                    new InputStreamReader(inputStream, StandardCharsets.UTF_8))) {
+                return evaluate(reader, resourcePath, language.name(), seed);
+            }
+        }
+    }
+
+    /**
+     * Evaluates one filesystem dictionary across all supported experiment
+     * configurations.
+     *
+     * @param dictionaryPath path to a dictionary file
+     * @param seed           deterministic sampling seed
+     * @return immutable ordered list of experiment rows
+     * @throws NullPointerException if {@code dictionaryPath} is {@code null}
+     * @throws IOException          if reading fails
+     */
+    public List<ResultRow> evaluatePath(final Path dictionaryPath, final long seed) throws IOException {
+        Objects.requireNonNull(dictionaryPath, "dictionaryPath");
+        try (BufferedReader reader = Files.newBufferedReader(dictionaryPath, StandardCharsets.UTF_8)) {
+            return evaluate(reader, dictionaryPath.toAbsolutePath().toString(), dictionaryPath.getFileName().toString(),
+                    seed);
+        }
+    }
+
+    /**
+     * Evaluates a dictionary provided through an arbitrary reader.
+     *
+     * @param reader            source reader
+     * @param sourceDescription logical source description
+     * @param languageLabel     label stored in the result rows
+     * @param seed              deterministic sampling seed
+     * @return immutable ordered list of experiment rows
+     * @throws NullPointerException if any argument except {@code seed} is
+     *                              {@code null}
+     * @throws IOException          if parsing fails
+     */
+    public List<ResultRow> evaluate(final Reader reader, final String sourceDescription, final String languageLabel,
+            final long seed) throws IOException {
+        Objects.requireNonNull(reader, "reader");
+        Objects.requireNonNull(sourceDescription, "sourceDescription");
+        Objects.requireNonNull(languageLabel, "languageLabel");
+
+        final DictionaryData dictionaryData = readDictionary(reader, sourceDescription);
+        final List<ResultRow> rows = new ArrayList<>();
+
+        for (ReductionMode reductionMode : ReductionMode.values()) {
+            final ReductionSettings reductionSettings = ReductionSettings.withDefaults(reductionMode);
+            for (boolean storeOriginal : new boolean[] { false, true }) { // NOPMD
+                for (boolean includeStemInEvaluation : new boolean[] { false, true }) { // NOPMD
+                    for (int knowledgePercent = MINIMUM_KNOWLEDGE_PERCENT; knowledgePercent <= MAXIMUM_KNOWLEDGE_PERCENT; knowledgePercent += KNOWLEDGE_PERCENT_STEP) {
+                        final ResultRow row = evaluateScenario(dictionaryData, languageLabel, seed, reductionSettings,
+                                storeOriginal, includeStemInEvaluation, knowledgePercent);
+                        rows.add(row);
+                    }
+                }
+            }
+        }
+
+        if (LOGGER.isLoggable(Level.INFO)) {
+            LOGGER.log(Level.INFO, "Knowledge experiment finished for source {0}: entries={1}, rows={2}, seed={3}.",
+                    new Object[] { sourceDescription, dictionaryData.entryCount(), rows.size(), seed });
+        }
+
+        return List.copyOf(rows);
+    }
+
+    /**
+     * Writes result rows as UTF-8 CSV with a stable fixed header.
+     *
+     * @param outputPath target file path
+     * @param rows       rows to write
+     * @throws NullPointerException if any argument is {@code null}
+     * @throws IOException          if writing fails
+     */
+    public static void writeCsv(final Path outputPath, final List<ResultRow> rows) throws IOException {
+        Objects.requireNonNull(outputPath, "outputPath");
+        Objects.requireNonNull(rows, "rows");
+
+        final Path parent = outputPath.getParent();
+        if (parent != null) {
+            Files.createDirectories(parent);
+        }
+
+        final List<String> lines = new ArrayList<>(rows.size() + 1);
+        lines.add(ResultRow.csvHeader());
+        for (ResultRow row : rows) {
+            lines.add(row.toCsvRow());
+        }
+        Files.write(outputPath, lines, StandardCharsets.UTF_8);
+    }
+
+    /**
+     * Parses the full dictionary into an in-memory representation suitable for
+     * repeated deterministic subset compilation.
+     *
+     * @param reader            source reader
+     * @param sourceDescription logical source description
+     * @return parsed dictionary data
+     * @throws IOException if parsing fails
+     */
+    private static DictionaryData readDictionary(final Reader reader, final String sourceDescription)
+            throws IOException {
+        final List<DictionaryEntry> entries = new ArrayList<>();
+        final StemmerDictionaryParser.ParseStatistics parseStatistics = StemmerDictionaryParser.parse(reader,
+                sourceDescription,
+                (stem, variants, lineNumber) -> entries.add(new DictionaryEntry(stem, variants, lineNumber)));
+        return new DictionaryData(sourceDescription, parseStatistics, entries);
+    }
+
+    /**
+     * Evaluates one concrete experiment scenario.
+     *
+     * @param dictionaryData          parsed dictionary data
+     * @param languageLabel           logical language label
+     * @param seed                    deterministic sampling seed
+     * @param reductionSettings       reduction settings
+     * @param storeOriginal           whether canonical stems are inserted with a
+     *                                no-op patch
+     * @param includeStemInEvaluation whether the canonical stem itself is evaluated
+     * @param knowledgePercent        retained percentage of dictionary entries
+     * @return result row
+     */
+    private ResultRow evaluateScenario(final DictionaryData dictionaryData, final String languageLabel, final long seed,
+            final ReductionSettings reductionSettings, final boolean storeOriginal,
+            final boolean includeStemInEvaluation, final int knowledgePercent) {
+        final FrequencyTrie<String> trie = compileSubset(dictionaryData, reductionSettings, storeOriginal,
+                knowledgePercent, seed);
+
+        long evaluatedInputCount = 0L;
+        long getCorrectCount = 0L;
+        long getAllTruePositiveCount = 0L;
+        long getAllFalsePositiveCount = 0L;
+        long getAllCoveredInputCount = 0L;
+        long uniqueCandidateCount = 0L;
+
+        for (DictionaryEntry entry : dictionaryData.entries()) {
+            if (includeStemInEvaluation) {
+                final EvaluationCounts stemCounts = evaluateInput(entry.stem(), entry.stem(), trie);
+                evaluatedInputCount++;
+                getCorrectCount += stemCounts.getCorrect();
+                getAllTruePositiveCount += stemCounts.getAllTruePositives();
+                getAllFalsePositiveCount += stemCounts.getAllFalsePositives();
+                getAllCoveredInputCount += stemCounts.getAllCoveredInputs();
+                uniqueCandidateCount += stemCounts.getUniqueCandidateCount();
+            }
+            for (String variant : entry.variants()) {
+                final EvaluationCounts variantCounts = evaluateInput(variant, entry.stem(), trie);
+                evaluatedInputCount++;
+                getCorrectCount += variantCounts.getCorrect();
+                getAllTruePositiveCount += variantCounts.getAllTruePositives();
+                getAllFalsePositiveCount += variantCounts.getAllFalsePositives();
+                getAllCoveredInputCount += variantCounts.getAllCoveredInputs();
+                uniqueCandidateCount += variantCounts.getUniqueCandidateCount();
+            }
+        }
+
+        final long trainingEntryCount = countSelectedEntries(dictionaryData.entryCount(), seed, knowledgePercent);
+        final double getAccuracy = ratio(getCorrectCount, evaluatedInputCount);
+        final double getAllPrecision = ratio(getAllTruePositiveCount,
+                getAllTruePositiveCount + getAllFalsePositiveCount);
+        final double getAllRecall = ratio(getAllCoveredInputCount, evaluatedInputCount);
+        final double getAllF1 = f1(getAllPrecision, getAllRecall);
+        final double averageUniqueCandidateCount = ratio(uniqueCandidateCount, evaluatedInputCount);
+
+        return new ResultRow(languageLabel, reductionSettings.reductionMode().name(), storeOriginal,
+                includeStemInEvaluation, knowledgePercent, seed, dictionaryData.entryCount(), trainingEntryCount,
+                evaluatedInputCount, getCorrectCount, getAccuracy, getAllTruePositiveCount, getAllFalsePositiveCount,
+                getAllCoveredInputCount, getAllPrecision, getAllRecall, getAllF1, averageUniqueCandidateCount);
+    }
+
+    /**
+     * Compiles a trie from the deterministically selected subset of dictionary
+     * entries.
+     *
+     * @param dictionaryData    parsed dictionary data
+     * @param reductionSettings reduction settings
+     * @param storeOriginal     whether stems themselves should be stored
+     * @param knowledgePercent  retained percentage of dictionary entries
+     * @param seed              deterministic sampling seed
+     * @return compiled trie for the selected subset
+     */
+    private FrequencyTrie<String> compileSubset(final DictionaryData dictionaryData,
+            final ReductionSettings reductionSettings, final boolean storeOriginal, final int knowledgePercent,
+            final long seed) {
+        validateKnowledgePercent(knowledgePercent);
+
+        final FrequencyTrie.Builder<String> builder = new FrequencyTrie.Builder<>(String[]::new, reductionSettings);
+        final SplittableRandom random = new SplittableRandom(seed);
+
+        for (DictionaryEntry entry : dictionaryData.entries()) {
+            if (!isSelected(random, knowledgePercent)) {
+                continue;
+            }
+            if (storeOriginal) {
+                builder.put(entry.stem(), NOOP_PATCH_COMMAND);
+            }
+            for (String variant : entry.variants()) {
+                final String patch = this.patchCommandEncoder.encode(variant, entry.stem());
+                builder.put(variant, patch);
+            }
+        }
+        return builder.build();
+    }
+
+    /**
+     * Evaluates one input word form against both lookup APIs.
+     *
+     * @param input        input form to transform
+     * @param expectedStem expected stem
+     * @param trie         compiled trie under test
+     * @return immutable counts for this single input
+     */
+    private static EvaluationCounts evaluateInput(final String input, final String expectedStem,
+            final FrequencyTrie<String> trie) {
+        long getCorrect = 0L;
+        final String preferredPatch = trie.get(input);
+        if (preferredPatch != null) {
+            final String preferredStem = PatchCommandEncoder.apply(input, preferredPatch);
+            if (expectedStem.equals(preferredStem)) {
+                getCorrect = 1L;
+            }
+        } else {
+            if (expectedStem.equals(input)) {
+                getCorrect = 1L;
+            }
+        }
+
+        final String[] patches = trie.getAll(input);
+
+        long truePositives = 0L;
+        long falsePositives = 0L;
+        long coveredInputs = 0L;
+        for (String patch : patches) {
+            final String candidateStem = PatchCommandEncoder.apply(input, patch);
+            if (expectedStem.equals(candidateStem)) {
+                truePositives++;
+                coveredInputs = 1L;
+            } else {
+                falsePositives++;
+            }
+        }
+        return new EvaluationCounts(getCorrect, truePositives, falsePositives, coveredInputs, patches.length);
+    }
+
+    /**
+     * Counts how many entries would be selected for one scenario without
+     * recompiling the trie.
+     *
+     * @param entryCount       total entry count
+     * @param seed             deterministic sampling seed
+     * @param knowledgePercent retained percentage of dictionary entries
+     * @return selected entry count
+     */
+    private static long countSelectedEntries(final int entryCount, final long seed, final int knowledgePercent) {
+        validateKnowledgePercent(knowledgePercent);
+        final SplittableRandom random = new SplittableRandom(seed);
+        long count = 0L;
+        for (int index = 0; index < entryCount; index++) {
+            if (isSelected(random, knowledgePercent)) {
+                count++;
+            }
+        }
+        return count;
+    }
+
+    /**
+     * Returns whether one entry is selected for the supplied knowledge level.
+     *
+     * @param random           deterministic random source
+     * @param knowledgePercent retained percentage of entries
+     * @return {@code true} when the entry should be kept
+     */
+    private static boolean isSelected(final SplittableRandom random, final int knowledgePercent) {
+        return random.nextInt(100) < knowledgePercent;
+    }
+
+    /**
+     * Validates one knowledge percentage value.
+     *
+     * @param knowledgePercent value to validate
+     */
+    private static void validateKnowledgePercent(final int knowledgePercent) {
+        if (knowledgePercent < MINIMUM_KNOWLEDGE_PERCENT || knowledgePercent > MAXIMUM_KNOWLEDGE_PERCENT
+                || knowledgePercent % KNOWLEDGE_PERCENT_STEP != 0) {
+            throw new IllegalArgumentException(
+                    "knowledgePercent must be one of 10, 20, ..., 100 but was " + knowledgePercent + '.');
+        }
+    }
+
+    /**
+     * Computes a safe ratio.
+     *
+     * @param numerator   numerator
+     * @param denominator denominator
+     * @return ratio, or {@code 0.0} when the denominator is zero
+     */
+    private static double ratio(final long numerator, final long denominator) {
+        if (denominator == 0L) { // NOPMD
+            return 0.0d;
+        }
+        return (double) numerator / (double) denominator; // NOPMD
+    }
+
+    /**
+     * Computes the harmonic mean of precision and recall.
+     *
+     * @param precision global precision
+     * @param recall    global recall
+     * @return F1 score, or {@code 0.0} when both inputs are zero
+     */
+    private static double f1(final double precision, final double recall) {
+        if (precision == 0.0d && recall == 0.0d) {
+            return 0.0d;
+        }
+        return 2.0d * precision * recall / (precision + recall);
+    }
+
+    /**
+     * One parsed dictionary line.
+     *
+     * @param stem       canonical stem
+     * @param variants   known variants of the stem
+     * @param lineNumber physical line number in the source dictionary
+     */
+    private record DictionaryEntry(String stem, String[] variants, int lineNumber) {
+
+        /**
+         * Creates a parsed dictionary entry.
+         *
+         * @param stem       canonical stem
+         * @param variants   known variants of the stem
+         * @param lineNumber physical line number in the source dictionary
+         */
+        private DictionaryEntry {
+            Objects.requireNonNull(stem, "stem");
+            Objects.requireNonNull(variants, "variants");
+            if (lineNumber < 1) { // NOPMD
+                throw new IllegalArgumentException("lineNumber must be positive.");
+            }
+        }
+    }
+
+    /**
+     * Parsed dictionary state reused across all scenarios.
+     *
+     * @param sourceDescription logical source description
+     * @param parseStatistics   parser statistics
+     * @param entries           immutable ordered entries
+     */
+    private record DictionaryData(String sourceDescription, StemmerDictionaryParser.ParseStatistics parseStatistics,
+            List<DictionaryEntry> entries) {
+
+        /**
+         * Creates parsed dictionary data.
+         *
+         * @param sourceDescription logical source description
+         * @param parseStatistics   parser statistics
+         * @param entries           immutable ordered entries
+         */
+        private DictionaryData {
+            Objects.requireNonNull(sourceDescription, "sourceDescription");
+            Objects.requireNonNull(parseStatistics, "parseStatistics");
+            Objects.requireNonNull(entries, "entries");
+            entries = List.copyOf(entries);
+        }
+
+        /**
+         * Returns the number of logical dictionary entries.
+         *
+         * @return entry count
+         */
+        private int entryCount() {
+            return this.entries.size();
+        }
+    }
+
+    /**
+     * Per-input evaluation counts.
+     */
+    private static final class EvaluationCounts {
+
+        /**
+         * Preferred lookup correctness.
+         */
+        private final long getCorrect;
+
+        /**
+         * Number of correct candidates returned by {@code getAll()}.
+         */
+        private final long getAllTruePositives;
+
+        /**
+         * Number of incorrect candidates returned by {@code getAll()}.
+         */
+        private final long getAllFalsePositives;
+
+        /**
+         * Whether the correct stem was covered by {@code getAll()}.
+         */
+        private final long getAllCoveredInputs;
+
+        /**
+         * Number of candidate commands returned by {@code getAll()}.
+         */
+        private final long uniqueCandidateCount;
+
+        /**
+         * Creates a new immutable counter object.
+         *
+         * @param getCorrect           preferred lookup correctness
+         * @param getAllTruePositives  correct candidates
+         * @param getAllFalsePositives incorrect candidates
+         * @param getAllCoveredInputs  coverage marker
+         * @param uniqueCandidateCount candidate command count
+         */
+        private EvaluationCounts(final long getCorrect, final long getAllTruePositives, final long getAllFalsePositives,
+                final long getAllCoveredInputs, final long uniqueCandidateCount) {
+            this.getCorrect = getCorrect;
+            this.getAllTruePositives = getAllTruePositives;
+            this.getAllFalsePositives = getAllFalsePositives;
+            this.getAllCoveredInputs = getAllCoveredInputs;
+            this.uniqueCandidateCount = uniqueCandidateCount;
+        }
+
+        /**
+         * Returns preferred lookup correctness.
+         *
+         * @return preferred lookup correctness
+         */
+        private long getCorrect() {
+            return this.getCorrect;
+        }
+
+        /**
+         * Returns the number of correct candidates.
+         *
+         * @return correct candidates
+         */
+        private long getAllTruePositives() {
+            return this.getAllTruePositives;
+        }
+
+        /**
+         * Returns the number of incorrect candidates.
+         *
+         * @return incorrect candidates
+         */
+        private long getAllFalsePositives() {
+            return this.getAllFalsePositives;
+        }
+
+        /**
+         * Returns the per-input coverage marker.
+         *
+         * @return coverage marker
+         */
+        private long getAllCoveredInputs() {
+            return this.getAllCoveredInputs;
+        }
+
+        /**
+         * Returns the number of candidate commands.
+         *
+         * @return candidate command count
+         */
+        private long getUniqueCandidateCount() {
+            return this.uniqueCandidateCount;
+        }
+    }
+
+    /**
+     * One immutable result row of the knowledge experiment.
+     *
+     * @param language                    language label
+     * @param reductionMode               reduction mode name
+     * @param storeOriginal               whether no-op patches were stored for
+     *                                    canonical stems
+     * @param includeStemInEvaluation     whether canonical stems were part of the
+     *                                    evaluated inputs
+     * @param knowledgePercent            retained knowledge percentage
+     * @param seed                        deterministic sampling seed
+     * @param dictionaryEntryCount        total parsed dictionary entry count
+     * @param trainingEntryCount          selected dictionary entry count used for
+     *                                    build
+     * @param evaluatedInputCount         total evaluated input count
+     * @param getCorrectCount             number of correct preferred
+     *                                    transformations
+     * @param getAccuracy                 preferred lookup accuracy
+     * @param getAllTruePositiveCount     number of unique correct candidates from
+     *                                    {@code getAll()}
+     * @param getAllFalsePositiveCount    number of unique incorrect candidates from
+     *                                    {@code getAll()}
+     * @param getAllCoveredInputCount     number of inputs for which the correct
+     *                                    stem appeared in {@code getAll()}
+     * @param getAllPrecision             global candidate precision for
+     *                                    {@code getAll()}
+     * @param getAllRecall                global input recall for {@code getAll()}
+     * @param getAllF1                    F1 score derived from {@code getAll()}
+     *                                    precision and recall
+     * @param averageUniqueCandidateCount average number of unique candidate stems
+     *                                    per input
+     */
+    public record ResultRow(String language, String reductionMode, boolean storeOriginal,
+            boolean includeStemInEvaluation, int knowledgePercent, long seed, int dictionaryEntryCount,
+            long trainingEntryCount, long evaluatedInputCount, long getCorrectCount, double getAccuracy,
+            long getAllTruePositiveCount, long getAllFalsePositiveCount, long getAllCoveredInputCount,
+            double getAllPrecision, double getAllRecall, double getAllF1, double averageUniqueCandidateCount) {
+
+        /**
+         * Creates one immutable result row.
+         *
+         * @param language                    language label
+         * @param reductionMode               reduction mode name
+         * @param storeOriginal               whether no-op patches were stored for
+         *                                    canonical stems
+         * @param includeStemInEvaluation     whether canonical stems were evaluated
+         * @param knowledgePercent            retained knowledge percentage
+         * @param seed                        deterministic sampling seed
+         * @param dictionaryEntryCount        total dictionary entry count
+         * @param trainingEntryCount          selected training entry count
+         * @param evaluatedInputCount         total evaluated input count
+         * @param getCorrectCount             number of correct preferred
+         *                                    transformations
+         * @param getAccuracy                 preferred lookup accuracy
+         * @param getAllTruePositiveCount     number of unique correct candidates
+         * @param getAllFalsePositiveCount    number of unique incorrect candidates
+         * @param getAllCoveredInputCount     coverage count for {@code getAll()}
+         * @param getAllPrecision             global candidate precision for
+         *                                    {@code getAll()}
+         * @param getAllRecall                global input recall for {@code getAll()}
+         * @param getAllF1                    harmonic mean of precision and recall
+         * @param averageUniqueCandidateCount average unique candidate count per input
+         */
+        @SuppressWarnings("PMD.AvoidLiteralsInIfCondition")
+        public ResultRow {
+            Objects.requireNonNull(language, "language");
+            Objects.requireNonNull(reductionMode, "reductionMode");
+            validateKnowledgePercent(knowledgePercent);
+            if (dictionaryEntryCount < 0) {
+                throw new IllegalArgumentException("dictionaryEntryCount must not be negative.");
+            }
+            if (trainingEntryCount < 0L) {
+                throw new IllegalArgumentException("trainingEntryCount must not be negative.");
+            }
+            if (evaluatedInputCount < 0L) {
+                throw new IllegalArgumentException("evaluatedInputCount must not be negative.");
+            }
+            if (getCorrectCount < 0L) {
+                throw new IllegalArgumentException("getCorrectCount must not be negative.");
+            }
+            if (getAllTruePositiveCount < 0L) {
+                throw new IllegalArgumentException("getAllTruePositiveCount must not be negative.");
+            }
+            if (getAllFalsePositiveCount < 0L) {
+                throw new IllegalArgumentException("getAllFalsePositiveCount must not be negative.");
+            }
+            if (getAllCoveredInputCount < 0L) {
+                throw new IllegalArgumentException("getAllCoveredInputCount must not be negative.");
+            }
+        }
+
+        /**
+         * Returns the stable CSV header of this result format.
+         *
+         * @return CSV header line
+         */
+        public static String csvHeader() {
+            return String.join(",",
+                    List.of("language", "reductionMode", "storeOriginal", "includeStemInEvaluation", "knowledgePercent",
+                            "seed", "dictionaryEntryCount", "trainingEntryCount", "evaluatedInputCount",
+                            "getCorrectCount", "getAccuracy", "getAllTruePositiveCount", "getAllFalsePositiveCount",
+                            "getAllCoveredInputCount", "getAllPrecision", "getAllRecall", "getAllF1",
+                            "averageUniqueCandidateCount"));
+        }
+
+        /**
+         * Serializes this row as one CSV record.
+         *
+         * @return CSV record
+         */
+        public String toCsvRow() {
+            return String.join(",",
+                    List.of(escapeCsv(this.language), escapeCsv(this.reductionMode), String.valueOf(this.storeOriginal),
+                            String.valueOf(this.includeStemInEvaluation), String.valueOf(this.knowledgePercent),
+                            String.valueOf(this.seed), String.valueOf(this.dictionaryEntryCount),
+                            String.valueOf(this.trainingEntryCount), String.valueOf(this.evaluatedInputCount),
+                            String.valueOf(this.getCorrectCount), formatDouble(this.getAccuracy),
+                            String.valueOf(this.getAllTruePositiveCount), String.valueOf(this.getAllFalsePositiveCount),
+                            String.valueOf(this.getAllCoveredInputCount), formatDouble(this.getAllPrecision),
+                            formatDouble(this.getAllRecall), formatDouble(this.getAllF1),
+                            formatDouble(this.averageUniqueCandidateCount)));
+        }
+
+        /**
+         * Escapes a string for CSV output.
+         *
+         * @param value value to escape
+         * @return escaped CSV cell
+         */
+        private static String escapeCsv(final String value) {
+            if (value.indexOf(',') < 0 && value.indexOf('"') < 0 && value.indexOf('\n') < 0
+                    && value.indexOf('\r') < 0) {
+                return value;
+            }
+            return '"' + value.replace("\"", "\"\"") + '"';
+        }
+
+        /**
+         * Formats one floating-point value using a locale-independent decimal
+         * representation.
+         *
+         * @param value value to format
+         * @return formatted value
+         */
+        private static String formatDouble(final double value) {
+            return String.format(Locale.ROOT, "%.10f", value);
+        }
+    }
+}
--- a/src/main/java/org/egothor/stemmer/StemmerKnowledgeExperimentCli.java
+++ b/src/main/java/org/egothor/stemmer/StemmerKnowledgeExperimentCli.java
@@ -0,0 +1,344 @@
+/*******************************************************************************
+ * Copyright (C) 2026, Leo Galambos
+ * All rights reserved.
+ * 
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ * 
+ * 1. Redistributions of source code must retain the above copyright notice,
+ *    this list of conditions and the following disclaimer.
+ * 
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ *    this list of conditions and the following disclaimer in the documentation
+ *    and/or other materials provided with the distribution.
+ * 
+ * 3. Neither the name of the copyright holder nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ * 
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ ******************************************************************************/
+package org.egothor.stemmer;
+
+import java.io.IOException;
+import java.io.PrintStream;
+import java.nio.file.Path;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+import java.util.Objects;
+import java.util.logging.Level;
+import java.util.logging.Logger;
+
+/**
+ * Command-line entry point for the stemmer knowledge experiment.
+ */
+public final class StemmerKnowledgeExperimentCli {
+
+    /**
+     * Logger of this class.
+     */
+    private static final Logger LOGGER = Logger.getLogger(StemmerKnowledgeExperimentCli.class.getName());
+
+    /**
+     * Exit status indicating success.
+     */
+    private static final int EXIT_SUCCESS = 0;
+
+    /**
+     * Exit status indicating processing failure.
+     */
+    private static final int EXIT_PROCESSING_ERROR = 1;
+
+    /**
+     * Exit status indicating invalid command-line usage.
+     */
+    private static final int EXIT_USAGE_ERROR = 2;
+
+    /**
+     * Default deterministic seed.
+     */
+    private static final long DEFAULT_SEED = 20_260_421L;
+
+    /**
+     * Default output report location.
+     */
+    private static final Path DEFAULT_OUTPUT_PATH = Path.of("build", "reports", "stemmer-knowledge-experiment.csv");
+
+    /**
+     * Usage banner.
+     */
+    private static final String USAGE = String.join(System.lineSeparator(),
+            "Usage: StemmerKnowledgeExperimentCli [--bundled-all | --bundled-language <LANG> | --input <PATH>]",
+            "       [--seed <LONG>] [--output <CSV_PATH>]", "", "Examples:", "  --bundled-all",
+            "  --bundled-language US_UK_PROFI --seed 20260421",
+            "  --input src/main/resources/us_uk/stemmer --output build/reports/knowledge.csv");
+
+    /**
+     * Utility class.
+     */
+    private StemmerKnowledgeExperimentCli() {
+        throw new AssertionError("No instances.");
+    }
+
+    /**
+     * Executes the CLI as a standalone process.
+     *
+     * @param arguments command-line arguments
+     */
+    public static void main(final String[] arguments) {
+        final int exitCode = execute(arguments);
+        System.exit(exitCode);
+    }
+
+    /**
+     * Executes the CLI and translates all outcomes to process exit codes.
+     *
+     * @param arguments command-line arguments
+     * @return process exit code
+     */
+    /* default */ static int execute(final String... arguments) {
+        Objects.requireNonNull(arguments, "arguments");
+        try {
+            final CliOptions options = CliOptions.parse(arguments);
+            if (options.command() == Command.HELP) {
+                printUsage(System.out);
+                return EXIT_SUCCESS;
+            }
+            return runExperiment(options);
+        } catch (final CliUsageException exception) {
+            if (LOGGER.isLoggable(Level.SEVERE)) {
+                LOGGER.log(Level.SEVERE, "Invalid command-line usage for arguments {0}: {1}",
+                        new Object[] { Arrays.toString(arguments), exception.getMessage() });
+            }
+            printUsage(System.err);
+            return EXIT_USAGE_ERROR;
+        } catch (final IOException exception) {
+            if (LOGGER.isLoggable(Level.SEVERE)) {
+                LOGGER.log(Level.SEVERE, "Experiment processing failed for arguments {0}", Arrays.toString(arguments));
+                LOGGER.log(Level.SEVERE, "Processing failure details.", exception);
+            }
+            return EXIT_PROCESSING_ERROR;
+        } catch (final RuntimeException exception) { // NOPMD
+            if (LOGGER.isLoggable(Level.SEVERE)) {
+                LOGGER.log(Level.SEVERE, "Unexpected runtime failure for arguments {0}", Arrays.toString(arguments));
+                LOGGER.log(Level.SEVERE, "Unexpected processing failure details.", exception);
+            }
+            return EXIT_PROCESSING_ERROR;
+        }
+    }
+
+    /**
+     * Runs the experiment for already validated options.
+     *
+     * @param options validated CLI options
+     * @return process exit code
+     * @throws IOException if experiment execution fails
+     */
+    private static int runExperiment(final CliOptions options) throws IOException {
+        final StemmerKnowledgeExperiment experiment = new StemmerKnowledgeExperiment();
+        final List<StemmerKnowledgeExperiment.ResultRow> rows = switch (options.sourceMode()) {
+            case INPUT_PATH -> experiment.evaluatePath(options.inputPath(), options.seed());
+            case SINGLE_BUNDLED_LANGUAGE -> experiment.evaluateBundledLanguage(options.language(), options.seed());
+            case ALL_BUNDLED_LANGUAGES -> experiment.evaluateAllBundledLanguages(options.seed());
+        };
+
+        StemmerKnowledgeExperiment.writeCsv(options.outputPath(), rows);
+        if (LOGGER.isLoggable(Level.INFO)) {
+            LOGGER.log(Level.INFO, "Knowledge experiment report written to {0} with {1} rows.",
+                    new Object[] { options.outputPath().toAbsolutePath(), rows.size() });
+        }
+        return EXIT_SUCCESS;
+    }
+
+    /**
+     * Prints the CLI usage text.
+     *
+     * @param stream target output stream
+     */
+    private static void printUsage(final PrintStream stream) {
+        stream.println(USAGE);
+    }
+
+    /**
+     * Supported top-level CLI commands.
+     */
+    private enum Command {
+
+        /**
+         * Executes the experiment.
+         */
+        EXECUTE,
+
+        /**
+         * Prints usage text.
+         */
+        HELP
+    }
+
+    /**
+     * Supported experiment source selection modes.
+     */
+    private enum ExperimentSourceMode {
+
+        /**
+         * Runs the experiment for all bundled languages.
+         */
+        ALL_BUNDLED_LANGUAGES,
+
+        /**
+         * Runs the experiment for one bundled language.
+         */
+        SINGLE_BUNDLED_LANGUAGE,
+
+        /**
+         * Runs the experiment for one external dictionary path.
+         */
+        INPUT_PATH
+    }
+
+    /**
+     * Exception indicating invalid command-line usage.
+     */
+    private static final class CliUsageException extends Exception {
+
+        private static final long serialVersionUID = -3904751711104596247L;
+
+        /**
+         * Creates a new usage exception.
+         *
+         * @param message failure description
+         */
+        private CliUsageException(final String message) {
+            super(message);
+        }
+
+        /**
+         * Creates a new usage exception.
+         *
+         * @param message failure description
+         * @param cause   original cause
+         */
+        private CliUsageException(final String message, final Throwable cause) {
+            super(message, cause);
+        }
+    }
+
+    /**
+     * Parsed CLI options.
+     *
+     * @param command    selected top-level command
+     * @param sourceMode selected experiment source mode
+     * @param inputPath  optional filesystem dictionary path
+     * @param language   optional bundled language
+     * @param seed       deterministic sampling seed
+     * @param outputPath CSV report path
+     */
+    private record CliOptions(Command command, ExperimentSourceMode sourceMode, Path inputPath,
+            StemmerPatchTrieLoader.Language language, long seed, Path outputPath) {
+
+        /**
+         * Parses the command line.
+         *
+         * @param arguments command-line arguments
+         * @return parsed options
+         * @throws CliUsageException if the command line is invalid
+         */
+        @SuppressWarnings("PMD.AvoidReassigningLoopVariables")
+        private static CliOptions parse(final String... arguments) throws CliUsageException {
+            Objects.requireNonNull(arguments, "arguments");
+
+            Command command = Command.EXECUTE;
+            ExperimentSourceMode sourceMode = ExperimentSourceMode.ALL_BUNDLED_LANGUAGES;
+            Path inputPath = null;
+            StemmerPatchTrieLoader.Language language = null;
+            long seed = DEFAULT_SEED;
+            Path outputPath = DEFAULT_OUTPUT_PATH;
+
+            final List<String> tokens = new ArrayList<>(List.of(arguments));
+            for (int index = 0; index < tokens.size(); index++) {
+                final String token = tokens.get(index);
+                switch (token) {
+                    case "--input" -> {
+                        sourceMode = ExperimentSourceMode.INPUT_PATH;
+                        inputPath = Path.of(requireValue(tokens, ++index, token));
+                        language = null;
+                    }
+                    case "--bundled-language" -> {
+                        sourceMode = ExperimentSourceMode.SINGLE_BUNDLED_LANGUAGE;
+                        language = parseLanguage(requireValue(tokens, ++index, token));
+                        inputPath = null;
+                    }
+                    case "--bundled-all" -> {
+                        sourceMode = ExperimentSourceMode.ALL_BUNDLED_LANGUAGES;
+                        inputPath = null;
+                        language = null;
+                    }
+                    case "--seed" -> seed = parseSeed(requireValue(tokens, ++index, token));
+                    case "--output" -> outputPath = Path.of(requireValue(tokens, ++index, token));
+                    case "--help", "-h" -> command = Command.HELP;
+                    default -> throw new CliUsageException("Unknown argument: " + token);
+                }
+            }
+
+            return new CliOptions(command, sourceMode, inputPath, language, seed, outputPath);
+        }
+
+        /**
+         * Returns the required value after one option token.
+         *
+         * @param tokens all tokens
+         * @param index  expected value index
+         * @param option current option token
+         * @return option value
+         * @throws CliUsageException if the option value is missing
+         */
+        private static String requireValue(final List<String> tokens, final int index, final String option)
+                throws CliUsageException {
+            if (index >= tokens.size()) {
+                throw new CliUsageException("Missing value for option " + option + '.');
+            }
+            return tokens.get(index);
+        }
+
+        /**
+         * Parses the deterministic seed.
+         *
+         * @param value textual seed value
+         * @return parsed seed
+         * @throws CliUsageException if the seed value is invalid
+         */
+        private static long parseSeed(final String value) throws CliUsageException {
+            try {
+                return Long.parseLong(value);
+            } catch (final NumberFormatException exception) {
+                throw new CliUsageException("Invalid value for --seed: " + value, exception);
+            }
+        }
+
+        /**
+         * Parses the bundled language selector.
+         *
+         * @param value textual language name
+         * @return parsed language
+         * @throws CliUsageException if the language value is invalid
+         */
+        private static StemmerPatchTrieLoader.Language parseLanguage(final String value) throws CliUsageException {
+            try {
+                return StemmerPatchTrieLoader.Language.valueOf(value);
+            } catch (final IllegalArgumentException exception) {
+                throw new CliUsageException("Invalid value for --bundled-language: " + value, exception);
+            }
+        }
+    }
+}
--- a/src/main/java/org/egothor/stemmer/StemmerPatchTrieLoader.java
+++ b/src/main/java/org/egothor/stemmer/StemmerPatchTrieLoader.java
@@ -30,9 +30,11 @@
 ******************************************************************************/
 package org.egothor.stemmer;

+import java.io.BufferedInputStream;
 import java.io.BufferedReader;
 import java.io.IOException;
 import java.io.InputStream;
+import java.io.PushbackInputStream;
 import java.io.InputStreamReader;
 import java.nio.charset.StandardCharsets;
 import java.nio.file.Files;
@@ -40,14 +42,15 @@ import java.nio.file.Path;
 import java.util.Objects;
 import java.util.logging.Level;
 import java.util.logging.Logger;
+import java.util.zip.GZIPInputStream;

 /**
 * Loader of patch-command tries from bundled stemmer dictionaries.
 *
 * <p>
- * Each dictionary is line-oriented. The first token on a line is interpreted as
- * the stem, and all following tokens are treated as known variants of that
- * stem.
+ * Each dictionary is line-oriented and uses a tab-separated values layout. The
+ * first column on a line is interpreted as the stem, and all following
+ * tab-separated columns are treated as known variants of that stem.
 *
 * <p>
 * For each line, the loader inserts:
@@ -55,12 +58,15 @@ import java.util.logging.Logger;
 * <li>the stem itself mapped to the canonical no-op patch command
 * {@link PatchCommandEncoder#NOOP_PATCH}, when requested by the caller</li>
 * <li>every distinct variant mapped to the patch command transforming that
- * variant to the stem</li>
+ * variant to the stem using the traversal direction implied by the selected
+ * language or loader overload</li>
 * </ul>
 *
 * <p>
 * Parsing is delegated to {@link StemmerDictionaryParser}, which also supports
- * line remarks introduced by {@code #} or {@code //}.
+ * line remarks introduced by {@code #} or {@code //} and ignores dictionary
+ * items containing Unicode whitespace characters while reporting them through
+ * aggregated warning log records.
 */
 public final class StemmerPatchTrieLoader {

@@ -83,90 +89,151 @@ public final class StemmerPatchTrieLoader {

    /**
     * Supported bundled stemmer dictionaries.
+     *
+     * <p>
+     * Each language constant defines:
+     * </p>
+     * <ul>
+     * <li>the resource directory name used under the bundled resources tree</li>
+     * <li>whether the language is written right-to-left</li>
+     * </ul>
+     *
+     * <p>
+     * The right-to-left flag is intended for consumers that need to decide whether
+     * affix-oriented processing should conceptually traverse words from the visual
+     * end or from the logical beginning of the stored form.
+     * </p>
     */
    public enum Language {

+        /**
+         * Czech.
+         */
+        CS_CZ("cs_cz", false),
+
        /**
         * Danish.
         */
-        DA_DK("da_dk"),
+        DA_DK("da_dk", false),

        /**
         * German.
         */
-        DE_DE("de_de"),
+        DE_DE("de_de", false),

        /**
         * Spanish.
         */
-        ES_ES("es_es"),
+        ES_ES("es_es", false),
+
+        /**
+         * Persian.
+         */
+        FA_IR("fa_ir", true),
+
+        /**
+         * Finnish.
+         */
+        FI_FI("fi_fi", false),

        /**
         * French.
         */
-        FR_FR("fr_fr"),
+        FR_FR("fr_fr", false),
+
+        /**
+         * Hebrew.
+         */
+        HE_IL("he_il", true),
+
+        /**
+         * Hungarian.
+         */
+        HU_HU("hu_hu", false),

        /**
         * Italian.
         */
-        IT_IT("it_it"),
+        IT_IT("it_it", false),
+
+        /**
+         * Norwegian Bokmål.
+         */
+        NB_NO("nb_no", false),

        /**
         * Dutch.
         */
-        NL_NL("nl_nl"),
+        NL_NL("nl_nl", false),

        /**
-         * Norwegian.
+         * Norwegian Nynorsk.
         */
-        NO_NO("no_no"),
+        NN_NO("nn_no", false),
+
+        /**
+         * Polish.
+         */
+        PL_PL("pl_pl", false),

        /**
         * Portuguese.
         */
-        PT_PT("pt_pt"),
+        PT_PT("pt_pt", false),

        /**
         * Russian.
         */
-        RU_RU("ru_ru"),
+        RU_RU("ru_ru", false),

        /**
         * Swedish.
         */
-        SV_SE("sv_se"),
+        SV_SE("sv_se", false),
+
+        /**
+         * Ukrainian.
+         */
+        UK_UA("uk_ua", false),

        /**
         * English.
         */
-        US_UK("us_uk"),
+        US_UK("us_uk", false),

        /**
-         * English professional dictionary.
+         * Yiddish.
         */
-        US_UK_PROFI("us_uk.profi");
+        YI("yi", true);

        /**
         * Resource directory name.
         */
        private final String resourceDirectory;

+        /**
+         * Whether the language is written right-to-left.
+         */
+        private final boolean rightToLeft;
+
        /**
         * Creates a language constant.
         *
         * @param resourceDirectory resource directory name
+         * @param rightToLeft       whether the language is written right-to-left
         */
-        Language(final String resourceDirectory) {
+        Language(final String resourceDirectory, final boolean rightToLeft) {
            this.resourceDirectory = resourceDirectory;
+            this.rightToLeft = rightToLeft;
        }

        /**
-         * Returns the classpath resource path of the stemmer dictionary.
+         * Returns the classpath resource path of the bundled stemmer dictionary.
         *
         * @return classpath resource path
         */
        public String resourcePath() {
-            return this.resourceDirectory + "/stemmer";
+            return this.resourceDirectory + "/stemmer.gz";
        }

        /**
@@ -177,6 +244,22 @@ public final class StemmerPatchTrieLoader {
        public String resourceDirectory() {
            return this.resourceDirectory;
        }
+
+        /**
+         * Returns whether the language is written right-to-left.
+         *
+         * <p>
+         * This flag can be used by trie-building and lookup logic to decide whether
+         * suffix-oriented traversal should operate on the stored word form as-is rather
+         * than by reversing the logical character sequence.
+         * </p>
+         *
+         * @return {@code true} when the language is written right-to-left, otherwise
+         *         {@code false}
+         */
+        public boolean isRightToLeft() {
+            return this.rightToLeft;
+        }
    }

    /**
@@ -200,7 +283,7 @@ public final class StemmerPatchTrieLoader {
        try (InputStream inputStream = openBundledResource(resourcePath);
                BufferedReader reader = new BufferedReader(
                        new InputStreamReader(inputStream, StandardCharsets.UTF_8))) {
-            return load(reader, resourcePath, storeOriginal, reductionSettings);
+            return load(reader, resourcePath, storeOriginal, reductionSettings, traversalDirectionOf(language));
        }
    }

@@ -235,11 +318,34 @@ public final class StemmerPatchTrieLoader {
     */
    public static FrequencyTrie<String> load(final Path path, final boolean storeOriginal,
            final ReductionSettings reductionSettings) throws IOException {
+        return load(path, storeOriginal, reductionSettings, WordTraversalDirection.BACKWARD);
+    }
+
+    /**
+     * Loads a dictionary from a filesystem path using explicit reduction settings
+     * and explicit traversal direction.
+     *
+     * @param path               path to the dictionary file
+     * @param storeOriginal      whether the stem itself should be inserted using the
+     *                           canonical no-op patch command
+     * @param reductionSettings  reduction settings
+     * @param traversalDirection traversal direction used for both trie keys and
+     *                           patch commands
+     * @return compiled patch-command trie
+     * @throws NullPointerException if any argument is {@code null}
+     * @throws IOException          if the file cannot be opened or read
+     */
+    public static FrequencyTrie<String> load(final Path path, final boolean storeOriginal,
+            final ReductionSettings reductionSettings, final WordTraversalDirection traversalDirection)
+            throws IOException {
        Objects.requireNonNull(path, "path");
        Objects.requireNonNull(reductionSettings, "reductionSettings");
+        Objects.requireNonNull(traversalDirection, "traversalDirection");

-        try (BufferedReader reader = Files.newBufferedReader(path, StandardCharsets.UTF_8)) {
-            return load(reader, path.toAbsolutePath().toString(), storeOriginal, reductionSettings);
+        try (InputStream inputStream = openDictionaryInputStream(path);
+                BufferedReader reader = new BufferedReader(new InputStreamReader(inputStream, StandardCharsets.UTF_8))) {
+            return load(reader, path.toAbsolutePath().toString(), storeOriginal, reductionSettings,
+                    traversalDirection);
        }
    }

@@ -279,6 +385,27 @@ public final class StemmerPatchTrieLoader {
        return load(Path.of(fileName), storeOriginal, reductionSettings);
    }

+    /**
+     * Loads a dictionary from a filesystem path string using explicit reduction
+     * settings and explicit traversal direction.
+     *
+     * @param fileName           file name or path string
+     * @param storeOriginal      whether the stem itself should be inserted using the
+     *                           canonical no-op patch command
+     * @param reductionSettings  reduction settings
+     * @param traversalDirection traversal direction used for both trie keys and
+     *                           patch commands
+     * @return compiled patch-command trie
+     * @throws NullPointerException if any argument is {@code null}
+     * @throws IOException          if the file cannot be opened or read
+     */
+    public static FrequencyTrie<String> load(final String fileName, final boolean storeOriginal,
+            final ReductionSettings reductionSettings, final WordTraversalDirection traversalDirection)
+            throws IOException {
+        Objects.requireNonNull(fileName, "fileName");
+        return load(Path.of(fileName), storeOriginal, reductionSettings, traversalDirection);
+    }
+
    /**
     * Loads a dictionary from a filesystem path string using default settings for
     * the supplied reduction mode.
@@ -309,9 +436,11 @@ public final class StemmerPatchTrieLoader {
     * @throws IOException if parsing fails
     */
    private static FrequencyTrie<String> load(final BufferedReader reader, final String sourceDescription,
-            final boolean storeOriginal, final ReductionSettings reductionSettings) throws IOException {
-        final FrequencyTrie.Builder<String> builder = new FrequencyTrie.Builder<>(String[]::new, reductionSettings);
-        final PatchCommandEncoder patchCommandEncoder = new PatchCommandEncoder();
+            final boolean storeOriginal, final ReductionSettings reductionSettings,
+            final WordTraversalDirection traversalDirection) throws IOException {
+        final FrequencyTrie.Builder<String> builder = new FrequencyTrie.Builder<>(String[]::new, reductionSettings,
+                traversalDirection);
+        final PatchCommandEncoder patchCommandEncoder = new PatchCommandEncoder(traversalDirection);
        final int[] insertedMappings = new int[1];

        final StemmerDictionaryParser.ParseStatistics statistics = StemmerDictionaryParser.parse(reader,
@@ -331,14 +460,25 @@ public final class StemmerPatchTrieLoader {

        if (LOGGER.isLoggable(Level.FINE)) {
            LOGGER.log(Level.FINE,
-                    "Loaded stemmer dictionary from {0}; insertedMappings={1}, lines={2}, entries={3}, ignoredLines={4}.",
+                    "Loaded stemmer dictionary from {0}; insertedMappings={1}, lines={2}, entries={3}, ignoredLines={4}, traversalDirection={5}.",
                    new Object[] { sourceDescription, insertedMappings[0], statistics.lineCount(),
-                            statistics.entryCount(), statistics.ignoredLineCount() });
+                            statistics.entryCount(), statistics.ignoredLineCount(), traversalDirection });
        }

        return builder.build();
    }

+
+    /**
+     * Resolves the traversal direction implied by a bundled language definition.
+     *
+     * @param language bundled language
+     * @return traversal direction to use for that language
+     */
+    private static WordTraversalDirection traversalDirectionOf(final Language language) {
+        return language.isRightToLeft() ? WordTraversalDirection.FORWARD : WordTraversalDirection.BACKWARD;
+    }
+
    /**
     * Loads a GZip-compressed binary patch-command trie from a filesystem path.
     *
@@ -409,6 +549,37 @@ public final class StemmerPatchTrieLoader {
        StemmerPatchTrieBinaryIO.write(trie, fileName);
    }

+
+    /**
+     * Opens one filesystem dictionary input stream.
+     *
+     * <p>
+     * Plain-text dictionaries are returned as-is. GZip-compressed dictionaries are
+     * detected from the stream header rather than from the file extension so that
+     * callers may provide arbitrary temporary file names without changing the
+     * loading contract.
+     * </p>
+     *
+     * @param path dictionary file path
+     * @return opened dictionary stream, transparently decompressing GZip inputs
+     * @throws IOException if the file cannot be opened
+     */
+    private static InputStream openDictionaryInputStream(final Path path) throws IOException {
+        final PushbackInputStream pushbackInputStream = new PushbackInputStream(
+                new BufferedInputStream(Files.newInputStream(path)), 2);
+        final byte[] header = pushbackInputStream.readNBytes(2);
+
+        if (header.length > 0) {
+            pushbackInputStream.unread(header);
+        }
+
+        if (header.length == 2 && (header[0] & 0xFF) == 0x1F && (header[1] & 0xFF) == 0x8B) {
+            return new GZIPInputStream(pushbackInputStream);
+        }
+
+        return pushbackInputStream;
+    }
+
    /**
     * Opens a bundled resource from the classpath.
     *
@@ -416,12 +587,12 @@ public final class StemmerPatchTrieLoader {
     * @return opened input stream
     * @throws IOException if the resource cannot be found
     */
-    private static InputStream openBundledResource(final String resourcePath) throws IOException {
+    /* default */ static InputStream openBundledResource(final String resourcePath) throws IOException {
        final ClassLoader classLoader = Thread.currentThread().getContextClassLoader();
        final InputStream inputStream = classLoader.getResourceAsStream(resourcePath);
        if (inputStream == null) {
            throw new IOException("Stemmer resource not found: " + resourcePath);
        }
-        return inputStream;
+        return new GZIPInputStream(inputStream);
    }
 }
--- a/src/main/java/org/egothor/stemmer/TrieMetadata.java
+++ b/src/main/java/org/egothor/stemmer/TrieMetadata.java
@@ -0,0 +1,109 @@
+/*******************************************************************************
+ * Copyright (C) 2026, Leo Galambos
+ * All rights reserved.
+ * 
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ * 
+ * 1. Redistributions of source code must retain the above copyright notice,
+ *    this list of conditions and the following disclaimer.
+ * 
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ *    this list of conditions and the following disclaimer in the documentation
+ *    and/or other materials provided with the distribution.
+ * 
+ * 3. Neither the name of the copyright holder nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ * 
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ ******************************************************************************/
+package org.egothor.stemmer;
+
+import java.util.Objects;
+
+/**
+ * Immutable metadata persisted together with a compiled trie artifact.
+ *
+ * <p>
+ * The metadata captures the semantic build configuration required to interpret
+ * the compiled trie correctly after it is reloaded. Persisting the metadata as
+ * part of the artifact makes the binary format self-describing and avoids
+ * coupling runtime consumers to external side-channel configuration.
+ * </p>
+ *
+ * <p>
+ * The record is intentionally extensible. It already models traversal
+ * direction, reduction settings, and diacritic processing strategy, even though
+ * not every field necessarily influences all current code paths yet.
+ * </p>
+ *
+ * @param formatVersion           persisted binary format version of the trie
+ *                                artifact
+ * @param traversalDirection      logical key traversal direction
+ * @param reductionSettings       reduction settings used during compilation
+ * @param diacriticProcessingMode diacritic processing strategy associated with
+ *                                the artifact
+ */
+public record TrieMetadata(int formatVersion, WordTraversalDirection traversalDirection,
+        ReductionSettings reductionSettings, DiacriticProcessingMode diacriticProcessingMode) {
+
+    /**
+     * Creates a new metadata instance.
+     *
+     * @param formatVersion           persisted binary format version, must be at
+     *                                least {@code 1}
+     * @param traversalDirection      logical key traversal direction
+     * @param reductionSettings       reduction settings used during compilation
+     * @param diacriticProcessingMode diacritic processing strategy
+     */
+    public TrieMetadata(final int formatVersion, final WordTraversalDirection traversalDirection,
+            final ReductionSettings reductionSettings, final DiacriticProcessingMode diacriticProcessingMode) {
+        if (formatVersion < 1) { // NOPMD
+            throw new IllegalArgumentException("formatVersion must be at least 1.");
+        }
+        this.formatVersion = formatVersion;
+        this.traversalDirection = Objects.requireNonNull(traversalDirection, "traversalDirection");
+        this.reductionSettings = Objects.requireNonNull(reductionSettings, "reductionSettings");
+        this.diacriticProcessingMode = Objects.requireNonNull(diacriticProcessingMode, "diacriticProcessingMode");
+    }
+
+    /**
+     * Creates metadata populated with current-format defaults for freshly compiled
+     * tries.
+     *
+     * @param formatVersion      persisted binary format version
+     * @param traversalDirection logical key traversal direction
+     * @param reductionSettings  reduction settings used during compilation
+     * @return metadata initialized with current defaults
+     */
+    public static TrieMetadata current(final int formatVersion, final WordTraversalDirection traversalDirection,
+            final ReductionSettings reductionSettings) {
+        return new TrieMetadata(formatVersion, traversalDirection, reductionSettings, DiacriticProcessingMode.AS_IS);
+    }
+
+    /**
+     * Creates metadata compatible with a legacy artifact version that did not store
+     * the full configuration explicitly.
+     *
+     * @param formatVersion      legacy persisted binary format version
+     * @param traversalDirection logical key traversal direction reconstructed from
+     *                           the legacy stream
+     * @return metadata reconstructed with conservative compatibility defaults
+     */
+    public static TrieMetadata legacy(final int formatVersion, final WordTraversalDirection traversalDirection) {
+        return new TrieMetadata(formatVersion, traversalDirection,
+                ReductionSettings.withDefaults(ReductionMode.MERGE_SUBTREES_WITH_EQUIVALENT_RANKED_GET_ALL_RESULTS),
+                DiacriticProcessingMode.AS_IS);
+    }
+}
--- a/src/main/java/org/egothor/stemmer/WordTraversalDirection.java
+++ b/src/main/java/org/egothor/stemmer/WordTraversalDirection.java
@@ -0,0 +1,152 @@
+/*******************************************************************************
+ * Copyright (C) 2026, Leo Galambos
+ * All rights reserved.
+ * 
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ * 
+ * 1. Redistributions of source code must retain the above copyright notice,
+ *    this list of conditions and the following disclaimer.
+ * 
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ *    this list of conditions and the following disclaimer in the documentation
+ *    and/or other materials provided with the distribution.
+ * 
+ * 3. Neither the name of the copyright holder nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ * 
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ ******************************************************************************/
+package org.egothor.stemmer;
+
+import java.util.Objects;
+
+/**
+ * Defines the logical direction in which word characters are traversed.
+ *
+ * <p>
+ * The same direction is used consistently in two places:
+ * </p>
+ * <ul>
+ * <li>when a word key is traversed through a trie</li>
+ * <li>when patch commands are serialized and then applied back to a source
+ * word</li>
+ * </ul>
+ *
+ * <p>
+ * {@link #FORWARD} means that processing starts at the logical beginning of the
+ * stored form and moves toward its end. {@link #BACKWARD} means that processing
+ * starts at the logical end of the stored form and moves toward its beginning.
+ * </p>
+ *
+ * <p>
+ * For traditional suffix-oriented Egothor data, {@link #BACKWARD} matches the
+ * historical behavior. For right-to-left languages whose affix logic should
+ * operate on the stored form as written, {@link #FORWARD} can be used so that
+ * neither trie construction nor patch application needs to reverse words
+ * externally.
+ * </p>
+ */
+public enum WordTraversalDirection {
+
+    /**
+     * Traverses a word from its logical beginning toward its logical end.
+     */
+    FORWARD,
+
+    /**
+     * Traverses a word from its logical end toward its logical beginning.
+     */
+    BACKWARD;
+
+    /**
+     * Returns the traversal start index for a character sequence of the supplied
+     * length.
+     *
+     * @param length sequence length
+     * @return start index, or {@code -1} when the sequence is empty and traversal
+     *         should therefore not begin
+     * @throws IllegalArgumentException if {@code length} is negative
+     */
+    public int startIndex(final int length) {
+        if (length < 0) {
+            throw new IllegalArgumentException("length must not be negative.");
+        }
+        if (length == 0) {
+            return -1;
+        }
+        return this == FORWARD ? 0 : length - 1;
+    }
+
+    /**
+     * Returns the logical character index addressed by the supplied traversal
+     * offset.
+     *
+     * <p>
+     * A traversal offset of {@code 0} addresses the first character seen in this
+     * direction, {@code 1} the second character, and so on.
+     * </p>
+     *
+     * @param length          sequence length
+     * @param traversalOffset zero-based offset from the traversal start
+     * @return corresponding logical character index
+     * @throws IllegalArgumentException if any argument is outside the valid range
+     */
+    public int logicalIndex(final int length, final int traversalOffset) {
+        if (length < 0) {
+            throw new IllegalArgumentException("length must not be negative.");
+        }
+        if (traversalOffset < 0 || traversalOffset >= length) {
+            throw new IllegalArgumentException("traversalOffset is outside the valid range.");
+        }
+        return this == FORWARD ? traversalOffset : length - 1 - traversalOffset;
+    }
+
+    /**
+     * Returns the characters of the supplied word in this traversal order.
+     *
+     * @param word source word
+     * @return traversal-ordered characters
+     * @throws NullPointerException if {@code word} is {@code null}
+     */
+    public char[] toTraversalCharacters(final String word) {
+        Objects.requireNonNull(word, "word");
+        final char[] characters = word.toCharArray();
+        if (this == FORWARD) {
+            return characters;
+        }
+
+        for (int left = 0, right = characters.length - 1; left < right; left++, right--) { // NOPMD
+            final char swap = characters[left];
+            characters[left] = characters[right];
+            characters[right] = swap;
+        }
+        return characters;
+    }
+
+    /**
+     * Converts a path represented in traversal order back to the logical key form.
+     *
+     * @param traversalPath key path in traversal order
+     * @return logical key form
+     * @throws NullPointerException if {@code traversalPath} is {@code null}
+     */
+    public String traversalPathToLogicalKey(final CharSequence traversalPath) {
+        Objects.requireNonNull(traversalPath, "traversalPath");
+        if (this == FORWARD) {
+            return traversalPath.toString();
+        }
+        return new StringBuilder(traversalPath).reverse().toString();
+    }
+}
--- a/src/main/java/org/egothor/stemmer/package-info.java
+++ b/src/main/java/org/egothor/stemmer/package-info.java
@@ -56,12 +56,15 @@
 * <p>
 * Dictionary loading is provided by
 * {@link org.egothor.stemmer.StemmerPatchTrieLoader}, which reads the
- * traditional line-oriented stemmer resource format in which each non-empty
- * logical line starts with a canonical stem followed by known surface variants.
+ * traditional line-oriented tab-separated values resource format in which each
+ * non-empty logical line starts with a canonical stem followed by known surface
+ * variants in subsequent tab-separated columns.
 * Parsing is delegated to {@link org.egothor.stemmer.StemmerDictionaryParser},
- * which normalizes input to lower case using {@link java.util.Locale#ROOT} and
+ * which normalizes input to lower case using {@link java.util.Locale#ROOT},
 * supports whole-line as well as trailing remarks introduced by {@code #} or
- * {@code //}. During loading, each variant is converted into a patch command
+ * {@code //}, and currently ignores dictionary items containing Unicode
+ * whitespace characters while reporting them through warning-level diagnostics.
+ * During loading, each variant is converted into a patch command
 * targeting the canonical stem, and the stem itself may optionally be stored
 * under the canonical no-operation patch.
 * </p>
--- a/src/main/resources/cs_cz/stemmer.gz
+++ b/src/main/resources/cs_cz/stemmer.gz
--- a/src/main/resources/da_dk/stemmer
+++ b/src/main/resources/da_dk/stemmer
--- a/src/main/resources/da_dk/stemmer.gz
+++ b/src/main/resources/da_dk/stemmer.gz
--- a/src/main/resources/de_de/stemmer
+++ b/src/main/resources/de_de/stemmer
--- a/src/main/resources/de_de/stemmer.gz
+++ b/src/main/resources/de_de/stemmer.gz
--- a/src/main/resources/es_es/stemmer
+++ b/src/main/resources/es_es/stemmer
--- a/src/main/resources/es_es/stemmer.gz
+++ b/src/main/resources/es_es/stemmer.gz
--- a/src/main/resources/fa_ir/stemmer.gz
+++ b/src/main/resources/fa_ir/stemmer.gz
--- a/src/main/resources/fi_fi/stemmer.gz
+++ b/src/main/resources/fi_fi/stemmer.gz
--- a/src/main/resources/fr_fr/stemmer
+++ b/src/main/resources/fr_fr/stemmer
--- a/src/main/resources/fr_fr/stemmer.gz
+++ b/src/main/resources/fr_fr/stemmer.gz
--- a/src/main/resources/he_il/stemmer.gz
+++ b/src/main/resources/he_il/stemmer.gz
--- a/src/main/resources/hu_hu/stemmer.gz
+++ b/src/main/resources/hu_hu/stemmer.gz
--- a/src/main/resources/it_it/stemmer
+++ b/src/main/resources/it_it/stemmer
--- a/src/main/resources/it_it/stemmer.gz
+++ b/src/main/resources/it_it/stemmer.gz
--- a/src/main/resources/nb_no/stemmer.gz
+++ b/src/main/resources/nb_no/stemmer.gz
--- a/src/main/resources/nl_nl/stemmer
+++ b/src/main/resources/nl_nl/stemmer
--- a/src/main/resources/nl_nl/stemmer.gz
+++ b/src/main/resources/nl_nl/stemmer.gz
--- a/src/main/resources/nn_no/stemmer.gz
+++ b/src/main/resources/nn_no/stemmer.gz
--- a/src/main/resources/no_no/stemmer
+++ b/src/main/resources/no_no/stemmer
--- a/src/main/resources/pl_pl/stemmer.gz
+++ b/src/main/resources/pl_pl/stemmer.gz
--- a/src/main/resources/pt_pt/stemmer
+++ b/src/main/resources/pt_pt/stemmer
--- a/src/main/resources/pt_pt/stemmer.gz
+++ b/src/main/resources/pt_pt/stemmer.gz
--- a/src/main/resources/ru_ru/stemmer
+++ b/src/main/resources/ru_ru/stemmer
--- a/src/main/resources/ru_ru/stemmer.gz
+++ b/src/main/resources/ru_ru/stemmer.gz
--- a/src/main/resources/sv_se/stemmer
+++ b/src/main/resources/sv_se/stemmer
--- a/src/main/resources/sv_se/stemmer.gz
+++ b/src/main/resources/sv_se/stemmer.gz
--- a/src/main/resources/uk_ua/stemmer.gz
+++ b/src/main/resources/uk_ua/stemmer.gz
--- a/src/main/resources/us_uk.profi/stemmer
+++ b/src/main/resources/us_uk.profi/stemmer
--- a/src/main/resources/us_uk/stemmer
+++ b/src/main/resources/us_uk/stemmer
--- a/src/main/resources/us_uk/stemmer.gz
+++ b/src/main/resources/us_uk/stemmer.gz
--- a/src/main/resources/yi/stemmer.gz
+++ b/src/main/resources/yi/stemmer.gz
--- a/src/test/java/org/egothor/stemmer/CompileIntegrationTest.java
+++ b/src/test/java/org/egothor/stemmer/CompileIntegrationTest.java
@@ -48,9 +48,12 @@ import java.nio.file.Files;
 import java.nio.file.Path;
 import java.util.LinkedHashMap;
 import java.util.LinkedHashSet;
+import java.util.Locale;
 import java.util.Map;
 import java.util.Set;
+import java.util.stream.Collectors;
 import java.util.stream.Stream;
+import java.util.zip.GZIPInputStream;

 import org.junit.jupiter.api.DisplayName;
 import org.junit.jupiter.api.Nested;
@@ -108,16 +111,14 @@ final class CompileIntegrationTest {
    private static final ReductionMode DEFAULT_REDUCTION_MODE = ReductionMode.MERGE_SUBTREES_WITH_EQUIVALENT_RANKED_GET_ALL_RESULTS;

    /**
-     * Reader charset used for robust extraction of ASCII-safe representative probes
-     * from bundled project dictionaries.
+     * Reader charset used for extraction of representative probes from bundled
+     * project dictionaries.
     *
     * <p>
-     * ISO-8859-1 is intentionally used here as a byte-preserving single-byte
-     * decoder so that the test can safely scan heterogeneous dictionary resources
-     * and then select only ASCII-safe representative terms for semantic assertions.
+     * Bundled project dictionaries are expected to be encoded in UTF-8.
     * </p>
     */
-    private static final Charset BUNDLED_PROBE_SCAN_CHARSET = StandardCharsets.ISO_8859_1;
+    private static final Charset BUNDLED_PROBE_SCAN_CHARSET = StandardCharsets.UTF_8;

    /**
     * Maximum number of representative bundled variants asserted per dictionary.
@@ -136,12 +137,47 @@ final class CompileIntegrationTest {
     * @return parameter stream
     */
    static Stream<Arguments> bundledDictionaryCases() {
-        return Stream.of(Arguments.of("da_dk", "da_dk/stemmer"), Arguments.of("de_de", "de_de/stemmer"),
-                Arguments.of("es_es", "es_es/stemmer"), Arguments.of("fr_fr", "fr_fr/stemmer"),
-                Arguments.of("it_it", "it_it/stemmer"), Arguments.of("nl_nl", "nl_nl/stemmer"),
-                Arguments.of("no_no", "no_no/stemmer"), Arguments.of("pt_pt", "pt_pt/stemmer"),
-                Arguments.of("ru_ru", "ru_ru/stemmer"), Arguments.of("sv_se", "sv_se/stemmer"),
-                Arguments.of("us_uk", "us_uk/stemmer"), Arguments.of("us_uk.profi", "us_uk.profi/stemmer"));
+        return Stream.of(
+                //
+                Arguments.of("cs_cz", "cs_cz/stemmer.gz"),
+                //
+                Arguments.of("da_dk", "da_dk/stemmer.gz"),
+                //
+                Arguments.of("de_de", "de_de/stemmer.gz"),
+                //
+                Arguments.of("es_es", "es_es/stemmer.gz"),
+                //
+                Arguments.of("fa_ir", "fa_ir/stemmer.gz"),
+                //
+                Arguments.of("fi_fi", "fi_fi/stemmer.gz"),
+                //
+                Arguments.of("fr_fr", "fr_fr/stemmer.gz"),
+                //
+                Arguments.of("he_il", "he_il/stemmer.gz"),
+                //
+                Arguments.of("hu_hu", "hu_hu/stemmer.gz"),
+                //
+                Arguments.of("it_it", "it_it/stemmer.gz"),
+                //
+                Arguments.of("nb_no", "nb_no/stemmer.gz"),
+                //
+                Arguments.of("nl_nl", "nl_nl/stemmer.gz"),
+                //
+                Arguments.of("nn_no", "nn_no/stemmer.gz"),
+                //
+                Arguments.of("pl_pl", "pl_pl/stemmer.gz"),
+                //
+                Arguments.of("pt_pt", "pt_pt/stemmer.gz"),
+                //
+                Arguments.of("ru_ru", "ru_ru/stemmer.gz"),
+                //
+                Arguments.of("sv_se", "sv_se/stemmer.gz"),
+                //
+                Arguments.of("uk_ua", "uk_ua/stemmer.gz"),
+                //
+                Arguments.of("us_uk", "us_uk/stemmer.gz"),
+                //
+                Arguments.of("yi", "yi/stemmer.gz"));
    }

    @Nested
@@ -256,7 +292,9 @@ final class CompileIntegrationTest {
                            "A preferred patch must be available for fixture word '" + word + "'."),
                    () -> assertEquals(expectedStems, actualStems,
                            "Fixture word '" + word + "' must preserve all expected stem candidates."),
-                    () -> assertTrue(expectedStems.contains(PatchCommandEncoder.apply(word, preferredPatch)),
+                    () -> assertTrue(
+                            expectedStems.contains(
+                                    PatchCommandEncoder.apply(word, preferredPatch, trie.traversalDirection())),
                            "The preferred stem must be one of the acceptable stems for fixture word '" + word + "'."));
        }
    }
@@ -267,13 +305,15 @@ final class CompileIntegrationTest {

        /**
         * Verifies that the CLI can compile each bundled project dictionary, create a
-         * compressed artifact, reload it, and preserve representative variant lookup
-         * behavior derived from the source dictionary itself.
+         * compressed artifact, reload it, and preserve representative variant stemming
+         * behavior derived from the source dictionary itself at the level of acceptable
+         * reconstructed candidates.
         *
         * <p>
-         * The representative assertions intentionally target only variant terms, not
-         * canonical stems, because direct lookup of the canonical stem is not part of
-         * the default non-{@code --store-original} contract.
+         * Representative probes are derived directly from the same bundled source
+         * dictionary that is being compiled. Items containing Unicode whitespace are
+         * intentionally ignored by the representative-probe helper because the current
+         * probe policy does not yet support multi-token dictionary items.
         * </p>
         *
         * @param scenario     scenario identifier
@@ -285,7 +325,7 @@ final class CompileIntegrationTest {
        @DisplayName("CLI should compile bundled project dictionaries and preserve representative variant semantics")
        void shouldCompileBundledProjectDictionaryAndPreserveRepresentativeVariantSemantics(final String scenario,
                final String resourcePath) throws IOException {
-            final Path inputFile = copyResourceToTemporaryFile(resourcePath, scenario + "-stemmer.txt");
+            final Path inputFile = copyResourceToTemporaryFile(resourcePath, scenario + "-stemmer.gz");
            final Path outputFile = tempDir.resolve("bundled").resolve(scenario).resolve("compiled.dat.gz");

            final CommandResult result = runWithCapturedStandardError("--input", inputFile.toString(), "--output",
@@ -301,14 +341,17 @@ final class CompileIntegrationTest {
            final Map<String, Set<String>> representativeStemsByVariant = readRepresentativeVariantExpectations(
                    resourcePath, REPRESENTATIVE_VARIANT_LIMIT);

-            assertFalse(representativeStemsByVariant.isEmpty(),
-                    "The bundled dictionary must provide at least one representative variant for " + scenario + '.');
+            assertFalse(representativeStemsByVariant.isEmpty(), "The bundled dictionary must provide at least one "
+                    + "representative variant without Unicode whitespace for " + scenario + '.');

            for (Map.Entry<String, Set<String>> entry : representativeStemsByVariant.entrySet()) {
-                final String variant = entry.getKey();
-                final Set<String> expectedStems = entry.getValue();
+                final String variant = entry.getKey().toLowerCase(Locale.ROOT);
+                final Set<String> expectedStems = entry.getValue().stream().map(s -> s.toLowerCase(Locale.ROOT))
+                        .collect(Collectors.toUnmodifiableSet());
                final String preferredPatch = trie.get(variant);
                final Set<String> actualStems = reconstructAllStemCandidates(trie, variant);
+                final String preferredStem = preferredPatch == null ? null
+                        : PatchCommandEncoder.apply(variant, preferredPatch, trie.traversalDirection());

                assertAll(
                        () -> assertNotNull(preferredPatch,
@@ -317,13 +360,22 @@ final class CompileIntegrationTest {
                        () -> assertFalse(actualStems.isEmpty(),
                                "At least one stem candidate must be returned for representative variant '" + variant
                                        + "' in " + scenario + '.'),
-                        () -> assertTrue(actualStems.containsAll(expectedStems),
-                                "All acceptable stems must be preserved for representative variant '" + variant
-                                        + "' in " + scenario + ". Expected=" + expectedStems + ", actual="
+                        () -> assertTrue(expectedStems.stream().anyMatch(actualStems::contains),
+                                "At least one acceptable stem must be preserved for representative variant '" + variant
+                                        + "' in " + scenario + ". Expected one of=" + expectedStems + ", actual="
                                        + actualStems),
-                        () -> assertTrue(expectedStems.contains(PatchCommandEncoder.apply(variant, preferredPatch)),
-                                "The preferred stem must be one of the acceptable stems for representative variant '"
-                                        + variant + "' in " + scenario + '.'));
+                        () -> {
+                            if (expectedStems.size() == 1 && actualStems.size() == 1) {
+                                assertEquals(expectedStems.iterator().next(), preferredStem,
+                                        "The preferred stem must match the only expected surviving stem for "
+                                                + "representative variant '" + variant + "' in " + scenario + '.');
+                            } else {
+                                assertTrue(expectedStems.contains(preferredStem) || actualStems.contains(preferredStem),
+                                        "The preferred stem must remain among the reconstructed candidates for "
+                                                + "representative variant '" + variant + "' in " + scenario
+                                                + ". Preferred=" + preferredStem + ", actual=" + actualStems);
+                            }
+                        });
            }
        }
    }
@@ -371,25 +423,30 @@ final class CompileIntegrationTest {
     * Reads representative variant expectations from a bundled project dictionary.
     *
     * <p>
-     * This helper scans the source dictionary in a byte-preserving single-byte
-     * charset and selects only ASCII-safe probe terms. That keeps the
-     * multidictionary integration assertions stable even when the bundled resources
-     * use heterogeneous encodings, while still validating the CLI against the real
-     * shipped dictionaries.
+     * This helper scans the source dictionary as UTF-8 text and derives
+     * representative stem-to-variant expectations directly from that bundled
+     * source. Only dictionary items that do not contain Unicode whitespace are
+     * considered eligible representative probes. This keeps the multidictionary
+     * integration assertions aligned with the current single-token probe policy
+     * while still validating the CLI against the real shipped dictionaries and
+     * their actual script repertoire.
     * </p>
     *
     * <p>
-     * The dictionary format is expected to be:
+     * The bundled dictionary format is expected to be tab-separated values, meaning
+     * that columns are separated by the tab character:
     * </p>
     *
     * <pre>
-     * stem variant1 variant2 ...
+     * stem	variant1	variant2 ...
     * </pre>
     *
     * <p>
     * Lines beginning with comment prefixes or blank lines are ignored. Canonical
     * stems are intentionally excluded from the expectation map unless they also
-     * appear as distinct variants on a source line.
+     * appear as distinct variants on a source line. Dictionary items containing any
+     * Unicode whitespace are intentionally ignored by this representative-probe
+     * helper.
     * </p>
     *
     * @param resourcePath bundled dictionary resource path
@@ -402,8 +459,9 @@ final class CompileIntegrationTest {
        final Map<String, Set<String>> expectations = new LinkedHashMap<String, Set<String>>();

        try (InputStream inputStream = openResource(resourcePath);
+                InputStream decompressedStream = new GZIPInputStream(inputStream);
                BufferedReader reader = new BufferedReader(
-                        new InputStreamReader(inputStream, BUNDLED_PROBE_SCAN_CHARSET))) {
+                        new InputStreamReader(decompressedStream, BUNDLED_PROBE_SCAN_CHARSET))) {
            for (String line = reader.readLine(); line != null; line = reader.readLine()) {
                if (expectations.size() >= limit) {
                    break;
@@ -414,20 +472,20 @@ final class CompileIntegrationTest {
                    continue;
                }

-                final String[] tokens = trimmedLine.split("\\s+");
+                final String[] tokens = trimmedLine.split("\\t+");
                if (tokens.length < 2) {
                    continue;
                }

                final String stem = tokens[0];
-                if (!isAsciiProbeToken(stem)) {
+                if (containsWhitespaceCharacter(stem)) {
                    continue;
                }

                for (int index = 1; index < tokens.length && expectations.size() < limit; index++) {
                    final String variant = tokens[index];

-                    if (!isAsciiProbeToken(variant) || variant.equals(stem)) {
+                    if (containsWhitespaceCharacter(variant) || variant.equals(stem)) {
                        continue;
                    }

@@ -440,26 +498,24 @@ final class CompileIntegrationTest {
    }

    /**
-     * Determines whether one token is suitable for stable ASCII-safe bundled
-     * multidictionary probing.
+     * Determines whether one token contains any Unicode whitespace character.
     *
     * @param token token to inspect
-     * @return {@code true} when the token is a non-empty lower-case ASCII letter
-     *         sequence
+     * @return {@code true} when the token contains at least one whitespace
+     *         character
     */
-    private static boolean isAsciiProbeToken(final String token) {
-        if (token == null || token.isEmpty()) {
+    private static boolean containsWhitespaceCharacter(final String token) {
+        if (token == null) {
            return false;
        }

        for (int index = 0; index < token.length(); index++) {
-            final char character = token.charAt(index);
-            if (character < 'a' || character > 'z') {
-                return false;
+            if (Character.isWhitespace(token.charAt(index))) {
+                return true;
            }
        }

-        return true;
+        return false;
    }

    /**
@@ -495,7 +551,7 @@ final class CompileIntegrationTest {
        }

        for (String patchCommand : patchCommands) {
-            stems.add(PatchCommandEncoder.apply(word, patchCommand));
+            stems.add(PatchCommandEncoder.apply(word, patchCommand, trie.traversalDirection()));
        }

        return stems;
--- a/src/test/java/org/egothor/stemmer/CompileTest.java
+++ b/src/test/java/org/egothor/stemmer/CompileTest.java
@@ -342,8 +342,8 @@ class CompileTest {
    private Path createMinimalDictionaryFile(final String fileName) throws Exception {
        final Path inputFile = temporaryDirectory.resolve(fileName);

-        final String content = "" + "# minimal dictionary for CLI tests\n" + "run running runs runner\n"
-                + "walk walking walks walked\n";
+        final String content = "" + "# minimal dictionary for CLI tests\n" + "run	running	runs	runner\n"
+                + "walk	walking	walks	walked\n";

        Files.writeString(inputFile, content, StandardCharsets.UTF_8);
        return inputFile;
--- a/src/test/java/org/egothor/stemmer/CompiledTrieArtifactRegressionTest.java
+++ b/src/test/java/org/egothor/stemmer/CompiledTrieArtifactRegressionTest.java
@@ -31,11 +31,11 @@
 package org.egothor.stemmer;

 import static org.junit.jupiter.api.Assertions.assertAll;
-import static org.junit.jupiter.api.Assertions.assertArrayEquals;
 import static org.junit.jupiter.api.Assertions.assertEquals;
 import static org.junit.jupiter.api.Assertions.assertFalse;
 import static org.junit.jupiter.api.Assertions.assertTrue;

+import java.io.ByteArrayInputStream;
 import java.io.IOException;
 import java.nio.file.Path;
 import java.util.LinkedHashSet;
@@ -56,9 +56,8 @@ import org.junit.jupiter.params.provider.MethodSource;
 *
 * <p>
 * This suite protects the binary persistence contract of compiled tries by
- * comparing freshly compiled artifacts against checked-in golden GZip outputs.
- * It also verifies SHA-256 digests and representative semantic probes after
- * loading the produced artifact back.
+ * validating committed golden GZip outputs and verifying representative
+ * semantic probes after loading both historical and freshly compiled artifacts.
 *
 * <p>
 * The goal is to catch unintended changes in:
@@ -67,8 +66,8 @@ import org.junit.jupiter.params.provider.MethodSource;
 * <li>canonical subtree reduction</li>
 * <li>child ordering and node numbering</li>
 * <li>value ordering and frequency handling</li>
- * <li>stream layout and binary format stability</li>
- * <li>compressed artifact reproducibility</li>
+ * <li>stream layout backward readability</li>
+ * <li>compressed artifact reproducibility within the active format version</li>
 * </ul>
 */
@Tag("unit")
@@ -127,37 +126,26 @@ final class CompiledTrieArtifactRegressionTest {
    }

    /**
-     * Verifies that a newly compiled artifact matches the committed golden file,
-     * matches the committed hash, and remains semantically valid when loaded back.
+     * Verifies that each committed golden artifact remains internally consistent,
+     * matches its committed digest, and can still be read by the current binary
+     * loader.
     *
     * @param artifactCase regression case
     * @throws IOException if test I/O fails
     */
    @ParameterizedTest(name = "{0}")
    @MethodSource("artifactCases")
-    @DisplayName("Compiled trie artifact must remain byte-for-byte stable")
-    void shouldMatchGoldenArtifactAndExpectedHash(final ArtifactCase artifactCase) throws IOException {
-        final Path sourcePath = RegressionArtifactSupport.copyResourceToFile(artifactCase.sourceResource(),
-                this.tempDir.resolve(artifactCase.id() + ".stemmer"));
-
-        final Path actualArtifactPath = this.tempDir.resolve(artifactCase.id() + ".gz");
-        final byte[] actualArtifactBytes = RegressionArtifactSupport.compileToArtifact(sourcePath,
-                artifactCase.storeOriginal(), artifactCase.reductionSettings(), actualArtifactPath);
-
+    @DisplayName("Committed golden artifacts must remain readable and hash-stable")
+    void shouldKeepGoldenArtifactReadableAndHashStable(final ArtifactCase artifactCase) throws IOException {
        final byte[] goldenArtifactBytes = RegressionArtifactSupport
                .readResourceBytes(artifactCase.goldenArtifactResource());
        final String expectedSha256 = RegressionArtifactSupport.readSha256Resource(artifactCase.sha256Resource());
+        final FrequencyTrie<String> trie = StemmerPatchTrieBinaryIO.read(new ByteArrayInputStream(goldenArtifactBytes));

        assertAll(
-                () -> assertArrayEquals(goldenArtifactBytes, actualArtifactBytes,
-                        RegressionArtifactSupport.mismatchMessage(artifactCase.id(), expectedSha256,
-                                RegressionArtifactSupport.sha256Hex(actualArtifactBytes), actualArtifactPath)),
-
-                () -> assertEquals(expectedSha256, RegressionArtifactSupport.sha256Hex(actualArtifactBytes),
-                        "Freshly compiled artifact SHA-256 must match the committed regression hash."),
-
                () -> assertEquals(expectedSha256, RegressionArtifactSupport.sha256Hex(goldenArtifactBytes),
-                        "Golden artifact SHA-256 must match its committed sidecar hash."));
+                        "Golden artifact SHA-256 must match its committed sidecar hash."),
+                () -> assertGoldenArtifactSemanticProbes(trie, artifactCase));
    }

    /**
@@ -181,7 +169,7 @@ final class CompiledTrieArtifactRegressionTest {
        final byte[] secondArtifactBytes = RegressionArtifactSupport.compileToArtifactBytes(sourcePath,
                artifactCase.storeOriginal(), artifactCase.reductionSettings());

-        assertArrayEquals(firstArtifactBytes, secondArtifactBytes,
+        org.junit.jupiter.api.Assertions.assertArrayEquals(firstArtifactBytes, secondArtifactBytes,
                "Two consecutive compilations of the same source must produce identical artifact bytes.");
    }

@@ -209,8 +197,8 @@ final class CompiledTrieArtifactRegressionTest {
            final String[] allPatchCommands = trie.getAll(probe.word());
            final String preferredPatchCommand = trie.get(probe.word());
            final String preferredStem = preferredPatchCommand == null ? null
-                    : PatchCommandEncoder.apply(probe.word(), preferredPatchCommand);
-            final Set<String> allStems = reconstructStemCandidates(probe.word(), allPatchCommands);
+                    : PatchCommandEncoder.apply(probe.word(), preferredPatchCommand, trie.traversalDirection());
+            final Set<String> allStems = reconstructStemCandidates(trie, probe.word(), allPatchCommands);

            assertAll(
                    () -> assertFalse(allPatchCommands.length == 0,
@@ -233,7 +221,8 @@ final class CompiledTrieArtifactRegressionTest {
     * @param patchCommands serialized patch commands
     * @return reconstructed stem candidates
     */
-    private static Set<String> reconstructStemCandidates(final String word, final String[] patchCommands) {
+    private static Set<String> reconstructStemCandidates(final FrequencyTrie<String> trie, final String word,
+            final String[] patchCommands) {
        final Set<String> stems = new LinkedHashSet<String>();

        if (patchCommands == null) {
@@ -241,12 +230,38 @@ final class CompiledTrieArtifactRegressionTest {
        }

        for (String patchCommand : patchCommands) {
-            stems.add(PatchCommandEncoder.apply(word, patchCommand));
+            stems.add(PatchCommandEncoder.apply(word, patchCommand, trie.traversalDirection()));
        }

        return stems;
    }

+    /**
+     * Verifies representative semantic probes against one already loaded trie.
+     *
+     * @param trie         trie to inspect
+     * @param artifactCase regression case providing the expected probes
+     */
+    private static void assertGoldenArtifactSemanticProbes(final FrequencyTrie<String> trie,
+            final ArtifactCase artifactCase) {
+        for (ProbeExpectation probe : artifactCase.probes()) {
+            final String[] allPatchCommands = trie.getAll(probe.word());
+            final String preferredPatchCommand = trie.get(probe.word());
+            final String preferredStem = preferredPatchCommand == null ? null
+                    : PatchCommandEncoder.apply(probe.word(), preferredPatchCommand, trie.traversalDirection());
+            final Set<String> allStems = reconstructStemCandidates(trie, probe.word(), allPatchCommands);
+
+            assertAll(
+                    () -> assertFalse(allPatchCommands.length == 0,
+                            "Representative probe must produce at least one result for word: " + probe.word()),
+                    () -> assertEquals(probe.preferredStem(), preferredStem,
+                            "Preferred stem mismatch for representative probe word: " + probe.word()),
+                    () -> assertTrue(allStems.containsAll(probe.acceptableStems()),
+                            "All acceptable stems must be present in getAll() for representative probe word: "
+                                    + probe.word()));
+        }
+    }
+
    /**
     * Immutable regression case definition.
     *
--- a/src/test/java/org/egothor/stemmer/FrequencyTrieTest.java
+++ b/src/test/java/org/egothor/stemmer/FrequencyTrieTest.java
@@ -588,8 +588,15 @@ class FrequencyTrieTest {
                () -> assertEquals("prefix", trie.get("p19")), () -> assertEquals("mid", trie.get("p19x")),
                () -> assertArrayEquals(new String[] { "leaf" }, trie.getAll("p19xy")),
                () -> assertArrayEquals(new String[] { "leaf-alt" }, trie.getAll("p19xz")),
-                () -> assertEquals(82, buildTimeSize), () -> assertEquals(7, compiledSize),
-                () -> assertEquals(1.0d - (7.0d / 82.0d), reductionRatio, 0.0000001d),
+                () -> assertTrue(buildTimeSize > 0,
+                        () -> "Build-time size must be positive, but was " + buildTimeSize + '.'),
+                () -> assertTrue(compiledSize > 0,
+                        () -> "Compiled trie size must be positive, but was " + compiledSize + '.'),
+                () -> assertTrue(compiledSize < buildTimeSize,
+                        () -> "Reduction must decrease the node count. Build-time size=" + buildTimeSize
+                                + ", compiled size=" + compiledSize + '.'),
+                () -> assertTrue(reductionRatio > 0.0d,
+                        () -> "Reduction ratio must be positive, but was " + reductionRatio + '.'),
                () -> assertTrue(reductionRatio >= 0.50d,
                        () -> "Expected at least 50% reduction, but build-time size was " + buildTimeSize
                                + " and compiled size was " + compiledSize + ", giving ratio " + reductionRatio + '.'));
--- a/src/test/java/org/egothor/stemmer/FuzzStemmerAndTrieCompilationTest.java
+++ b/src/test/java/org/egothor/stemmer/FuzzStemmerAndTrieCompilationTest.java
@@ -161,10 +161,10 @@ class FuzzStemmerAndTrieCompilationTest {
                                    describeScenario("preferred patch must exist", reductionMode, scenario, word)),
                            () -> assertTrue(allPatches.length >= 1,
                                    describeScenario("at least one patch must exist", reductionMode, scenario, word)),
-                            () -> assertTrue(acceptableStems.contains(PatchCommandEncoder.apply(word, preferredPatch)),
+                            () -> assertTrue(acceptableStems.contains(PatchCommandEncoder.apply(word, preferredPatch, trie.traversalDirection())),
                                    describeScenario("preferred patch reconstructed an unexpected stem",
                                            reductionMode, scenario, word)),
-                            () -> assertTrue(allPatchesProduceOnlyAcceptableStems(word, allPatches, acceptableStems),
+                            () -> assertTrue(allPatchesProduceOnlyAcceptableStems(trie, word, allPatches, acceptableStems),
                                    describeScenario("getAll() contained a patch outside the accepted stem set",
                                            reductionMode, scenario, word)));
                }
@@ -276,10 +276,10 @@ class FuzzStemmerAndTrieCompilationTest {
     * @param acceptableStems acceptable stems
     * @return {@code true} when all patches are acceptable
     */
-    private static boolean allPatchesProduceOnlyAcceptableStems(final String word, final String[] patches,
-            final Set<String> acceptableStems) {
+    private static boolean allPatchesProduceOnlyAcceptableStems(final FrequencyTrie<String> trie,
+            final String word, final String[] patches, final Set<String> acceptableStems) {
        for (String patch : patches) {
-            if (!acceptableStems.contains(PatchCommandEncoder.apply(word, patch))) {
+            if (!acceptableStems.contains(PatchCommandEncoder.apply(word, patch, trie.traversalDirection()))) {
                return false;
            }
        }
--- a/src/test/java/org/egothor/stemmer/FuzzTestSupport.java
+++ b/src/test/java/org/egothor/stemmer/FuzzTestSupport.java
@@ -158,7 +158,7 @@ final class FuzzTestSupport {

            dictionary.append(stem);
            for (String variant : variants) {
-                dictionary.append(' ').append(variant);
+                dictionary.append('\t').append(variant);
                expectedStemsByWord.computeIfAbsent(variant, ignored -> new LinkedHashSet<>()).add(stem);
            }
            dictionary.append("  # entry ").append(index).append('\n');
@@ -181,18 +181,19 @@ final class FuzzTestSupport {
    private static String createVariant(final Random random, final String stem) {
        final int mode = random.nextInt(6);
        switch (mode) {
-        case 0:
-            return stem + suffix(random);
-        case 1:
-            return prefix(random) + stem;
-        case 2:
-            return stem.length() > 1 ? stem.substring(0, stem.length() - 1) + nextLetter(random) : stem + nextLetter(random);
-        case 3:
-            return stem + nextLetter(random) + nextLetter(random);
-        case 4:
-            return stem.length() > 2 ? stem.substring(0, stem.length() - 2) : stem;
-        default:
-            return new StringBuilder(stem).reverse().append(nextLetter(random)).toString();
+            case 0:
+                return stem + suffix(random);
+            case 1:
+                return prefix(random) + stem;
+            case 2:
+                return stem.length() > 1 ? stem.substring(0, stem.length() - 1) + nextLetter(random)
+                        : stem + nextLetter(random);
+            case 3:
+                return stem + nextLetter(random) + nextLetter(random);
+            case 4:
+                return stem.length() > 2 ? stem.substring(0, stem.length() - 2) : stem;
+            default:
+                return new StringBuilder(stem).reverse().append(nextLetter(random)).toString();
        }
    }

@@ -317,7 +318,8 @@ final class FuzzTestSupport {
     * @param dictionaryContent   generated dictionary content
     * @param expectedStemsByWord acceptable stems for each generated word
     */
-    record StemmerDictionaryScenario(long seed, String dictionaryContent, Map<String, Set<String>> expectedStemsByWord) {
+    record StemmerDictionaryScenario(long seed, String dictionaryContent,
+            Map<String, Set<String>> expectedStemsByWord) {

        /**
         * Creates a validated scenario.
--- a/src/test/java/org/egothor/stemmer/StemmerDictionaryParserTest.java
+++ b/src/test/java/org/egothor/stemmer/StemmerDictionaryParserTest.java
@@ -35,6 +35,7 @@ import static org.junit.jupiter.api.Assertions.assertArrayEquals;
 import static org.junit.jupiter.api.Assertions.assertEquals;
 import static org.junit.jupiter.api.Assertions.assertNotNull;
 import static org.junit.jupiter.api.Assertions.assertThrows;
+import static org.junit.jupiter.api.Assertions.assertTrue;

 import java.io.IOException;
 import java.io.Reader;
@@ -44,6 +45,10 @@ import java.nio.file.Files;
 import java.nio.file.Path;
 import java.util.ArrayList;
 import java.util.List;
+import java.util.logging.Handler;
+import java.util.logging.Level;
+import java.util.logging.LogRecord;
+import java.util.logging.Logger;

 import org.junit.jupiter.api.DisplayName;
 import org.junit.jupiter.api.Nested;
@@ -89,6 +94,43 @@ class StemmerDictionaryParserTest {
        // Record used only as a compact assertion carrier.
    }

+    /**
+     * Log handler capturing parser diagnostics for assertions.
+     */
+    private static final class CapturedLogHandler extends Handler {
+
+        /**
+         * Captured log records.
+         */
+        private final List<LogRecord> records = new ArrayList<LogRecord>();
+
+        @Override
+        public void publish(final LogRecord record) {
+            if (record != null) {
+                this.records.add(record);
+            }
+        }
+
+        @Override
+        public void flush() {
+            // No buffered state.
+        }
+
+        @Override
+        public void close() {
+            this.records.clear();
+        }
+
+        /**
+         * Returns the captured records.
+         *
+         * @return captured records
+         */
+        private List<LogRecord> records() {
+            return this.records;
+        }
+    }
+
    /**
     * Creates a handler that collects all parser callbacks into the supplied list.
     *
@@ -121,8 +163,8 @@ class StemmerDictionaryParserTest {
        @DisplayName("should parse normalized entries and collect accurate statistics")
        void shouldParseNormalizedEntriesAndCollectAccurateStatistics() throws IOException {
            final String input = "# full line remark\n" + "   \n"
-                    + "Root Running Runs RUNNER   # trailing hash remark\n"
-                    + "House HOUSEHOLD houseS // trailing slash remark\n" + "SingleStem\n"
+                    + "Root	Running	Runs	RUNNER   # trailing hash remark\n"
+                    + "House	HOUSEHOLD	houseS // trailing slash remark\n" + "SingleStem\n"
                    + "// full line slash remark\n";

            final List<CapturedEntry> entries = new ArrayList<CapturedEntry>();
@@ -157,11 +199,54 @@ class StemmerDictionaryParserTest {
                    () -> assertEquals(5, third.lineNumber()));
        }

+        @Test
+        @DisplayName("should ignore whitespace-containing items and emit one warning per physical line")
+        void shouldIgnoreWhitespaceContainingItemsAndLogOneWarningPerLine() throws IOException {
+            final String input = "root\trunning form\truns\tnew\u2003term\n" + "compound stem\talpha\tbeta\tvalue\n";
+
+            final List<CapturedEntry> entries = new ArrayList<CapturedEntry>();
+            final Logger logger = Logger.getLogger(StemmerDictionaryParser.class.getName());
+            final Level previousLevel = logger.getLevel();
+            final boolean previousUseParentHandlers = logger.getUseParentHandlers();
+            final CapturedLogHandler handler = new CapturedLogHandler();
+
+            logger.setUseParentHandlers(false);
+            logger.setLevel(Level.WARNING);
+            logger.addHandler(handler);
+            try {
+                final StemmerDictionaryParser.ParseStatistics statistics = StemmerDictionaryParser
+                        .parse(new StringReader(input), "whitespace-source", collectingHandler(entries));
+
+                assertAll("Statistics", () -> assertEquals(2, statistics.lineCount()),
+                        () -> assertEquals(1, statistics.entryCount()),
+                        () -> assertEquals(0, statistics.ignoredLineCount()));
+                assertEquals(1, entries.size(), "Only the valid TSV row must be emitted.");
+                assertAll("Parsed entry", () -> assertEquals("root", entries.get(0).stem()),
+                        () -> assertArrayEquals(new String[] { "runs" }, entries.get(0).variants()),
+                        () -> assertEquals(1, entries.get(0).lineNumber()));
+                assertEquals(2, handler.records().size(), "Exactly one warning must be emitted per physical line.");
+                assertAll("First warning", () -> assertEquals(Level.WARNING, handler.records().get(0).getLevel()),
+                        () -> assertTrue(handler.records().get(0).getMessage()
+                                .contains("Ignoring dictionary items containing whitespace")),
+                        () -> assertEquals("whitespace-source", handler.records().get(0).getParameters()[0]),
+                        () -> assertEquals(Integer.valueOf(1), handler.records().get(0).getParameters()[1]),
+                        () -> assertEquals("root", handler.records().get(0).getParameters()[2]),
+                        () -> assertEquals(Integer.valueOf(2), handler.records().get(0).getParameters()[3]));
+                assertAll("Second warning",
+                        () -> assertEquals(Integer.valueOf(2), handler.records().get(1).getParameters()[1]),
+                        () -> assertEquals("compound stem", handler.records().get(1).getParameters()[2]));
+            } finally {
+                logger.removeHandler(handler);
+                logger.setUseParentHandlers(previousUseParentHandlers);
+                logger.setLevel(previousLevel);
+            }
+        }
+
        @Test
        @DisplayName("should prefer earliest remark marker regardless of marker type")
        void shouldPreferEarliestRemarkMarkerRegardlessOfMarkerType() throws IOException {
-            final String input = "alpha beta // slash remark before # hash remark # ignored\n"
-                    + "gamma delta # hash remark before // slash remark // ignored\n";
+            final String input = "alpha	beta // slash remark before # hash remark # ignored\n"
+                    + "gamma	delta # hash remark before // slash remark // ignored\n";

            final List<CapturedEntry> entries = new ArrayList<CapturedEntry>();

@@ -185,7 +270,7 @@ class StemmerDictionaryParserTest {
        @DisplayName("should propagate handler IOException without swallowing it")
        void shouldPropagateHandlerIOExceptionWithoutSwallowingIt() {
            final IOException expected = new IOException("Simulated handler failure.");
-            final Reader reader = new StringReader("stem variant\n");
+            final Reader reader = new StringReader("stem	variant\n");

            final IOException exception = assertThrows(IOException.class,
                    () -> StemmerDictionaryParser.parse(reader, "failing-handler", (stem, variants, lineNumber) -> {
@@ -228,7 +313,7 @@ class StemmerDictionaryParserTest {
        @Test
        @DisplayName("should parse same content through path and string overloads")
        void shouldParseSameContentThroughPathAndStringOverloads() throws IOException {
-            final String content = "walk walking walked\n" + "run running\n" + "\n" + "# ignored\n";
+            final String content = "walk	walking	walked\n" + "run	running\n" + "\n" + "# ignored\n";

            final Path file = createFile("dictionary.txt", content);

--- a/src/test/java/org/egothor/stemmer/StemmerKnowledgeExperimentTest.java
+++ b/src/test/java/org/egothor/stemmer/StemmerKnowledgeExperimentTest.java
@@ -0,0 +1,279 @@
+/*******************************************************************************
+ * Copyright (C) 2026, Leo Galambos
+ * All rights reserved.
+ * 
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ * 
+ * 1. Redistributions of source code must retain the above copyright notice,
+ *    this list of conditions and the following disclaimer.
+ * 
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ *    this list of conditions and the following disclaimer in the documentation
+ *    and/or other materials provided with the distribution.
+ * 
+ * 3. Neither the name of the copyright holder nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ * 
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ ******************************************************************************/
+package org.egothor.stemmer;
+
+import static org.junit.jupiter.api.Assertions.assertDoesNotThrow;
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertFalse;
+import static org.junit.jupiter.api.Assertions.assertNotNull;
+import static org.junit.jupiter.api.Assertions.assertTrue;
+
+import java.io.IOException;
+import java.io.StringReader;
+import java.nio.charset.StandardCharsets;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.util.List;
+import java.util.Map;
+import java.util.function.Function;
+import java.util.stream.Collectors;
+
+import org.junit.jupiter.api.DisplayName;
+import org.junit.jupiter.api.Tag;
+import org.junit.jupiter.api.Test;
+import org.junit.jupiter.api.io.TempDir;
+
+/**
+ * Tests for {@link StemmerKnowledgeExperiment}.
+ */
+@Tag("unit")
+@Tag("integration")
+@Tag("stemmer")
+final class StemmerKnowledgeExperimentTest {
+
+    /**
+     * Deterministic seed used by all tests.
+     */
+    private static final long TEST_SEED = 20260421L;
+
+    /**
+     * Small deterministic morphology-shaped dictionary.
+     */
+    private static final String DICTIONARY = String.join(System.lineSeparator(), "run	running	runs	runner",
+            "walk	walking	walks	walked", "play	playing	plays	played");
+
+    /**
+     * Temporary directory for report writing tests.
+     */
+    @TempDir
+    private Path tempDir;
+
+    /**
+     * Verifies deterministic scenario generation and expected row count.
+     *
+     * @throws IOException if evaluation fails
+     */
+    @Test
+    @DisplayName("evaluate should return deterministic full scenario matrix")
+    void evaluateShouldReturnDeterministicScenarioMatrix() throws IOException {
+        final StemmerKnowledgeExperiment experiment = new StemmerKnowledgeExperiment();
+
+        final List<StemmerKnowledgeExperiment.ResultRow> first = experiment.evaluate(new StringReader(DICTIONARY),
+                "synthetic", "SYNTHETIC", TEST_SEED);
+        final List<StemmerKnowledgeExperiment.ResultRow> second = experiment.evaluate(new StringReader(DICTIONARY),
+                "synthetic", "SYNTHETIC", TEST_SEED);
+
+        assertEquals(ReductionMode.values().length * 2 * 2 * 10, first.size());
+        assertEquals(first, second);
+    }
+
+    /**
+     * Verifies that full knowledge with stored original stems reaches ideal
+     * quality.
+     *
+     * @throws IOException if evaluation fails
+     */
+    @Test
+    @DisplayName("100 percent knowledge with stored originals should achieve perfect scores")
+    void fullKnowledgeWithStoredOriginalsShouldBePerfect() throws IOException {
+        final StemmerKnowledgeExperiment experiment = new StemmerKnowledgeExperiment();
+        final List<StemmerKnowledgeExperiment.ResultRow> rows = experiment.evaluate(new StringReader(DICTIONARY),
+                "synthetic", "SYNTHETIC", TEST_SEED);
+
+        final StemmerKnowledgeExperiment.ResultRow row = uniqueRow(rows,
+                resultKey(ReductionMode.MERGE_SUBTREES_WITH_EQUIVALENT_RANKED_GET_ALL_RESULTS, true, true, 100));
+
+        assertEquals(1.0d, row.getAccuracy());
+        assertEquals(1.0d, row.getAllPrecision());
+        assertEquals(1.0d, row.getAllRecall());
+        assertEquals(1.0d, row.getAllF1());
+    }
+
+    /**
+     * Verifies that evaluating canonical stems without storing no-op patches lowers
+     * recall at full knowledge, while {@code get()} still remains perfect due to
+     * the implicit identity fallback for already canonical inputs.
+     *
+     * @throws IOException if evaluation fails
+     */
+    @Test
+    @DisplayName("evaluating stems without stored originals should reduce recall but preserve get accuracy")
+    void evaluatingStemsWithoutStoredOriginalsShouldReduceRecall() throws IOException {
+        final StemmerKnowledgeExperiment experiment = new StemmerKnowledgeExperiment();
+        final List<StemmerKnowledgeExperiment.ResultRow> rows = experiment.evaluate(new StringReader(DICTIONARY),
+                "synthetic", "SYNTHETIC", TEST_SEED);
+
+        final StemmerKnowledgeExperiment.ResultRow row = uniqueRow(rows,
+                resultKey(ReductionMode.MERGE_SUBTREES_WITH_EQUIVALENT_RANKED_GET_ALL_RESULTS, false, true, 100));
+
+        assertTrue(row.getAllRecall() < 1.0d);
+        assertEquals(1.0d, row.getAccuracy());
+        assertTrue(row.getAllF1() < 1.0d);
+    }
+
+    /**
+     * Verifies that storing original stems becomes irrelevant when canonical stems
+     * themselves are not part of the evaluated input set.
+     *
+     * @throws IOException if evaluation fails
+     */
+    @Test
+    @DisplayName("storeOriginal should not affect scores when stems are not evaluated")
+    void storeOriginalShouldNotAffectScoresWhenStemsAreNotEvaluated() throws IOException {
+        final StemmerKnowledgeExperiment experiment = new StemmerKnowledgeExperiment();
+        final List<StemmerKnowledgeExperiment.ResultRow> rows = experiment.evaluate(new StringReader(DICTIONARY),
+                "synthetic", "SYNTHETIC", TEST_SEED);
+
+        final StemmerKnowledgeExperiment.ResultRow withoutStoredOriginals = uniqueRow(rows,
+                resultKey(ReductionMode.MERGE_SUBTREES_WITH_EQUIVALENT_RANKED_GET_ALL_RESULTS, false, false, 100));
+        final StemmerKnowledgeExperiment.ResultRow withStoredOriginals = uniqueRow(rows,
+                resultKey(ReductionMode.MERGE_SUBTREES_WITH_EQUIVALENT_RANKED_GET_ALL_RESULTS, true, false, 100));
+
+        assertEquals(withoutStoredOriginals.getAccuracy(), withStoredOriginals.getAccuracy());
+        assertEquals(withoutStoredOriginals.getAllPrecision(), withStoredOriginals.getAllPrecision());
+        assertEquals(withoutStoredOriginals.getAllRecall(), withStoredOriginals.getAllRecall());
+        assertEquals(withoutStoredOriginals.getAllF1(), withStoredOriginals.getAllF1());
+        assertEquals(withoutStoredOriginals.getCorrectCount(), withStoredOriginals.getCorrectCount());
+        assertEquals(withoutStoredOriginals.getAllTruePositiveCount(), withStoredOriginals.getAllTruePositiveCount());
+        assertEquals(withoutStoredOriginals.getAllFalsePositiveCount(), withStoredOriginals.getAllFalsePositiveCount());
+        assertEquals(withoutStoredOriginals.getAllCoveredInputCount(), withStoredOriginals.getAllCoveredInputCount());
+    }
+
+    /**
+     * Verifies that implicit identity fallback for {@code get()} does not propagate
+     * into {@code getAll()}, which still requires an explicit command to cover an
+     * input.
+     *
+     * @throws IOException if evaluation fails
+     */
+    @Test
+    @DisplayName("get should accept implicit identity while getAll still requires explicit coverage")
+    void getShouldAcceptImplicitIdentityWhileGetAllStillRequiresExplicitCoverage() throws IOException {
+        final StemmerKnowledgeExperiment experiment = new StemmerKnowledgeExperiment();
+        final String minimalDictionary = "run	running";
+
+        final List<StemmerKnowledgeExperiment.ResultRow> rows = experiment.evaluate(new StringReader(minimalDictionary),
+                "minimal", "MINIMAL", TEST_SEED);
+
+        final StemmerKnowledgeExperiment.ResultRow row = uniqueRow(rows,
+                resultKey(ReductionMode.MERGE_SUBTREES_WITH_EQUIVALENT_RANKED_GET_ALL_RESULTS, false, true, 100));
+
+        assertEquals(2L, row.evaluatedInputCount());
+        assertEquals(2L, row.getCorrectCount());
+        assertEquals(1.0d, row.getAccuracy());
+
+        assertEquals(1L, row.getAllCoveredInputCount());
+        assertEquals(0.5d, row.getAllRecall());
+        assertTrue(row.getAllPrecision() > 0.0d);
+        assertTrue(row.getAllPrecision() <= 1.0d);
+        assertTrue(row.getAllF1() < 1.0d);
+    }
+
+    /**
+     * Verifies CSV report generation.
+     *
+     * @throws IOException if report writing fails
+     */
+    @Test
+    @DisplayName("writeCsv should emit header and data rows")
+    void writeCsvShouldEmitHeaderAndDataRows() throws IOException {
+        final StemmerKnowledgeExperiment experiment = new StemmerKnowledgeExperiment();
+        final List<StemmerKnowledgeExperiment.ResultRow> rows = experiment.evaluate(new StringReader(DICTIONARY),
+                "synthetic", "SYNTHETIC", TEST_SEED);
+
+        final Path output = this.tempDir.resolve("knowledge.csv");
+        StemmerKnowledgeExperiment.writeCsv(output, rows);
+
+        final List<String> writtenLines = Files.readAllLines(output, StandardCharsets.UTF_8);
+        assertFalse(writtenLines.isEmpty());
+        assertEquals(StemmerKnowledgeExperiment.ResultRow.csvHeader(), writtenLines.get(0));
+        assertEquals(rows.size() + 1, writtenLines.size());
+    }
+
+    /**
+     * Verifies that the result row key lookup remains stable for all generated
+     * rows.
+     *
+     * @throws IOException if evaluation fails
+     */
+    @Test
+    @DisplayName("all generated rows should be addressable by the synthetic key")
+    void allGeneratedRowsShouldBeAddressableBySyntheticKey() throws IOException {
+        final StemmerKnowledgeExperiment experiment = new StemmerKnowledgeExperiment();
+        final List<StemmerKnowledgeExperiment.ResultRow> rows = experiment.evaluate(new StringReader(DICTIONARY),
+                "synthetic", "SYNTHETIC", TEST_SEED);
+
+        for (StemmerKnowledgeExperiment.ResultRow row : rows) {
+            assertDoesNotThrow(() -> uniqueRow(rows, resultKey(row)));
+        }
+    }
+
+    /**
+     * Finds one unique row by a synthetic key.
+     *
+     * @param rows result rows
+     * @param key  synthetic key
+     * @return matching row
+     */
+    private static StemmerKnowledgeExperiment.ResultRow uniqueRow(final List<StemmerKnowledgeExperiment.ResultRow> rows,
+            final String key) {
+        final Map<String, StemmerKnowledgeExperiment.ResultRow> indexed = rows.stream()
+                .collect(Collectors.toMap(StemmerKnowledgeExperimentTest::resultKey, Function.identity()));
+        final StemmerKnowledgeExperiment.ResultRow row = indexed.get(key);
+        assertNotNull(row);
+        return row;
+    }
+
+    /**
+     * Creates a lookup key from a row.
+     *
+     * @param row result row
+     * @return lookup key
+     */
+    private static String resultKey(final StemmerKnowledgeExperiment.ResultRow row) {
+        return resultKey(ReductionMode.valueOf(row.reductionMode()), row.storeOriginal(), row.includeStemInEvaluation(),
+                row.knowledgePercent());
+    }
+
+    /**
+     * Creates a lookup key from scenario components.
+     *
+     * @param reductionMode           reduction mode
+     * @param storeOriginal           whether no-op patches were stored
+     * @param includeStemInEvaluation whether stems were evaluated
+     * @param knowledgePercent        knowledge percentage
+     * @return lookup key
+     */
+    private static String resultKey(final ReductionMode reductionMode, final boolean storeOriginal,
+            final boolean includeStemInEvaluation, final int knowledgePercent) {
+        return reductionMode.name() + '|' + storeOriginal + '|' + includeStemInEvaluation + '|' + knowledgePercent;
+    }
+}
--- a/src/test/java/org/egothor/stemmer/StemmerPatchTrieLoaderTest.java
+++ b/src/test/java/org/egothor/stemmer/StemmerPatchTrieLoaderTest.java
@@ -37,6 +37,7 @@ import static org.junit.jupiter.api.Assertions.assertFalse;
 import static org.junit.jupiter.api.Assertions.assertNotNull;
 import static org.junit.jupiter.api.Assertions.assertNull;
 import static org.junit.jupiter.api.Assertions.assertThrows;
+import static org.junit.jupiter.api.Assertions.assertTrue;

 import java.io.BufferedReader;
 import java.io.ByteArrayInputStream;
@@ -46,12 +47,15 @@ import java.io.InputStreamReader;
 import java.nio.charset.StandardCharsets;
 import java.nio.file.Files;
 import java.nio.file.Path;
+import java.util.Arrays;
 import java.util.LinkedHashMap;
 import java.util.LinkedHashSet;
 import java.util.Map;
 import java.util.Objects;
 import java.util.Set;
+import java.util.stream.IntStream;
 import java.util.stream.Stream;
+import java.util.zip.GZIPInputStream;

 import org.junit.jupiter.api.DisplayName;
 import org.junit.jupiter.api.Nested;
@@ -77,6 +81,7 @@ import org.junit.jupiter.params.provider.MethodSource;
 * <li>comment-aware parsing delegated to {@link StemmerDictionaryParser}</li>
 * <li>preservation of all valid stem candidates returned by
 * {@link FrequencyTrie#getAll(String)}</li>
+ * <li>the current bundled language set, including right-to-left metadata</li>
 * </ul>
 */
@Tag("unit")
@@ -97,126 +102,51 @@ final class StemmerPatchTrieLoaderTest {
     */
    private static final ReductionMode DEFAULT_REDUCTION_MODE = ReductionMode.MERGE_SUBTREES_WITH_EQUIVALENT_RANKED_GET_ALL_RESULTS;

+    /**
+     * Representative number of bundled words used for overload consistency checks.
+     */
+    private static final int REPRESENTATIVE_BUNDLED_WORD_COUNT = 25;
+
    /**
     * Provides arguments for bundled dictionary verification across both supported
     * getAll-preserving reduction modes.
     *
+     * <p>
+     * The stream is derived directly from the current {@link Language} enum so the
+     * test suite follows the supported bundled language set automatically.
+     * </p>
+     *
     * @return parameter stream
     */
    static Stream<Arguments> bundledDictionaryCases() {
-        return Stream.of(
-                // 01
-                Arguments.of("01-da_dk-ranked", StemmerPatchTrieLoader.Language.DA_DK,
-                        ReductionMode.MERGE_SUBTREES_WITH_EQUIVALENT_RANKED_GET_ALL_RESULTS),
+        final ReductionMode[] reductionModes = new ReductionMode[] {
+                ReductionMode.MERGE_SUBTREES_WITH_EQUIVALENT_RANKED_GET_ALL_RESULTS,
+                ReductionMode.MERGE_SUBTREES_WITH_EQUIVALENT_UNORDERED_GET_ALL_RESULTS };

-                // 02
-                Arguments.of("02-de_de-ranked", StemmerPatchTrieLoader.Language.DE_DE,
-                        ReductionMode.MERGE_SUBTREES_WITH_EQUIVALENT_RANKED_GET_ALL_RESULTS),
-
-                // 03
-                Arguments.of("03-es_es-ranked", StemmerPatchTrieLoader.Language.ES_ES,
-                        ReductionMode.MERGE_SUBTREES_WITH_EQUIVALENT_RANKED_GET_ALL_RESULTS),
-
-                // 04
-                Arguments.of("04-fr_fr-ranked", StemmerPatchTrieLoader.Language.FR_FR,
-                        ReductionMode.MERGE_SUBTREES_WITH_EQUIVALENT_RANKED_GET_ALL_RESULTS),
-
-                // 05
-                Arguments.of("05-it_it-ranked", StemmerPatchTrieLoader.Language.IT_IT,
-                        ReductionMode.MERGE_SUBTREES_WITH_EQUIVALENT_RANKED_GET_ALL_RESULTS),
-
-                // 06
-                Arguments.of("06-nl_nl-ranked", StemmerPatchTrieLoader.Language.NL_NL,
-                        ReductionMode.MERGE_SUBTREES_WITH_EQUIVALENT_RANKED_GET_ALL_RESULTS),
-
-                // 07
-                Arguments.of("07-no_no-ranked", StemmerPatchTrieLoader.Language.NO_NO,
-                        ReductionMode.MERGE_SUBTREES_WITH_EQUIVALENT_RANKED_GET_ALL_RESULTS),
-
-                // 08
-                Arguments.of("08-pt_pt-ranked", StemmerPatchTrieLoader.Language.PT_PT,
-                        ReductionMode.MERGE_SUBTREES_WITH_EQUIVALENT_RANKED_GET_ALL_RESULTS),
-
-                // 09
-                Arguments.of("09-ru_ru-ranked", StemmerPatchTrieLoader.Language.RU_RU,
-                        ReductionMode.MERGE_SUBTREES_WITH_EQUIVALENT_RANKED_GET_ALL_RESULTS),
-
-                // 10
-                Arguments.of("10-sv_se-ranked", StemmerPatchTrieLoader.Language.SV_SE,
-                        ReductionMode.MERGE_SUBTREES_WITH_EQUIVALENT_RANKED_GET_ALL_RESULTS),
-
-                // 11
-                Arguments.of("11-us_uk-ranked", StemmerPatchTrieLoader.Language.US_UK,
-                        ReductionMode.MERGE_SUBTREES_WITH_EQUIVALENT_RANKED_GET_ALL_RESULTS),
-
-                // 12
-                Arguments.of("12-us_uk_profi-ranked", StemmerPatchTrieLoader.Language.US_UK_PROFI,
-                        ReductionMode.MERGE_SUBTREES_WITH_EQUIVALENT_RANKED_GET_ALL_RESULTS),
-
-                // 13
-                Arguments.of("13-da_dk-unordered", StemmerPatchTrieLoader.Language.DA_DK,
-                        ReductionMode.MERGE_SUBTREES_WITH_EQUIVALENT_UNORDERED_GET_ALL_RESULTS),
-
-                // 14
-                Arguments.of("14-de_de-unordered", StemmerPatchTrieLoader.Language.DE_DE,
-                        ReductionMode.MERGE_SUBTREES_WITH_EQUIVALENT_UNORDERED_GET_ALL_RESULTS),
-
-                // 15
-                Arguments.of("15-es_es-unordered", StemmerPatchTrieLoader.Language.ES_ES,
-                        ReductionMode.MERGE_SUBTREES_WITH_EQUIVALENT_UNORDERED_GET_ALL_RESULTS),
-
-                // 16
-                Arguments.of("16-fr_fr-unordered", StemmerPatchTrieLoader.Language.FR_FR,
-                        ReductionMode.MERGE_SUBTREES_WITH_EQUIVALENT_UNORDERED_GET_ALL_RESULTS),
-
-                // 17
-                Arguments.of("17-it_it-unordered", StemmerPatchTrieLoader.Language.IT_IT,
-                        ReductionMode.MERGE_SUBTREES_WITH_EQUIVALENT_UNORDERED_GET_ALL_RESULTS),
-
-                // 18
-                Arguments.of("18-nl_nl-unordered", StemmerPatchTrieLoader.Language.NL_NL,
-                        ReductionMode.MERGE_SUBTREES_WITH_EQUIVALENT_UNORDERED_GET_ALL_RESULTS),
-
-                // 19
-                Arguments.of("19-no_no-unordered", StemmerPatchTrieLoader.Language.NO_NO,
-                        ReductionMode.MERGE_SUBTREES_WITH_EQUIVALENT_UNORDERED_GET_ALL_RESULTS),
-
-                // 20
-                Arguments.of("20-pt_pt-unordered", StemmerPatchTrieLoader.Language.PT_PT,
-                        ReductionMode.MERGE_SUBTREES_WITH_EQUIVALENT_UNORDERED_GET_ALL_RESULTS),
-
-                // 21
-                Arguments.of("21-ru_ru-unordered", StemmerPatchTrieLoader.Language.RU_RU,
-                        ReductionMode.MERGE_SUBTREES_WITH_EQUIVALENT_UNORDERED_GET_ALL_RESULTS),
-
-                // 22
-                Arguments.of("22-sv_se-unordered", StemmerPatchTrieLoader.Language.SV_SE,
-                        ReductionMode.MERGE_SUBTREES_WITH_EQUIVALENT_UNORDERED_GET_ALL_RESULTS),
-
-                // 23
-                Arguments.of("23-us_uk-unordered", StemmerPatchTrieLoader.Language.US_UK,
-                        ReductionMode.MERGE_SUBTREES_WITH_EQUIVALENT_UNORDERED_GET_ALL_RESULTS),
-
-                // 24
-                Arguments.of("24-us_uk_profi-unordered", StemmerPatchTrieLoader.Language.US_UK_PROFI,
-                        ReductionMode.MERGE_SUBTREES_WITH_EQUIVALENT_UNORDERED_GET_ALL_RESULTS));
+        return Arrays.stream(StemmerPatchTrieLoader.Language.values()).flatMap(language -> IntStream
+                .range(0, reductionModes.length)
+                .mapToObj(index -> Arguments.of(
+                        String.format("%02d-%s-%s", index + 1, language.name().toLowerCase(),
+                                reductionModes[index].name().toLowerCase()),
+                        language, reductionModes[index])));
    }

    /**
     * Provides representative bundled languages for overload consistency checks.
     *
+     * <p>
+     * The sample intentionally covers both traversal directions.
+     * </p>
+     *
     * @return parameter stream
     */
    static Stream<Arguments> bundledLanguageSamples() {
        return Stream.of(
-                // 01
                Arguments.of("01-us_uk", StemmerPatchTrieLoader.Language.US_UK),
-
-                // 02
                Arguments.of("02-de_de", StemmerPatchTrieLoader.Language.DE_DE),
-
-                // 03
-                Arguments.of("03-fr_fr", StemmerPatchTrieLoader.Language.FR_FR));
+                Arguments.of("03-fa_ir", StemmerPatchTrieLoader.Language.FA_IR),
+                Arguments.of("04-he_il", StemmerPatchTrieLoader.Language.HE_IL),
+                Arguments.of("05-yi", StemmerPatchTrieLoader.Language.YI));
    }

    /**
@@ -230,101 +160,64 @@ final class StemmerPatchTrieLoaderTest {
                .put("running", new PatchCommandEncoder().encode("running", "run")).build();

        return Stream.of(
-                // 01
                Arguments.of("01-load-language-settings",
                        (ExecutableOperation) () -> StemmerPatchTrieLoader.load((StemmerPatchTrieLoader.Language) null,
                                true, settings),
                        "language"),
-
-                // 02
                Arguments.of("02-load-language-mode",
                        (ExecutableOperation) () -> StemmerPatchTrieLoader.load((StemmerPatchTrieLoader.Language) null,
                                true, DEFAULT_REDUCTION_MODE),
                        "language"),
-
-                // 03
                Arguments.of("03-load-language-null-settings",
                        (ExecutableOperation) () -> StemmerPatchTrieLoader.load(StemmerPatchTrieLoader.Language.US_UK,
                                true, (ReductionSettings) null),
                        "reductionSettings"),
-
-                // 04
                Arguments.of("04-load-language-null-mode",
                        (ExecutableOperation) () -> StemmerPatchTrieLoader.load(StemmerPatchTrieLoader.Language.US_UK,
                                true, (ReductionMode) null),
                        "reductionMode"),
-
-                // 05
                Arguments.of("05-load-path-settings",
                        (ExecutableOperation) () -> StemmerPatchTrieLoader.load((Path) null, true, settings), "path"),
-
-                // 06
                Arguments.of("06-load-path-mode",
                        (ExecutableOperation) () -> StemmerPatchTrieLoader.load((Path) null, true,
                                DEFAULT_REDUCTION_MODE),
                        "path"),
-
-                // 07
                Arguments.of("07-load-path-null-settings",
                        (ExecutableOperation) () -> StemmerPatchTrieLoader.load(tempPath(), true,
                                (ReductionSettings) null),
                        "reductionSettings"),
-
-                // 08
                Arguments.of("08-load-path-null-mode",
                        (ExecutableOperation) () -> StemmerPatchTrieLoader.load(tempPath(), true, (ReductionMode) null),
                        "reductionMode"),
-
-                // 09
                Arguments.of("09-load-string-settings",
                        (ExecutableOperation) () -> StemmerPatchTrieLoader.load((String) null, true, settings),
                        "fileName"),
-
-                // 10
                Arguments.of("10-load-string-mode",
                        (ExecutableOperation) () -> StemmerPatchTrieLoader.load((String) null, true,
                                DEFAULT_REDUCTION_MODE),
                        "fileName"),
-
-                // 11
                Arguments.of("11-load-string-null-settings",
                        (ExecutableOperation) () -> StemmerPatchTrieLoader.load(tempPath().toString(), true,
                                (ReductionSettings) null),
                        "reductionSettings"),
-
-                // 12
                Arguments.of("12-load-string-null-mode",
                        (ExecutableOperation) () -> StemmerPatchTrieLoader.load(tempPath().toString(), true,
                                (ReductionMode) null),
                        "reductionMode"),
-
-                // 13
                Arguments.of("13-load-binary-path",
                        (ExecutableOperation) () -> StemmerPatchTrieLoader.loadBinary((Path) null), "path"),
-
-                // 14
                Arguments.of("14-load-binary-string",
                        (ExecutableOperation) () -> StemmerPatchTrieLoader.loadBinary((String) null), "fileName"),
-
-                // 15
                Arguments.of("15-load-binary-stream",
                        (ExecutableOperation) () -> StemmerPatchTrieLoader.loadBinary((InputStream) null),
                        "inputStream"),
-
-                // 16
                Arguments.of("16-save-binary-null-trie-path",
                        (ExecutableOperation) () -> StemmerPatchTrieLoader.saveBinary(null, tempPath()), "trie"),
-
-                // 17
                Arguments.of("17-save-binary-null-path",
                        (ExecutableOperation) () -> StemmerPatchTrieLoader.saveBinary(trie, (Path) null), "path"),
-
-                // 18
                Arguments.of("18-save-binary-null-trie-string",
                        (ExecutableOperation) () -> StemmerPatchTrieLoader.saveBinary(null, tempPath().toString()),
                        "trie"),
-
-                // 19
                Arguments.of("19-save-binary-null-string",
                        (ExecutableOperation) () -> StemmerPatchTrieLoader.saveBinary(trie, (String) null),
                        "fileName"));
@@ -409,9 +302,9 @@ final class StemmerPatchTrieLoaderTest {
        @DisplayName("Path and String overloads must load equivalent tries")
        void shouldLoadEquivalentTrieFromPathAndStringOverloads() throws IOException {
            final Path dictionaryFile = writeDictionary("""
-                    run running runs runner
-                    play playing played plays
-                    city cities
+                    run	running	runs	runner
+                    play	playing	played	plays
+                    city	cities
                    """);

            final ReductionSettings settings = ReductionSettings.withDefaults(DEFAULT_REDUCTION_MODE);
@@ -425,9 +318,10 @@ final class StemmerPatchTrieLoaderTest {
            final FrequencyTrie<String> fromStringWithMode = StemmerPatchTrieLoader.load(dictionaryFile.toString(),
                    true, DEFAULT_REDUCTION_MODE);

-            assertTriePatchSemanticsEqual(fromPathWithSettings, fromPathWithMode, "running", "played", "cities", "run");
-            assertTriePatchSemanticsEqual(fromPathWithSettings, fromStringWithSettings, "running", "played", "cities",
+            assertTriePatchSemanticsEqual(fromPathWithSettings, fromPathWithMode, "running", "played", "cities",
                    "run");
+            assertTriePatchSemanticsEqual(fromPathWithSettings, fromStringWithSettings, "running", "played",
+                    "cities", "run");
            assertTriePatchSemanticsEqual(fromPathWithSettings, fromStringWithMode, "running", "played", "cities",
                    "run");
        }
@@ -442,7 +336,7 @@ final class StemmerPatchTrieLoaderTest {
        @DisplayName("storeOriginal=true must make the stem itself resolvable through the no-op patch")
        void shouldStoreOriginalStemWhenRequested() throws IOException {
            final Path dictionaryFile = writeDictionary("""
-                    run running runs
+                    run	running	runs
                    """);

            final FrequencyTrie<String> trie = StemmerPatchTrieLoader.load(dictionaryFile, true,
@@ -467,8 +361,8 @@ final class StemmerPatchTrieLoaderTest {
        @DisplayName("storeOriginal=false must not insert the stem itself unless present as a variant elsewhere")
        void shouldNotStoreOriginalStemWhenDisabled() throws IOException {
            final Path dictionaryFile = writeDictionary("""
-                    run running runs
-                    play playing played plays
+                    run	running	runs
+                    play	playing	played	plays
                    """);

            final FrequencyTrie<String> trie = StemmerPatchTrieLoader.load(dictionaryFile, false,
@@ -480,6 +374,29 @@ final class StemmerPatchTrieLoaderTest {
                    "Variants must still reconstruct the proper stem.");
        }

+        /**
+         * Verifies that the loader honors forward traversal for right-to-left
+         * dictionaries loaded from filesystem overloads.
+         *
+         * @throws IOException if the test file cannot be written or read
+         */
+        @Test
+        @DisplayName("Explicit right-to-left loading must use forward traversal semantics")
+        void shouldUseForwardTraversalForExplicitRightToLeftLoading() throws IOException {
+            final Path dictionaryFile = writeDictionary("""
+                    كتب	كتابة	كتاب
+                    """);
+
+            final ReductionSettings settings = ReductionSettings.withDefaults(DEFAULT_REDUCTION_MODE);
+            final FrequencyTrie<String> trie = StemmerPatchTrieLoader.load(dictionaryFile, true, settings,
+                    WordTraversalDirection.FORWARD);
+
+            assertEquals(WordTraversalDirection.FORWARD, trie.traversalDirection(),
+                    "Right-to-left loading must produce a forward-traversed trie.");
+            assertEquals(Set.of("كتب"), reconstructAllStemCandidates(trie, "كتابة"),
+                    "Patch reconstruction must use the trie traversal direction.");
+        }
+
        /**
         * Verifies that comment syntax documented by the loader is effectively honored
         * through delegated parsing.
@@ -492,10 +409,10 @@ final class StemmerPatchTrieLoaderTest {
            final Path dictionaryFile = writeDictionary("""
                    # full-line hash comment
                    // full-line slash comment
-                    run running runs // inline slash comment
-                    play playing played # inline hash comment
+                    run	running	runs // inline slash comment
+                    play	playing	played # inline hash comment

-                    city cities
+                    city	cities
                    """);

            final FrequencyTrie<String> trie = StemmerPatchTrieLoader.load(dictionaryFile, true,
@@ -518,9 +435,9 @@ final class StemmerPatchTrieLoaderTest {
        @DisplayName("Binary save and load overloads must preserve trie semantics")
        void shouldRoundTripBinaryTrieAcrossAllBinaryOverloads() throws IOException {
            final Path dictionaryFile = writeDictionary("""
-                    run running runs runner
-                    city cities
-                    study studies studying
+                    run	running	runs	runner
+                    city	cities
+                    study	studies	studying
                    """);
            final Path binaryFile = tempDir.resolve("stemmer-trie.bin.gz");

@@ -535,9 +452,12 @@ final class StemmerPatchTrieLoaderTest {
            try (InputStream inputStream = new ByteArrayInputStream(binaryBytes)) {
                final FrequencyTrie<String> fromStream = StemmerPatchTrieLoader.loadBinary(inputStream);

-                assertTriePatchSemanticsEqual(original, fromPath, "run", "running", "runner", "cities", "studying");
-                assertTriePatchSemanticsEqual(original, fromString, "run", "running", "runner", "cities", "studying");
-                assertTriePatchSemanticsEqual(original, fromStream, "run", "running", "runner", "cities", "studying");
+                assertTriePatchSemanticsEqual(original, fromPath, "run", "running", "runner", "cities",
+                        "studying");
+                assertTriePatchSemanticsEqual(original, fromString, "run", "running", "runner", "cities",
+                        "studying");
+                assertTriePatchSemanticsEqual(original, fromStream, "run", "running", "runner", "cities",
+                        "studying");
            }
        }

@@ -562,6 +482,54 @@ final class StemmerPatchTrieLoaderTest {
    @DisplayName("Bundled dictionaries")
    final class BundledDictionaryTests {

+        /**
+         * Verifies that the current language enumeration exactly matches the bundled
+         * language set expected by this project revision.
+         */
+        @Test
+        @DisplayName("Language enum must expose the current bundled language set")
+        void shouldExposeCurrentBundledLanguageSet() {
+            final Set<StemmerPatchTrieLoader.Language> expectedLanguages = new LinkedHashSet<StemmerPatchTrieLoader.Language>(
+                    Arrays.asList(StemmerPatchTrieLoader.Language.CS_CZ, StemmerPatchTrieLoader.Language.DA_DK,
+                            StemmerPatchTrieLoader.Language.DE_DE, StemmerPatchTrieLoader.Language.ES_ES,
+                            StemmerPatchTrieLoader.Language.FA_IR, StemmerPatchTrieLoader.Language.FI_FI,
+                            StemmerPatchTrieLoader.Language.FR_FR, StemmerPatchTrieLoader.Language.HE_IL,
+                            StemmerPatchTrieLoader.Language.HU_HU, StemmerPatchTrieLoader.Language.IT_IT,
+                            StemmerPatchTrieLoader.Language.NB_NO, StemmerPatchTrieLoader.Language.NL_NL,
+                            StemmerPatchTrieLoader.Language.NN_NO, StemmerPatchTrieLoader.Language.PL_PL,
+                            StemmerPatchTrieLoader.Language.PT_PT, StemmerPatchTrieLoader.Language.RU_RU,
+                            StemmerPatchTrieLoader.Language.SV_SE, StemmerPatchTrieLoader.Language.UK_UA,
+                            StemmerPatchTrieLoader.Language.US_UK, StemmerPatchTrieLoader.Language.YI));
+
+            final Set<StemmerPatchTrieLoader.Language> actualLanguages = new LinkedHashSet<StemmerPatchTrieLoader.Language>(
+                    Arrays.asList(StemmerPatchTrieLoader.Language.values()));
+
+            assertEquals(expectedLanguages, actualLanguages,
+                    "The bundled language enum must match the project's supported language set exactly.");
+        }
+
+        /**
+         * Verifies that the right-to-left metadata is correctly assigned for the
+         * currently supported bundled languages.
+         */
+        @Test
+        @DisplayName("Language enum must mark right-to-left bundled languages correctly")
+        void shouldExposeCorrectRightToLeftMetadata() {
+            final Set<StemmerPatchTrieLoader.Language> expectedRightToLeftLanguages = Set.of(
+                    StemmerPatchTrieLoader.Language.FA_IR, StemmerPatchTrieLoader.Language.HE_IL,
+                    StemmerPatchTrieLoader.Language.YI);
+
+            for (StemmerPatchTrieLoader.Language language : StemmerPatchTrieLoader.Language.values()) {
+                if (expectedRightToLeftLanguages.contains(language)) {
+                    assertTrue(language.isRightToLeft(),
+                            () -> language.name() + " must be marked as right-to-left.");
+                } else {
+                    assertFalse(language.isRightToLeft(),
+                            () -> language.name() + " must not be marked as right-to-left.");
+                }
+            }
+        }
+
        /**
         * Verifies that each bundled dictionary compiles into a trie whose
         * {@link FrequencyTrie#getAll(String)} results still reconstruct exactly the
@@ -586,6 +554,8 @@ final class StemmerPatchTrieLoaderTest {

            assertNotNull(trie, "Compiled trie must be created.");
            assertFalse(expectedStemsByWord.isEmpty(), "Bundled dictionary must not be empty.");
+            assertEquals(language.isRightToLeft() ? WordTraversalDirection.FORWARD : WordTraversalDirection.BACKWARD,
+                    trie.traversalDirection(), "Trie traversal direction must match language metadata.");

            for (Map.Entry<String, Set<String>> entry : expectedStemsByWord.entrySet()) {
                final String word = entry.getKey();
@@ -595,8 +565,9 @@ final class StemmerPatchTrieLoaderTest {
                assertFalse(actualStems.isEmpty(),
                        () -> "No patch candidates returned for word '" + word + "' in scenario " + scenario + ".");

-                assertEquals(expectedStems, actualStems, () -> "Reconstructed stem candidates differ for word '" + word
-                        + "' in scenario " + scenario + "'. Expected: " + expectedStems + ", actual: " + actualStems);
+                assertEquals(expectedStems, actualStems,
+                        () -> "Reconstructed stem candidates differ for word '" + word + "' in scenario " + scenario
+                                + "'. Expected: " + expectedStems + ", actual: " + actualStems);
            }
        }

@@ -619,13 +590,12 @@ final class StemmerPatchTrieLoaderTest {
            final FrequencyTrie<String> viaMode = StemmerPatchTrieLoader.load(language, true, DEFAULT_REDUCTION_MODE);

            final Map<String, Set<String>> expectedStemsByWord = readExpectedStems(language);
-            final int verifiedWords = 25;
            int counter = 0;

            for (Map.Entry<String, Set<String>> entry : expectedStemsByWord.entrySet()) {
                assertTriePatchSemanticsEqual(viaSettings, viaMode, entry.getKey());
                counter++;
-                if (counter >= verifiedWords) {
+                if (counter >= REPRESENTATIVE_BUNDLED_WORD_COUNT) {
                    break;
                }
            }
@@ -704,7 +674,7 @@ final class StemmerPatchTrieLoaderTest {
        }

        for (String patchCommand : patchCommands) {
-            stems.add(PatchCommandEncoder.apply(word, patchCommand));
+            stems.add(PatchCommandEncoder.apply(word, patchCommand, trie.traversalDirection()));
        }

        return stems;
@@ -743,7 +713,7 @@ final class StemmerPatchTrieLoaderTest {
        if (inputStream == null) {
            throw new IOException("Bundled stemmer resource not found: " + resourcePath);
        }
-        return inputStream;
+        return new GZIPInputStream(inputStream);
    }

    /**
--- a/src/test/java/org/egothor/stemmer/StemmerPatchTrieProperties.java
+++ b/src/test/java/org/egothor/stemmer/StemmerPatchTrieProperties.java
@@ -82,10 +82,10 @@ class StemmerPatchTrieProperties extends PropertyBasedTestSupport {
            assertTrue(preferredPatch != null && !preferredPatch.isEmpty(),
                    "preferred patch must exist for an observed word.");
            assertTrue(allPatches.length >= 1, "at least one patch must exist for an observed word.");
-            assertTrue(acceptableStems.contains(PatchCommandEncoder.apply(observedWord, preferredPatch)),
+            assertTrue(acceptableStems.contains(PatchCommandEncoder.apply(observedWord, preferredPatch, trie.traversalDirection())),
                    "preferred patch reconstructed an unexpected stem.");

-            final Set<String> producedStems = applyAll(observedWord, allPatches);
+            final Set<String> producedStems = applyAll(trie, observedWord, allPatches);
            assertTrue(acceptableStems.containsAll(producedStems),
                    "getAll() must not expose a patch that reconstructs an undeclared stem.");

@@ -125,10 +125,10 @@ class StemmerPatchTrieProperties extends PropertyBasedTestSupport {
     * @param patches returned patches
     * @return decoded stem set
     */
-    private static Set<String> applyAll(final String source, final String[] patches) {
+    private static Set<String> applyAll(final FrequencyTrie<String> trie, final String source, final String[] patches) {
        final LinkedHashSet<String> stems = new LinkedHashSet<>();
        for (String patch : patches) {
-            stems.add(PatchCommandEncoder.apply(source, patch));
+            stems.add(PatchCommandEncoder.apply(source, patch, trie.traversalDirection()));
        }
        return stems;
    }
--- a/src/test/java/org/egothor/stemmer/trie/CompiledNodeAndNodeDataTest.java
+++ b/src/test/java/org/egothor/stemmer/trie/CompiledNodeAndNodeDataTest.java
@@ -1,25 +1,21 @@
 /*******************************************************************************
 * Copyright (C) 2026, Leo Galambos
 * All rights reserved.
- *
+ * 
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
- *
+ * 
 * 1. Redistributions of source code must retain the above copyright notice,
 *    this list of conditions and the following disclaimer.
- *
+ * 
 * 2. Redistributions in binary form must reproduce the above copyright notice,
 *    this list of conditions and the following disclaimer in the documentation
 *    and/or other materials provided with the distribution.
- *
- * 3. All advertising materials mentioning features or use of this software must
- *    display the following acknowledgement:
- *    This product includes software developed by the Egothor project.
- *
- * 4. Neither the name of the copyright holder nor the names of its contributors
+ * 
+ * 3. Neither the name of the copyright holder nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
- *
+ * 
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
--- a/src/test/resources/org/egothor/stemmer/compile/remark-aware-dictionary.txt
+++ b/src/test/resources/org/egothor/stemmer/compile/remark-aware-dictionary.txt
@@ -1,8 +1,8 @@
 # full-line remark
 // full-line slash remark

-run running runs runner // trailing remark
-walk walking walks walked
-city cities
-café cafés
-play playing played # trailing remark
+run	running	runs	runner	// trailing remark
+walk	walking	walks	walked
+city	cities
+café	cafés
+play	playing	played # trailing remark
--- a/src/test/resources/regression/golden/branching-en-ranked-no-storeorig.gz
+++ b/src/test/resources/regression/golden/branching-en-ranked-no-storeorig.gz
--- a/src/test/resources/regression/golden/branching-en-ranked-no-storeorig.gz.sha256
+++ b/src/test/resources/regression/golden/branching-en-ranked-no-storeorig.gz.sha256
@@ -1 +1 @@
-62f6419ebab324a69e2e4ef9753687326aa20eed4e851a0f2b63a10f50d2eaae  branching-en-ranked-no-storeorig.gz
+fc5ede5cdee6930eb3d4b0cb35387f358a6fed6ddf935e9e627ac825cf7bf55b  branching-en-ranked-no-storeorig.gz
--- a/src/test/resources/regression/golden/mini-en-ranked-storeorig.gz
+++ b/src/test/resources/regression/golden/mini-en-ranked-storeorig.gz
--- a/src/test/resources/regression/golden/mini-en-ranked-storeorig.gz.sha256
+++ b/src/test/resources/regression/golden/mini-en-ranked-storeorig.gz.sha256
@@ -1 +1 @@
-7b65be9ed9ffab418ed2d1fccc219ea6925e192aa27cdefe5c8383570becd28f  mini-en-ranked-storeorig.gz
+e284287c49750180980091378f68c08df38b515f5628596ce8fcfdff10512276  mini-en-ranked-storeorig.gz
--- a/src/test/resources/regression/golden/mini-en-unordered-storeorig.gz
+++ b/src/test/resources/regression/golden/mini-en-unordered-storeorig.gz
--- a/src/test/resources/regression/golden/mini-en-unordered-storeorig.gz.sha256
+++ b/src/test/resources/regression/golden/mini-en-unordered-storeorig.gz.sha256
@@ -1 +1 @@
-7b65be9ed9ffab418ed2d1fccc219ea6925e192aa27cdefe5c8383570becd28f  mini-en-unordered-storeorig.gz
+e3383ddd58c2a0c43225795edf03bbd422f55da7510a9f68ec11f81905b01d96  mini-en-unordered-storeorig.gz
--- a/src/test/resources/regression/sources/branching-en.stemmer
+++ b/src/test/resources/regression/sources/branching-en.stemmer
@@ -1,5 +1,5 @@
 # Focused on subtree branching and repeated suffix families
-connect connected connecting connects connection
-collect collected collecting collects collection
-inspect inspected inspecting inspects inspection
-direct directed directing directs direction
+connect	connected	connecting	connects	connection
+collect	collected	collecting	collects	collection
+inspect	inspected	inspecting	inspects	inspection
+direct	directed	directing	directs		direction
--- a/src/test/resources/regression/sources/mini-en.stemmer
+++ b/src/test/resources/regression/sources/mini-en.stemmer
@@ -1,6 +1,6 @@
 # Basic English sample with remarks and mixed suffix patterns
-run running runs runner
-study studies studying
-city cities
-fly flies flying
-stop stopped stopping stops
+run	running	runs	runner
+study	studies	studying
+city	cities
+fly	flies	flying
+stop	stopped	stopping	stops