From 8785f2b7cb03754407c0f9789cf55b044ad6dda4 Mon Sep 17 00:00:00 2001
From: Leo Galambos <lg@hq.egothor.org>
Date: Thu, 23 Apr 2026 22:32:05 +0200
Subject: [PATCH] feat: Apply metadata-driven case normalization in get/getAll

---
 docs/cli-compilation.md                       |  2 +-
 docs/contributing-dictionaries.md             |  2 +-
 docs/dictionary-format.md                     |  4 +-
 docs/programmatic-loading-and-building.md     |  2 +-
 docs/quick-start.md                           |  4 +-
 .../egothor/stemmer/CaseProcessingMode.java   | 55 +++++++++++++
 .../java/org/egothor/stemmer/Compile.java     | 30 +++++--
 .../org/egothor/stemmer/FrequencyTrie.java    | 78 ++++++++++++++++---
 .../stemmer/StemmerDictionaryParser.java      | 73 +++++++++++++++--
 .../stemmer/StemmerPatchTrieLoader.java       | 63 +++++++++++++--
 .../org/egothor/stemmer/TrieMetadata.java     | 15 +++-
 .../org/egothor/stemmer/package-info.java     |  4 +-
 .../egothor/stemmer/FrequencyTrieTest.java    | 37 +++++++++
 .../stemmer/StemmerDictionaryParserTest.java  | 27 ++++++-
 14 files changed, 353 insertions(+), 43 deletions(-)
 create mode 100644 src/main/java/org/egothor/stemmer/CaseProcessingMode.java

diff --git a/docs/cli-compilation.md b/docs/cli-compilation.md
index 851a8b7..d41efeb 100644
--- a/docs/cli-compilation.md
+++ b/docs/cli-compilation.md
@@ -47,7 +47,7 @@ The CLI supports the following arguments:
 
 Path to the source dictionary file.
 
-The file must use the standard line-oriented tab-separated values dictionary format, meaning that columns are separated by the tab character. Each non-empty logical line starts with the canonical stem column and may contain zero or more variant columns. The parser expects UTF-8 input, lowercases it using `Locale.ROOT`, ignores trailing remarks introduced by `#` or `//`, and currently ignores dictionary items containing embedded whitespace while reporting them through warning-level log entries.
+The file must use the standard line-oriented tab-separated values dictionary format, meaning that columns are separated by the tab character. Each non-empty logical line starts with the canonical stem column and may contain zero or more variant columns. The parser expects UTF-8 input, processes case according to `CaseProcessingMode` (default: `LOWERCASE_WITH_LOCALE_ROOT`), ignores trailing remarks introduced by `#` or `//`, and currently ignores dictionary items containing embedded whitespace while reporting them through warning-level log entries.
 
 Example:
 
diff --git a/docs/contributing-dictionaries.md b/docs/contributing-dictionaries.md
index 7963b82..ed20b6f 100644
--- a/docs/contributing-dictionaries.md
+++ b/docs/contributing-dictionaries.md
@@ -41,7 +41,7 @@ The parser:
 
 - reads UTF-8 text,
 - interprets each line as tab-separated values,
-- normalizes input to lower case using `Locale.ROOT`,
+- applies configurable case processing through `CaseProcessingMode` (default: `LOWERCASE_WITH_LOCALE_ROOT`),
 - ignores empty lines,
 - supports remarks introduced by `#` or `//`,
 - currently ignores dictionary items containing embedded whitespace and reports them through warning-level log entries.
diff --git a/docs/dictionary-format.md b/docs/dictionary-format.md
index 38543af..7082c13 100644
--- a/docs/dictionary-format.md
+++ b/docs/dictionary-format.md
@@ -111,7 +111,7 @@ This is also valid:
 
 ## Case normalization
 
-Input lines are normalized to lower case using `Locale.ROOT` before tab-separated columns are processed into dictionary entries.
+Input-line case normalization is controlled by `CaseProcessingMode`; by default the parser applies `LOWERCASE_WITH_LOCALE_ROOT` before tab-separated columns are processed into dictionary entries.
 
 That means dictionary authors should treat the format as **case-insensitive at load time**. If a file contains uppercase or mixed-case tokens, they will be normalized during parsing.
 
@@ -193,7 +193,7 @@ Run	Running	Runs	Ran
 CONNECT	Connected	Connecting
 ```
 
-This is accepted, but it is normalized to lower case during parsing.
+This is accepted. Under the default `LOWERCASE_WITH_LOCALE_ROOT` mode it is normalized to lower case during parsing; under `AS_IS` it is preserved.
 
 ## Format limitations
 
diff --git a/docs/programmatic-loading-and-building.md b/docs/programmatic-loading-and-building.md
index b113aad..210da89 100644
--- a/docs/programmatic-loading-and-building.md
+++ b/docs/programmatic-loading-and-building.md
@@ -32,7 +32,7 @@ The `storeOriginal` flag controls whether the canonical stem is inserted as a no
 
 ## Load a textual dictionary
 
-Loading from a dictionary file follows the same preparation model as bundled resources, but the source comes from your own file or path. The textual format is tab-separated values, meaning that columns are separated by the tab character. Each non-empty logical line starts with the stem column and may contain zero or more variant columns. Input is normalized to lower case using `Locale.ROOT`, trailing remarks introduced by `#` or `//` are ignored, and dictionary items containing embedded whitespace are currently ignored with warning-level diagnostics.
+Loading from a dictionary file follows the same preparation model as bundled resources, but the source comes from your own file or path. The textual format is tab-separated values, meaning that columns are separated by the tab character. Each non-empty logical line starts with the stem column and may contain zero or more variant columns. Input case normalization is controlled by `CaseProcessingMode` (default: `LOWERCASE_WITH_LOCALE_ROOT`), trailing remarks introduced by `#` or `//` are ignored, and dictionary items containing embedded whitespace are currently ignored with warning-level diagnostics.
 
 ```java
 import java.io.IOException;
diff --git a/docs/quick-start.md b/docs/quick-start.md
index 0c209be..85122c2 100644
--- a/docs/quick-start.md
+++ b/docs/quick-start.md
@@ -69,7 +69,7 @@ public final class LoadBinaryStemmerExample {
 
 ### Build or extend a stemmer from dictionary data
 
-Radixor can also build a compiled trie from a custom dictionary. Dictionary lines consist of a canonical stem followed by zero or more variants. The parser lowercases input with `Locale.ROOT`, ignores leading and trailing whitespace, and supports line remarks introduced by `#` or `//`.
+Radixor can also build a compiled trie from a custom dictionary. Dictionary lines consist of a canonical stem followed by zero or more variants. The parser applies `CaseProcessingMode` (default: `LOWERCASE_WITH_LOCALE_ROOT`), ignores leading and trailing whitespace, and supports line remarks introduced by `#` or `//`.
 
 This path is also relevant when you extend an existing compiled stemmer with additional domain-specific entries and rebuild a new compact artifact.
 
@@ -206,4 +206,4 @@ Dictionary compilation is usually a one-time preparation step and is generally f
 
 ## Persisted trie metadata
 
-Every compiled trie artifact stores a `TrieMetadata` descriptor together with the immutable trie payload. That metadata currently records the binary format version, the `WordTraversalDirection`, the `ReductionSettings` used during compilation, and the declared `DiacriticProcessingMode`. Even when a given release does not yet actively branch on every field at query time, persisting the full descriptor keeps artifacts self-describing and prepares the format for future matching strategies without relying on side-channel configuration.
+Every compiled trie artifact stores a `TrieMetadata` descriptor together with the immutable trie payload. That metadata currently records the binary format version, the `WordTraversalDirection`, the `ReductionSettings` used during compilation, the declared `DiacriticProcessingMode`, and the selected `CaseProcessingMode`. The traversal and case-processing settings are applied during runtime lookup (`get`, `getAll`), while persisting the full descriptor keeps artifacts self-describing and prepares the format for future matching strategies without relying on side-channel configuration.
diff --git a/src/main/java/org/egothor/stemmer/CaseProcessingMode.java b/src/main/java/org/egothor/stemmer/CaseProcessingMode.java
new file mode 100644
index 0000000..280e962
--- /dev/null
+++ b/src/main/java/org/egothor/stemmer/CaseProcessingMode.java
@@ -0,0 +1,55 @@
+/*******************************************************************************
+ * Copyright (C) 2026, Leo Galambos
+ * All rights reserved.
+ * 
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ * 
+ * 1. Redistributions of source code must retain the above copyright notice,
+ *    this list of conditions and the following disclaimer.
+ * 
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ *    this list of conditions and the following disclaimer in the documentation
+ *    and/or other materials provided with the distribution.
+ * 
+ * 3. Neither the name of the copyright holder nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ * 
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ ******************************************************************************/
+package org.egothor.stemmer;
+
+import java.util.Locale;
+
+/**
+ * Defines how dictionary items are normalized with respect to letter casing.
+ *
+ * <p>
+ * The mode is applied while parsing dictionary sources and is persisted in trie
+ * metadata, where {@code FrequencyTrie} lookups apply it to the supplied key.
+ */
+public enum CaseProcessingMode {
+
+    /**
+     * Preserves input character casing exactly as provided by the dictionary
+     * source.
+     */
+    AS_IS,
+
+    /**
+     * Normalizes all dictionary content to lower case using
+     * {@link Locale#ROOT}.
+     */
+    LOWERCASE_WITH_LOCALE_ROOT
+}
diff --git a/src/main/java/org/egothor/stemmer/Compile.java b/src/main/java/org/egothor/stemmer/Compile.java
index 2f61044..d3c44e8 100644
--- a/src/main/java/org/egothor/stemmer/Compile.java
+++ b/src/main/java/org/egothor/stemmer/Compile.java
@@ -61,6 +61,7 @@ import java.util.logging.Logger;
  * --output &lt;file&gt;
  * --reduction-mode &lt;mode&gt;
  * [--store-original]
+ * [--case-processing-mode &lt;mode&gt;]
  * [--dominant-winner-min-percent &lt;1..100&gt;]
  * [--dominant-winner-over-second-ratio &lt;1..n&gt;]
  * [--overwrite]
@@ -152,7 +153,7 @@ public final class Compile {
         final WordTraversalDirection traversalDirection = arguments.rightToLeft() ? WordTraversalDirection.FORWARD
                 : WordTraversalDirection.BACKWARD;
         final FrequencyTrie<String> trie = StemmerPatchTrieLoader.load(arguments.inputFile(), arguments.storeOriginal(),
-                reductionSettings, traversalDirection);
+                reductionSettings, traversalDirection, arguments.caseProcessingMode());
 
         final Path outputFile = arguments.outputFile();
         final Path parent = outputFile.toAbsolutePath().getParent();
@@ -168,10 +169,10 @@ public final class Compile {
 
         if (LOGGER.isLoggable(Level.INFO)) {
             LOGGER.log(Level.INFO,
-                    "Compiled dictionary {0} to {1} using mode {2}, storeOriginal={3}, rightToLeft={4}, dominantWinnerMinPercent={5}, dominantWinnerOverSecondRatio={6}.",
+                    "Compiled dictionary {0} to {1} using mode {2}, storeOriginal={3}, rightToLeft={4}, caseProcessingMode={5}, dominantWinnerMinPercent={6}, dominantWinnerOverSecondRatio={7}.",
                     new Object[] { arguments.inputFile().toAbsolutePath().toString(),
                             arguments.outputFile().toAbsolutePath().toString(), arguments.reductionMode().name(),
-                            arguments.storeOriginal(), arguments.rightToLeft(),
+                            arguments.storeOriginal(), arguments.rightToLeft(), arguments.caseProcessingMode(),
                             arguments.dominantWinnerMinPercent(), arguments.dominantWinnerOverSecondRatio() });
         }
     }
@@ -186,6 +187,7 @@ public final class Compile {
         System.err.println("      --output <file> \\");
         System.err.println("      --reduction-mode <mode> \\");
         System.err.println("      [--store-original] \\");
+        System.err.println("      [--case-processing-mode <mode>] \\");
         System.err.println("      [--dominant-winner-min-percent <1..100>] \\");
         System.err.println("      [--dominant-winner-over-second-ratio <1..n>] \\");
         System.err.println("      [--overwrite]");
@@ -199,6 +201,13 @@ public final class Compile {
         System.err.println("      of the stored word form and patch commands are encoded likewise.");
         System.err.println("  --overwrite");
         System.err.println("      Replaces the target file when it already exists.");
+        System.err.println("  --case-processing-mode");
+        System.err.println("      Controls whether dictionary input is lowercased or preserved as-is.");
+        System.err.println();
+        System.err.println("Supported case processing modes:");
+        for (CaseProcessingMode mode : CaseProcessingMode.values()) {
+            System.err.println("  " + mode.name());
+        }
         System.err.println();
         System.err.println("Supported reduction modes:");
         for (ReductionMode mode : ReductionMode.values()) {
@@ -256,14 +265,15 @@ public final class Compile {
      *                                      forward traversal on stored word forms
      * @param dominantWinnerMinPercent      dominant winner minimum percent
      * @param dominantWinnerOverSecondRatio dominant winner over second ratio
+     * @param caseProcessingMode            dictionary case processing mode
      * @param overwrite                     whether an existing output may be
      *                                      replaced
      * @param help                          whether usage help was requested
      */
     @SuppressWarnings("PMD.LongVariable")
     private record Arguments(Path inputFile, Path outputFile, ReductionMode reductionMode, boolean storeOriginal,
-            boolean rightToLeft, int dominantWinnerMinPercent, int dominantWinnerOverSecondRatio, boolean overwrite,
-            boolean help) {
+            boolean rightToLeft, int dominantWinnerMinPercent, int dominantWinnerOverSecondRatio,
+            CaseProcessingMode caseProcessingMode, boolean overwrite, boolean help) {
 
         /**
          * Parses raw command-line arguments.
@@ -282,6 +292,7 @@ public final class Compile {
             boolean rightToLeft = false;
             boolean overwrite = false;
             boolean help = false;
+            CaseProcessingMode caseProcessingMode = CaseProcessingMode.LOWERCASE_WITH_LOCALE_ROOT;
             int dominantWinnerMinPercent = ReductionSettings.DEFAULT_DOMINANT_WINNER_MIN_PERCENT;
             int dominantWinnerOverSecondRatio = ReductionSettings.DEFAULT_DOMINANT_WINNER_OVER_SECOND_RATIO;
 
@@ -330,6 +341,11 @@ public final class Compile {
                                 requireValue(arguments, ++index, "--dominant-winner-over-second-ratio"),
                                 "--dominant-winner-over-second-ratio");
                         break;
+                    case "--case-processing-mode":
+                        caseProcessingMode = CaseProcessingMode
+                                .valueOf(requireValue(arguments, ++index, "--case-processing-mode")
+                                        .toUpperCase(Locale.ROOT));
+                        break;
 
                     default:
                         throw new IllegalArgumentException("Unknown argument: " + argument);
@@ -338,7 +354,7 @@ public final class Compile {
 
             if (help) {
                 return new Arguments(inputFile, outputFile, reductionMode, storeOriginal, rightToLeft,
-                        dominantWinnerMinPercent, dominantWinnerOverSecondRatio, overwrite, true);
+                        dominantWinnerMinPercent, dominantWinnerOverSecondRatio, caseProcessingMode, overwrite, true);
             }
 
             if (inputFile == null) {
@@ -352,7 +368,7 @@ public final class Compile {
             }
 
             return new Arguments(inputFile, outputFile, reductionMode, storeOriginal, rightToLeft,
-                    dominantWinnerMinPercent, dominantWinnerOverSecondRatio, overwrite, false);
+                    dominantWinnerMinPercent, dominantWinnerOverSecondRatio, caseProcessingMode, overwrite, false);
         }
 
         /**
diff --git a/src/main/java/org/egothor/stemmer/FrequencyTrie.java b/src/main/java/org/egothor/stemmer/FrequencyTrie.java
index 54ebdb8..b36ac88 100644
--- a/src/main/java/org/egothor/stemmer/FrequencyTrie.java
+++ b/src/main/java/org/egothor/stemmer/FrequencyTrie.java
@@ -41,6 +41,7 @@ import java.util.Collections;
 import java.util.IdentityHashMap;
 import java.util.LinkedHashMap;
 import java.util.List;
+import java.util.Locale;
 import java.util.Map;
 import java.util.Objects;
 import java.util.function.IntFunction;
@@ -101,7 +102,7 @@ public final class FrequencyTrie<V> {
     /**
      * Binary format version.
      */
-    private static final int STREAM_VERSION = 3;
+    private static final int STREAM_VERSION = 4;
 
     /**
      * Factory used to create correctly typed arrays for {@link #getAll(String)}.
@@ -142,6 +143,10 @@ public final class FrequencyTrie<V> {
      * selected deterministically by shorter {@code toString()} value first, then by
      * lexicographically lower {@code toString()}, and finally by stable first-seen
      * order.
+     *
+     * <p>
+     * The supplied key is normalized according to the persisted
+     * {@link TrieMetadata#caseProcessingMode()} before traversal.
      * 
      * @param key key to resolve
      * @return most frequent value, or {@code null} if the key does not exist or no
@@ -150,7 +155,7 @@ public final class FrequencyTrie<V> {
      */
     public V get(final String key) {
         Objects.requireNonNull(key, "key");
-        final CompiledNode<V> node = findNode(key);
+        final CompiledNode<V> node = findNode(normalizeLookupKey(key));
         if (node == null || node.orderedValues().length == 0) {
             return null;
         }
@@ -170,6 +175,10 @@ public final class FrequencyTrie<V> {
      * <p>
      * The returned array is a defensive copy.
      *
+     * <p>
+     * The supplied key is normalized according to the persisted
+     * {@link TrieMetadata#caseProcessingMode()} before traversal.
+     *
      * @param key key to resolve
      * @return all values stored at the addressed node, ordered by descending
      *         frequency; returns an empty array if the key does not exist or no
@@ -178,7 +187,7 @@ public final class FrequencyTrie<V> {
      */
     public V[] getAll(final String key) {
         Objects.requireNonNull(key, "key");
-        final CompiledNode<V> node = findNode(key);
+        final CompiledNode<V> node = findNode(normalizeLookupKey(key));
         if (node == null || node.orderedValues().length == 0) {
             return this.arrayFactory.apply(0);
         }
@@ -336,7 +345,7 @@ public final class FrequencyTrie<V> {
         }
 
         final int version = dataInput.readInt();
-        if (version != 1 && version != STREAM_VERSION) {
+        if (version != 1 && version != 3 && version != STREAM_VERSION) {
             throw new IOException("Unsupported trie stream version: " + version);
         }
 
@@ -376,6 +385,7 @@ public final class FrequencyTrie<V> {
         dataOutput.writeInt(metadata.reductionSettings().dominantWinnerMinPercent());
         dataOutput.writeInt(metadata.reductionSettings().dominantWinnerOverSecondRatio());
         dataOutput.writeInt(metadata.diacriticProcessingMode().ordinal());
+        dataOutput.writeInt(metadata.caseProcessingMode().ordinal());
     }
 
     /**
@@ -419,10 +429,22 @@ public final class FrequencyTrie<V> {
             throw new IOException("Invalid diacritic processing mode ordinal: " + diacriticProcessingModeOrdinal);
         }
 
-        return new TrieMetadata(
-                version, traversalDirection, new ReductionSettings(reductionModes[reductionModeOrdinal],
-                        dominantWinnerMinPercent, dominantWinnerOverSecondRatio),
-                diacriticProcessingModes[diacriticProcessingModeOrdinal]);
+        final CaseProcessingMode caseProcessingMode;
+        if (version >= 4) { // NOPMD
+            final CaseProcessingMode[] caseProcessingModes = CaseProcessingMode.values();
+            final int caseProcessingModeOrdinal = dataInput.readInt();
+            if (caseProcessingModeOrdinal < 0 || caseProcessingModeOrdinal >= caseProcessingModes.length) {
+                throw new IOException("Invalid case processing mode ordinal: " + caseProcessingModeOrdinal);
+            }
+            caseProcessingMode = caseProcessingModes[caseProcessingModeOrdinal];
+        } else {
+            caseProcessingMode = CaseProcessingMode.LOWERCASE_WITH_LOCALE_ROOT;
+        }
+
+        return new TrieMetadata(version, traversalDirection,
+                new ReductionSettings(reductionModes[reductionModeOrdinal], dominantWinnerMinPercent,
+                        dominantWinnerOverSecondRatio),
+                diacriticProcessingModes[diacriticProcessingModeOrdinal], caseProcessingMode);
     }
 
     /**
@@ -598,7 +620,7 @@ public final class FrequencyTrie<V> {
     /**
      * Locates the compiled node for the supplied key.
      *
-     * @param key key to resolve
+     * @param key already-normalized key to resolve
      * @return compiled node, or {@code null} if the path does not exist
      */
     private CompiledNode<V> findNode(final String key) {
@@ -613,6 +635,19 @@ public final class FrequencyTrie<V> {
         return current;
     }
 
+    /**
+     * Applies lookup-time case normalization according to persisted metadata.
+     *
+     * @param key lookup key
+     * @return lower-cased key in lower-casing mode, otherwise the key unchanged
+     */
+    private String normalizeLookupKey(final String key) {
+        if (this.metadata.caseProcessingMode() == CaseProcessingMode.LOWERCASE_WITH_LOCALE_ROOT) {
+            return key.toLowerCase(Locale.ROOT);
+        }
+        return key;
+    }
+
     /**
      * Builder of {@link FrequencyTrie}.
      *
@@ -647,6 +682,11 @@ public final class FrequencyTrie<V> {
          */
         private final WordTraversalDirection traversalDirection;
 
+        /**
+         * Dictionary case processing mode associated with this builder.
+         */
+        private final CaseProcessingMode caseProcessingMode;
+
         /**
          * Mutable root node.
          */
@@ -679,9 +719,25 @@ public final class FrequencyTrie<V> {
          */
         public Builder(final IntFunction<V[]> arrayFactory, final ReductionSettings reductionSettings,
                 final WordTraversalDirection traversalDirection) {
+            this(arrayFactory, reductionSettings, traversalDirection, CaseProcessingMode.LOWERCASE_WITH_LOCALE_ROOT);
+        }
+
+        /**
+         * Creates a new builder with the provided settings, explicit traversal
+         * direction, and explicit case processing mode.
+         *
+         * @param arrayFactory       array factory
+         * @param reductionSettings  reduction configuration
+         * @param traversalDirection logical key traversal direction
+         * @param caseProcessingMode dictionary case processing mode
+         * @throws NullPointerException if any argument is {@code null}
+         */
+        public Builder(final IntFunction<V[]> arrayFactory, final ReductionSettings reductionSettings,
+                final WordTraversalDirection traversalDirection, final CaseProcessingMode caseProcessingMode) {
             this.arrayFactory = Objects.requireNonNull(arrayFactory, "arrayFactory");
             this.reductionSettings = Objects.requireNonNull(reductionSettings, "reductionSettings");
             this.traversalDirection = Objects.requireNonNull(traversalDirection, "traversalDirection");
+            this.caseProcessingMode = Objects.requireNonNull(caseProcessingMode, "caseProcessingMode");
             this.root = new MutableNode<>();
         }
 
@@ -753,8 +809,8 @@ public final class FrequencyTrie<V> {
                         reductionContext.canonicalNodeCount());
             }
 
-            final TrieMetadata metadata = TrieMetadata.current(STREAM_VERSION, this.traversalDirection,
-                    this.reductionSettings);
+            final TrieMetadata metadata = new TrieMetadata(STREAM_VERSION, this.traversalDirection,
+                    this.reductionSettings, DiacriticProcessingMode.AS_IS, this.caseProcessingMode);
             return new FrequencyTrie<>(this.arrayFactory, compiledRoot, metadata);
         }
 
diff --git a/src/main/java/org/egothor/stemmer/StemmerDictionaryParser.java b/src/main/java/org/egothor/stemmer/StemmerDictionaryParser.java
index 7313b3e..c3e1511 100644
--- a/src/main/java/org/egothor/stemmer/StemmerDictionaryParser.java
+++ b/src/main/java/org/egothor/stemmer/StemmerDictionaryParser.java
@@ -53,8 +53,8 @@ import java.util.logging.Logger;
  * to that stem.
  *
  * <p>
- * Input lines are normalized to lower case using {@link Locale#ROOT}. Leading
- * and trailing whitespace around each column is ignored.
+ * Input line case normalization is controlled by {@link CaseProcessingMode}.
+ * Leading and trailing whitespace around each column is ignored.
  *
  * <p>
  * The parser supports line remarks and trailing remarks. The remark markers
@@ -113,11 +113,27 @@ public final class StemmerDictionaryParser {
      * @throws IOException          if reading fails
      */
     public static ParseStatistics parse(final Path path, final EntryHandler entryHandler) throws IOException {
+        return parse(path, CaseProcessingMode.LOWERCASE_WITH_LOCALE_ROOT, entryHandler);
+    }
+
+    /**
+     * Parses a dictionary file from a filesystem path using an explicit case mode.
+     *
+     * @param path               dictionary file path
+     * @param caseProcessingMode case normalization applied to each parsed line
+     * @param entryHandler       handler receiving parsed entries
+     * @return parsing statistics
+     * @throws NullPointerException if any argument is {@code null}
+     * @throws IOException          if reading fails
+     */
+    public static ParseStatistics parse(final Path path, final CaseProcessingMode caseProcessingMode,
+            final EntryHandler entryHandler) throws IOException {
         Objects.requireNonNull(path, "path");
+        Objects.requireNonNull(caseProcessingMode, "caseProcessingMode");
         Objects.requireNonNull(entryHandler, "entryHandler");
 
         try (BufferedReader reader = Files.newBufferedReader(path, StandardCharsets.UTF_8)) {
-            return parse(reader, path.toAbsolutePath().toString(), entryHandler);
+            return parse(reader, path.toAbsolutePath().toString(), caseProcessingMode, entryHandler);
         }
     }
 
@@ -132,7 +148,23 @@ public final class StemmerDictionaryParser {
      */
     public static ParseStatistics parse(final String fileName, final EntryHandler entryHandler) throws IOException {
         Objects.requireNonNull(fileName, "fileName");
-        return parse(Path.of(fileName), entryHandler);
+        return parse(Path.of(fileName), CaseProcessingMode.LOWERCASE_WITH_LOCALE_ROOT, entryHandler);
+    }
+
+    /**
+     * Parses a dictionary file from a path string using an explicit case mode.
+     *
+     * @param fileName           dictionary file name or path string
+     * @param caseProcessingMode case normalization applied to each parsed line
+     * @param entryHandler       handler receiving parsed entries
+     * @return parsing statistics
+     * @throws NullPointerException if any argument is {@code null}
+     * @throws IOException          if reading fails
+     */
+    public static ParseStatistics parse(final String fileName, final CaseProcessingMode caseProcessingMode,
+            final EntryHandler entryHandler) throws IOException {
+        Objects.requireNonNull(fileName, "fileName");
+        return parse(Path.of(fileName), caseProcessingMode, entryHandler);
     }
 
     /**
@@ -147,8 +179,25 @@ public final class StemmerDictionaryParser {
      */
     public static ParseStatistics parse(final Reader reader, final String sourceDescription,
             final EntryHandler entryHandler) throws IOException {
+        return parse(reader, sourceDescription, CaseProcessingMode.LOWERCASE_WITH_LOCALE_ROOT, entryHandler);
+    }
+
+    /**
+     * Parses a dictionary from a reader using an explicit case mode.
+     *
+     * @param reader             source reader
+     * @param sourceDescription  logical source description for diagnostics
+     * @param caseProcessingMode case normalization applied to each parsed line
+     * @param entryHandler       handler receiving parsed entries
+     * @return parsing statistics
+     * @throws NullPointerException if any argument is {@code null}
+     * @throws IOException          if reading or handler processing fails
+     */
+    public static ParseStatistics parse(final Reader reader, final String sourceDescription,
+            final CaseProcessingMode caseProcessingMode, final EntryHandler entryHandler) throws IOException {
         Objects.requireNonNull(reader, "reader");
         Objects.requireNonNull(sourceDescription, "sourceDescription");
+        Objects.requireNonNull(caseProcessingMode, "caseProcessingMode");
         Objects.requireNonNull(entryHandler, "entryHandler");
 
         final BufferedReader bufferedReader = reader instanceof BufferedReader ? (BufferedReader) reader
@@ -161,7 +210,7 @@ public final class StemmerDictionaryParser {
         for (String line = bufferedReader.readLine(); line != null; line = bufferedReader.readLine()) {
             lineNumber++;
 
-            final String normalizedLine = stripRemark(line).trim().toLowerCase(Locale.ROOT);
+            final String normalizedLine = normalizeLineCase(stripRemark(line).trim(), caseProcessingMode);
             if (normalizedLine.isEmpty()) {
                 ignoredLineCount++;
                 continue;
@@ -226,6 +275,20 @@ public final class StemmerDictionaryParser {
         return statistics;
     }
 
+    /**
+     * Applies case normalization to one line according to the selected mode.
+     *
+     * @param line               line to normalize
+     * @param caseProcessingMode case processing mode
+     * @return lower-cased line in lower-casing mode, otherwise the line unchanged
+     */
+    private static String normalizeLineCase(final String line, final CaseProcessingMode caseProcessingMode) {
+        if (caseProcessingMode == CaseProcessingMode.LOWERCASE_WITH_LOCALE_ROOT) {
+            return line.toLowerCase(Locale.ROOT);
+        }
+        return line;
+    }
+
     /**
      * Determines whether one dictionary item contains any Unicode whitespace
      * character.
diff --git a/src/main/java/org/egothor/stemmer/StemmerPatchTrieLoader.java b/src/main/java/org/egothor/stemmer/StemmerPatchTrieLoader.java
index 62aee96..e5d820c 100644
--- a/src/main/java/org/egothor/stemmer/StemmerPatchTrieLoader.java
+++ b/src/main/java/org/egothor/stemmer/StemmerPatchTrieLoader.java
@@ -283,7 +283,8 @@ public final class StemmerPatchTrieLoader {
         try (InputStream inputStream = openBundledResource(resourcePath);
                 BufferedReader reader = new BufferedReader(
                         new InputStreamReader(inputStream, StandardCharsets.UTF_8))) {
-            return load(reader, resourcePath, storeOriginal, reductionSettings, traversalDirectionOf(language));
+            return load(reader, resourcePath, storeOriginal, reductionSettings, traversalDirectionOf(language),
+                    CaseProcessingMode.LOWERCASE_WITH_LOCALE_ROOT);
         }
     }
 
@@ -318,7 +319,8 @@ public final class StemmerPatchTrieLoader {
      */
     public static FrequencyTrie<String> load(final Path path, final boolean storeOriginal,
             final ReductionSettings reductionSettings) throws IOException {
-        return load(path, storeOriginal, reductionSettings, WordTraversalDirection.BACKWARD);
+        return load(path, storeOriginal, reductionSettings, WordTraversalDirection.BACKWARD,
+                CaseProcessingMode.LOWERCASE_WITH_LOCALE_ROOT);
     }
 
     /**
@@ -338,14 +340,37 @@ public final class StemmerPatchTrieLoader {
     public static FrequencyTrie<String> load(final Path path, final boolean storeOriginal,
             final ReductionSettings reductionSettings, final WordTraversalDirection traversalDirection)
             throws IOException {
+        return load(path, storeOriginal, reductionSettings, traversalDirection,
+                CaseProcessingMode.LOWERCASE_WITH_LOCALE_ROOT);
+    }
+
+    /**
+     * Loads a dictionary from a filesystem path using explicit reduction settings,
+     * explicit traversal direction, and explicit case processing mode.
+     *
+     * @param path               path to the dictionary file
+     * @param storeOriginal      whether the stem itself should be inserted using the
+     *                           canonical no-op patch command
+     * @param reductionSettings  reduction settings
+     * @param traversalDirection traversal direction used for both trie keys and
+     *                           patch commands
+     * @param caseProcessingMode case processing mode for parsing and the trie builder
+     * @return compiled patch-command trie
+     * @throws NullPointerException if any argument is {@code null}
+     * @throws IOException          if the file cannot be opened or read
+     */
+    public static FrequencyTrie<String> load(final Path path, final boolean storeOriginal,
+            final ReductionSettings reductionSettings, final WordTraversalDirection traversalDirection,
+            final CaseProcessingMode caseProcessingMode) throws IOException {
         Objects.requireNonNull(path, "path");
         Objects.requireNonNull(reductionSettings, "reductionSettings");
         Objects.requireNonNull(traversalDirection, "traversalDirection");
+        Objects.requireNonNull(caseProcessingMode, "caseProcessingMode");
 
         try (InputStream inputStream = openDictionaryInputStream(path);
                 BufferedReader reader = new BufferedReader(new InputStreamReader(inputStream, StandardCharsets.UTF_8))) {
             return load(reader, path.toAbsolutePath().toString(), storeOriginal, reductionSettings,
-                    traversalDirection);
+                    traversalDirection, caseProcessingMode);
         }
     }
 
@@ -403,7 +428,30 @@ public final class StemmerPatchTrieLoader {
             final ReductionSettings reductionSettings, final WordTraversalDirection traversalDirection)
             throws IOException {
         Objects.requireNonNull(fileName, "fileName");
-        return load(Path.of(fileName), storeOriginal, reductionSettings, traversalDirection);
+        return load(Path.of(fileName), storeOriginal, reductionSettings, traversalDirection,
+                CaseProcessingMode.LOWERCASE_WITH_LOCALE_ROOT);
+    }
+
+    /**
+     * Loads a dictionary from a path given as a string, with caller-selected
+     * reduction settings, traversal direction, and case processing mode.
+     *
+     * @param fileName           file name or path string
+     * @param storeOriginal      whether the stem itself should be inserted using the
+     *                           canonical no-op patch command
+     * @param reductionSettings  reduction settings
+     * @param traversalDirection traversal direction used for both trie keys and
+     *                           patch commands
+     * @param caseProcessingMode case processing mode used during dictionary parsing
+     * @return compiled patch-command trie
+     * @throws NullPointerException if any argument is {@code null}
+     * @throws IOException          if the file cannot be opened or read
+     */
+    public static FrequencyTrie<String> load(final String fileName, final boolean storeOriginal,
+            final ReductionSettings reductionSettings, final WordTraversalDirection traversalDirection,
+            final CaseProcessingMode caseProcessingMode) throws IOException {
+        final Path dictionaryPath = Path.of(Objects.requireNonNull(fileName, "fileName"));
+        return load(dictionaryPath, storeOriginal, reductionSettings, traversalDirection, caseProcessingMode);
     }
 
     /**
@@ -437,14 +485,15 @@ public final class StemmerPatchTrieLoader {
      */
     private static FrequencyTrie<String> load(final BufferedReader reader, final String sourceDescription,
             final boolean storeOriginal, final ReductionSettings reductionSettings,
-            final WordTraversalDirection traversalDirection) throws IOException {
+            final WordTraversalDirection traversalDirection, final CaseProcessingMode caseProcessingMode)
+            throws IOException {
         final FrequencyTrie.Builder<String> builder = new FrequencyTrie.Builder<>(String[]::new, reductionSettings,
-                traversalDirection);
+                traversalDirection, caseProcessingMode);
         final PatchCommandEncoder patchCommandEncoder = new PatchCommandEncoder(traversalDirection);
         final int[] insertedMappings = new int[1];
 
         final StemmerDictionaryParser.ParseStatistics statistics = StemmerDictionaryParser.parse(reader,
-                sourceDescription, (stem, variants, lineNumber) -> {
+                sourceDescription, caseProcessingMode, (stem, variants, lineNumber) -> {
                     if (storeOriginal) {
                         builder.put(stem, NOOP_PATCH_COMMAND);
                         insertedMappings[0]++;
diff --git a/src/main/java/org/egothor/stemmer/TrieMetadata.java b/src/main/java/org/egothor/stemmer/TrieMetadata.java
index b5703bc..77d2ad1 100644
--- a/src/main/java/org/egothor/stemmer/TrieMetadata.java
+++ b/src/main/java/org/egothor/stemmer/TrieMetadata.java
@@ -54,9 +54,12 @@ import java.util.Objects;
  * @param reductionSettings       reduction settings used during compilation
  * @param diacriticProcessingMode diacritic processing strategy associated with
  *                                the artifact
+ * @param caseProcessingMode      case processing strategy associated with the
+ *                                artifact
  */
 public record TrieMetadata(int formatVersion, WordTraversalDirection traversalDirection,
-        ReductionSettings reductionSettings, DiacriticProcessingMode diacriticProcessingMode) {
+        ReductionSettings reductionSettings, DiacriticProcessingMode diacriticProcessingMode,
+        CaseProcessingMode caseProcessingMode) {
 
     /**
      * Creates a new metadata instance.
@@ -66,9 +69,11 @@ public record TrieMetadata(int formatVersion, WordTraversalDirection traversalDi
      * @param traversalDirection      logical key traversal direction
      * @param reductionSettings       reduction settings used during compilation
      * @param diacriticProcessingMode diacritic processing strategy
+     * @param caseProcessingMode      case processing strategy
      */
     public TrieMetadata(final int formatVersion, final WordTraversalDirection traversalDirection,
-            final ReductionSettings reductionSettings, final DiacriticProcessingMode diacriticProcessingMode) {
+            final ReductionSettings reductionSettings, final DiacriticProcessingMode diacriticProcessingMode,
+            final CaseProcessingMode caseProcessingMode) {
         if (formatVersion < 1) { // NOPMD
             throw new IllegalArgumentException("formatVersion must be at least 1.");
         }
@@ -76,6 +81,7 @@ public record TrieMetadata(int formatVersion, WordTraversalDirection traversalDi
         this.traversalDirection = Objects.requireNonNull(traversalDirection, "traversalDirection");
         this.reductionSettings = Objects.requireNonNull(reductionSettings, "reductionSettings");
         this.diacriticProcessingMode = Objects.requireNonNull(diacriticProcessingMode, "diacriticProcessingMode");
+        this.caseProcessingMode = Objects.requireNonNull(caseProcessingMode, "caseProcessingMode");
     }
 
     /**
@@ -89,7 +95,8 @@ public record TrieMetadata(int formatVersion, WordTraversalDirection traversalDi
      */
     public static TrieMetadata current(final int formatVersion, final WordTraversalDirection traversalDirection,
             final ReductionSettings reductionSettings) {
-        return new TrieMetadata(formatVersion, traversalDirection, reductionSettings, DiacriticProcessingMode.AS_IS);
+        return new TrieMetadata(formatVersion, traversalDirection, reductionSettings, DiacriticProcessingMode.AS_IS,
+                CaseProcessingMode.LOWERCASE_WITH_LOCALE_ROOT);
     }
 
     /**
@@ -104,6 +111,6 @@ public record TrieMetadata(int formatVersion, WordTraversalDirection traversalDi
     public static TrieMetadata legacy(final int formatVersion, final WordTraversalDirection traversalDirection) {
         return new TrieMetadata(formatVersion, traversalDirection,
                 ReductionSettings.withDefaults(ReductionMode.MERGE_SUBTREES_WITH_EQUIVALENT_RANKED_GET_ALL_RESULTS),
-                DiacriticProcessingMode.AS_IS);
+                DiacriticProcessingMode.AS_IS, CaseProcessingMode.LOWERCASE_WITH_LOCALE_ROOT);
     }
 }
diff --git a/src/main/java/org/egothor/stemmer/package-info.java b/src/main/java/org/egothor/stemmer/package-info.java
index 5653438..315cc99 100644
--- a/src/main/java/org/egothor/stemmer/package-info.java
+++ b/src/main/java/org/egothor/stemmer/package-info.java
@@ -60,7 +60,9 @@
  * non-empty logical line starts with a canonical stem followed by known surface
  * variants in subsequent tab-separated columns.
  * Parsing is delegated to {@link org.egothor.stemmer.StemmerDictionaryParser},
- * which normalizes input to lower case using {@link java.util.Locale#ROOT},
+ * which applies configurable case processing through
+ * {@link org.egothor.stemmer.CaseProcessingMode} (default:
+ * {@link org.egothor.stemmer.CaseProcessingMode#LOWERCASE_WITH_LOCALE_ROOT}),
  * supports whole-line as well as trailing remarks introduced by {@code #} or
  * {@code //}, and currently ignores dictionary items containing Unicode
  * whitespace characters while reporting them through warning-level diagnostics.
diff --git a/src/test/java/org/egothor/stemmer/FrequencyTrieTest.java b/src/test/java/org/egothor/stemmer/FrequencyTrieTest.java
index 97eaffb..a1e9878 100644
--- a/src/test/java/org/egothor/stemmer/FrequencyTrieTest.java
+++ b/src/test/java/org/egothor/stemmer/FrequencyTrieTest.java
@@ -201,6 +201,43 @@ class FrequencyTrieTest {
                 () -> assertArrayEquals(new String[] { "noun", "agent" }, trie.getAll("runner")));
     }
 
+    /**
+     * Checks that a trie built with lower-casing metadata resolves upper-case and
+     * mixed-case lookup keys to the entries stored under the lower-case key.
+     */
+    @Test
+    @DisplayName("Lookup applies lowercase normalization when metadata requires it")
+    void lookupAppliesLowercaseNormalizationWhenMetadataRequiresIt() {
+        final ReductionSettings settings = ReductionSettings
+                .withDefaults(ReductionMode.MERGE_SUBTREES_WITH_EQUIVALENT_RANKED_GET_ALL_RESULTS);
+        final FrequencyTrie.Builder<String> trieBuilder = new FrequencyTrie.Builder<>(String[]::new, settings,
+                WordTraversalDirection.BACKWARD, CaseProcessingMode.LOWERCASE_WITH_LOCALE_ROOT);
+        trieBuilder.put("house", "noun");
+        trieBuilder.put("house", "verb");
+        final FrequencyTrie<String> lowercasingTrie = trieBuilder.build();
+
+        assertAll(() -> assertEquals("noun", lowercasingTrie.get("HOUSE")),
+                () -> assertArrayEquals(new String[] { "noun", "verb" }, lowercasingTrie.getAll("HoUsE")));
+    }
+
+    /**
+     * Checks that a trie built with AS_IS metadata matches keys case-sensitively.
+     */
+    @Test
+    @DisplayName("Lookup keeps case-sensitive behavior when metadata is AS_IS")
+    void lookupKeepsCaseSensitiveBehaviorWhenMetadataIsAsIs() {
+        final ReductionSettings settings = ReductionSettings
+                .withDefaults(ReductionMode.MERGE_SUBTREES_WITH_EQUIVALENT_RANKED_GET_ALL_RESULTS);
+        final FrequencyTrie.Builder<String> trieBuilder = new FrequencyTrie.Builder<>(String[]::new, settings,
+                WordTraversalDirection.BACKWARD, CaseProcessingMode.AS_IS);
+        trieBuilder.put("House", "noun");
+        final FrequencyTrie<String> asIsTrie = trieBuilder.build();
+
+        assertAll(() -> assertEquals("noun", asIsTrie.get("House")), () -> assertNull(asIsTrie.get("house")),
+                () -> assertArrayEquals(new String[] { "noun" }, asIsTrie.getAll("House")),
+                () -> assertArrayEquals(new String[0], asIsTrie.getAll("HOUSE")));
+    }
+
     /**
      * Verifies that a missing path below an existing prefix returns empty results.
      */
diff --git a/src/test/java/org/egothor/stemmer/StemmerDictionaryParserTest.java b/src/test/java/org/egothor/stemmer/StemmerDictionaryParserTest.java
index 580627e..353c29c 100644
--- a/src/test/java/org/egothor/stemmer/StemmerDictionaryParserTest.java
+++ b/src/test/java/org/egothor/stemmer/StemmerDictionaryParserTest.java
@@ -64,7 +64,7 @@ import org.junit.jupiter.api.io.TempDir;
  * </p>
  * <ul>
  * <li>parsing through all public overloads,</li>
- * <li>normalization to lower case,</li>
+ * <li>case processing according to the selected mode,</li>
  * <li>handling of empty lines and remarks,</li>
  * <li>correct entry emission including line numbers,</li>
  * <li>propagation of I/O failures from the handler and file system,</li>
@@ -280,6 +280,22 @@ class StemmerDictionaryParserTest {
             assertEquals(expected, exception, "The original exception instance should be preserved.");
         }
 
+        @Test
+        @DisplayName("should preserve character case when AS_IS mode is selected")
+        void shouldPreserveCharacterCaseWhenAsIsModeIsSelected() throws IOException {
+            // Mixed-case stem and variants must reach the handler exactly as written.
+            final List<CapturedEntry> captured = new ArrayList<CapturedEntry>();
+            final StringReader mixedCaseSource = new StringReader("Root\tRunning\tRuns\tRUNNER\n");
+            final StemmerDictionaryParser.ParseStatistics parsed = StemmerDictionaryParser.parse(mixedCaseSource,
+                    "case-as-is", CaseProcessingMode.AS_IS, collectingHandler(captured));
+
+            assertAll("Statistics", () -> assertEquals(1, parsed.lineCount()),
+                    () -> assertEquals(1, parsed.entryCount()), () -> assertEquals(0, parsed.ignoredLineCount()));
+            assertEquals(1, captured.size(), "Exactly one entry should be emitted.");
+            assertAll("Entry", () -> assertEquals("Root", captured.get(0).stem()),
+                    () -> assertArrayEquals(new String[] { "Running", "Runs", "RUNNER" }, captured.get(0).variants()));
+        }
+
         @Test
         @DisplayName("should reject null reader")
         void shouldRejectNullReader() {
@@ -298,6 +314,15 @@ class StemmerDictionaryParserTest {
                     }));
         }
 
+        @Test
+        @DisplayName("should reject null case processing mode")
+        void shouldRejectNullCaseProcessingMode() {
+            assertThrows(NullPointerException.class, () -> StemmerDictionaryParser.parse(new StringReader("a b"),
+                    "source", null, (stem, variants, lineNumber) -> {
+                        // not invoked: the null mode is rejected before any line is read
+                    }));
+        }
+
         @Test
         @DisplayName("should reject null entry handler")
         void shouldRejectNullEntryHandler() {