";
+ }
+
+ /**
+ * Parsed command-line arguments.
+ *
+ * @param inputPath source stemmer file path
+ * @param outputPath target compressed artifact path
+ * @param storeOriginal whether original words are stored as identity rules
+ * @param reductionMode reduction mode to apply during compilation
+ */
+ private record Arguments(Path inputPath, Path outputPath, boolean storeOriginal, ReductionMode reductionMode) {
+
+ /**
+ * Parses the supplied command-line arguments.
+ *
+ * @param args command-line arguments
+ * @return parsed argument record
+ */
+ private static Arguments parse(final String[] args) {
+ Objects.requireNonNull(args, "args");
+
+ Path inputPath = null;
+ Path outputPath = null;
+ Boolean storeOriginal = null;
+ ReductionMode reductionMode = null;
+
+ int index = 0;
+ while (index < args.length) {
+ final String argument = args[index];
+
+ switch (argument) {
+ case "--input":
+ inputPath = Path.of(readRequiredValue(args, index, argument));
+ index += 2;
+ break;
+ case "--output":
+ outputPath = Path.of(readRequiredValue(args, index, argument));
+ index += 2;
+ break;
+ case "--store-original":
+ storeOriginal = Boolean.valueOf(readRequiredValue(args, index, argument));
+ index += 2;
+ break;
+ case "--reduction-mode":
+ reductionMode = ReductionMode.valueOf(readRequiredValue(args, index, argument));
+ index += 2;
+ break;
+ default:
+ throw new IllegalArgumentException("Unknown argument: " + argument);
+ }
+ }
+
+ if (inputPath == null) {
+ throw new IllegalArgumentException("Missing required argument: --input");
+ }
+ if (outputPath == null) {
+ throw new IllegalArgumentException("Missing required argument: --output");
+ }
+ if (storeOriginal == null) {
+ throw new IllegalArgumentException("Missing required argument: --store-original");
+ }
+ if (reductionMode == null) {
+ throw new IllegalArgumentException("Missing required argument: --reduction-mode");
+ }
+
+ return new Arguments(inputPath, outputPath, storeOriginal.booleanValue(), reductionMode);
+ }
+
+ /**
+ * Reads the required value immediately following an option key.
+ *
+ * @param args command-line arguments
+ * @param index current option index
+ * @param argument option key
+ * @return option value
+ */
+ private static String readRequiredValue(final String[] args, final int index, final String argument) {
+ final int valueIndex = index + 1;
+ if (valueIndex >= args.length) {
+ throw new IllegalArgumentException("Missing value for argument: " + argument);
+ }
+ return args[valueIndex];
+ }
+ }
+}
\ No newline at end of file
diff --git a/src/test/java/org/egothor/stemmer/RegressionArtifactSupport.java b/src/test/java/org/egothor/stemmer/RegressionArtifactSupport.java
new file mode 100644
index 0000000..efd5ff2
--- /dev/null
+++ b/src/test/java/org/egothor/stemmer/RegressionArtifactSupport.java
@@ -0,0 +1,217 @@
+package org.egothor.stemmer;
+
+import java.io.ByteArrayOutputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.nio.charset.StandardCharsets;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.security.MessageDigest;
+import java.security.NoSuchAlgorithmException;
+import java.util.HexFormat;
+import java.util.Objects;
+
+/**
+ * Test support utilities for compiled-artifact regression testing.
+ *
+ * <p>
+ * This helper centralizes resource loading, artifact compilation, digest
+ * calculation, and failure-message formatting so that regression tests stay
+ * focused on contract verification.
+ */
+final class RegressionArtifactSupport {
+
+ /**
+ * Utility class.
+ */
+ private RegressionArtifactSupport() {
+ throw new AssertionError("No instances.");
+ }
+
+ /**
+ * Copies a classpath resource to a filesystem path.
+ *
+ * @param resourcePath source resource path
+ * @param targetPath target file path
+ * @return target path
+ * @throws IOException if copying fails
+ */
+ static Path copyResourceToFile(final String resourcePath, final Path targetPath) throws IOException {
+ Objects.requireNonNull(resourcePath, "resourcePath");
+ Objects.requireNonNull(targetPath, "targetPath");
+
+ final Path parent = targetPath.toAbsolutePath().getParent();
+ if (parent != null) {
+ Files.createDirectories(parent);
+ }
+
+ try (InputStream inputStream = openResource(resourcePath)) {
+ Files.copy(inputStream, targetPath);
+ }
+
+ return targetPath;
+ }
+
+ /**
+ * Reads the complete bytes of a classpath resource.
+ *
+ * @param resourcePath resource path
+ * @return resource bytes
+ * @throws IOException if reading fails
+ */
+ static byte[] readResourceBytes(final String resourcePath) throws IOException {
+ Objects.requireNonNull(resourcePath, "resourcePath");
+
+ try (InputStream inputStream = openResource(resourcePath)) {
+ return inputStream.readAllBytes();
+ }
+ }
+
+ /**
+ * Reads a SHA-256 sidecar resource.
+ *
+ * <p>
+ * The sidecar may contain either just the hash or the conventional
+ * {@code <hash>  <filename>} form. Only the first token is used.
+ *
+ * @param resourcePath SHA-256 sidecar resource path
+ * @return normalized lowercase hex hash
+ * @throws IOException if reading fails
+ */
+ static String readSha256Resource(final String resourcePath) throws IOException {
+ final String content = new String(readResourceBytes(resourcePath), StandardCharsets.UTF_8).trim();
+ final int firstWhitespace = findFirstWhitespace(content);
+ final String hash = firstWhitespace < 0 ? content : content.substring(0, firstWhitespace);
+ return hash.toLowerCase(java.util.Locale.ROOT);
+ }
+
+ /**
+ * Compiles a source dictionary into a compressed binary artifact and writes it
+ * to the supplied file path.
+ *
+ * @param sourcePath dictionary source file
+ * @param storeOriginal whether stems are stored using no-op mappings
+ * @param reductionSettings reduction settings
+ * @param artifactOutputPath output artifact path
+ * @return written artifact bytes
+ * @throws IOException if compilation or writing fails
+ */
+ static byte[] compileToArtifact(final Path sourcePath, final boolean storeOriginal,
+ final ReductionSettings reductionSettings, final Path artifactOutputPath) throws IOException {
+ Objects.requireNonNull(sourcePath, "sourcePath");
+ Objects.requireNonNull(reductionSettings, "reductionSettings");
+ Objects.requireNonNull(artifactOutputPath, "artifactOutputPath");
+
+ final FrequencyTrie trie = StemmerPatchTrieLoader.load(sourcePath, storeOriginal, reductionSettings);
+ StemmerPatchTrieBinaryIO.write(trie, artifactOutputPath);
+ return Files.readAllBytes(artifactOutputPath);
+ }
+
+ /**
+ * Compiles a source dictionary into compressed binary artifact bytes without
+ * persisting the result on disk.
+ *
+ * @param sourcePath dictionary source file
+ * @param storeOriginal whether stems are stored using no-op mappings
+ * @param reductionSettings reduction settings
+ * @return artifact bytes
+ * @throws IOException if compilation fails
+ */
+ static byte[] compileToArtifactBytes(final Path sourcePath, final boolean storeOriginal,
+ final ReductionSettings reductionSettings) throws IOException {
+ Objects.requireNonNull(sourcePath, "sourcePath");
+ Objects.requireNonNull(reductionSettings, "reductionSettings");
+
+ final FrequencyTrie trie = StemmerPatchTrieLoader.load(sourcePath, storeOriginal, reductionSettings);
+
+ try (ByteArrayOutputStream outputStream = new ByteArrayOutputStream()) {
+ StemmerPatchTrieBinaryIO.write(trie, outputStream);
+ return outputStream.toByteArray();
+ }
+ }
+
+ /**
+ * Computes the lowercase hexadecimal SHA-256 digest of the supplied bytes.
+ *
+ * @param bytes input bytes
+ * @return lowercase hexadecimal SHA-256 digest
+ */
+ static String sha256Hex(final byte[] bytes) {
+ Objects.requireNonNull(bytes, "bytes");
+
+ try {
+ final MessageDigest messageDigest = MessageDigest.getInstance("SHA-256");
+ return HexFormat.of().formatHex(messageDigest.digest(bytes));
+ } catch (NoSuchAlgorithmException exception) {
+ throw new IllegalStateException("SHA-256 digest is unavailable.", exception);
+ }
+ }
+
+ /**
+ * Builds a descriptive mismatch message for golden-artifact failures.
+ *
+ * @param caseId regression case identifier
+ * @param expectedSha256 expected digest
+ * @param actualSha256 actual digest
+ * @param actualPath location of the produced artifact
+ * @return mismatch message
+ */
+ static String mismatchMessage(final String caseId, final String expectedSha256, final String actualSha256,
+ final Path actualPath) {
+ return "Golden artifact mismatch for case '" + caseId + "'. Expected SHA-256=" + expectedSha256
+ + ", actual SHA-256=" + actualSha256 + ", produced artifact=" + actualPath.toAbsolutePath();
+ }
+
+ /**
+ * Opens a classpath resource.
+ *
+ * @param resourcePath resource path
+ * @return opened resource stream
+ * @throws IOException if the resource does not exist
+ */
+ private static InputStream openResource(final String resourcePath) throws IOException {
+ Objects.requireNonNull(resourcePath, "resourcePath");
+
+ final String normalizedPath = resourcePath.startsWith("/") ? resourcePath : "/" + resourcePath;
+
+ InputStream inputStream = RegressionArtifactSupport.class.getResourceAsStream(normalizedPath);
+ if (inputStream != null) {
+ return inputStream;
+ }
+
+ final ClassLoader contextClassLoader = Thread.currentThread().getContextClassLoader();
+ if (contextClassLoader != null) {
+ inputStream = contextClassLoader
+ .getResourceAsStream(normalizedPath.startsWith("/") ? normalizedPath.substring(1) : normalizedPath);
+ if (inputStream != null) {
+ return inputStream;
+ }
+ }
+
+ final ClassLoader classLoader = RegressionArtifactSupport.class.getClassLoader();
+ if (classLoader != null) {
+ inputStream = classLoader
+ .getResourceAsStream(normalizedPath.startsWith("/") ? normalizedPath.substring(1) : normalizedPath);
+ if (inputStream != null) {
+ return inputStream;
+ }
+ }
+
+ throw new IOException("Classpath resource not found: " + resourcePath);
+ }
+
+ /**
+ * Finds the index of the first whitespace character.
+ *
+ * @param text text to inspect
+ * @return first whitespace index, or {@code -1} when no whitespace is present
+ */
+ private static int findFirstWhitespace(final String text) {
+ for (int index = 0; index < text.length(); index++) {
+ if (Character.isWhitespace(text.charAt(index))) {
+ return index;
+ }
+ }
+ return -1;
+ }
+}
\ No newline at end of file
diff --git a/src/test/resources/regression/golden/branching-en-ranked-no-storeorig.gz b/src/test/resources/regression/golden/branching-en-ranked-no-storeorig.gz
new file mode 100644
index 0000000..4e60c29
Binary files /dev/null and b/src/test/resources/regression/golden/branching-en-ranked-no-storeorig.gz differ
diff --git a/src/test/resources/regression/golden/branching-en-ranked-no-storeorig.gz.sha256 b/src/test/resources/regression/golden/branching-en-ranked-no-storeorig.gz.sha256
new file mode 100644
index 0000000..08b5c88
--- /dev/null
+++ b/src/test/resources/regression/golden/branching-en-ranked-no-storeorig.gz.sha256
@@ -0,0 +1 @@
+62f6419ebab324a69e2e4ef9753687326aa20eed4e851a0f2b63a10f50d2eaae branching-en-ranked-no-storeorig.gz
diff --git a/src/test/resources/regression/golden/mini-en-ranked-storeorig.gz b/src/test/resources/regression/golden/mini-en-ranked-storeorig.gz
new file mode 100644
index 0000000..d07dabb
Binary files /dev/null and b/src/test/resources/regression/golden/mini-en-ranked-storeorig.gz differ
diff --git a/src/test/resources/regression/golden/mini-en-ranked-storeorig.gz.sha256 b/src/test/resources/regression/golden/mini-en-ranked-storeorig.gz.sha256
new file mode 100644
index 0000000..495ea9e
--- /dev/null
+++ b/src/test/resources/regression/golden/mini-en-ranked-storeorig.gz.sha256
@@ -0,0 +1 @@
+7b65be9ed9ffab418ed2d1fccc219ea6925e192aa27cdefe5c8383570becd28f mini-en-ranked-storeorig.gz
diff --git a/src/test/resources/regression/golden/mini-en-unordered-storeorig.gz b/src/test/resources/regression/golden/mini-en-unordered-storeorig.gz
new file mode 100644
index 0000000..d07dabb
Binary files /dev/null and b/src/test/resources/regression/golden/mini-en-unordered-storeorig.gz differ
diff --git a/src/test/resources/regression/golden/mini-en-unordered-storeorig.gz.sha256 b/src/test/resources/regression/golden/mini-en-unordered-storeorig.gz.sha256
new file mode 100644
index 0000000..aa29c85
--- /dev/null
+++ b/src/test/resources/regression/golden/mini-en-unordered-storeorig.gz.sha256
@@ -0,0 +1 @@
+7b65be9ed9ffab418ed2d1fccc219ea6925e192aa27cdefe5c8383570becd28f mini-en-unordered-storeorig.gz
diff --git a/src/test/resources/regression/sources/branching-en.stemmer b/src/test/resources/regression/sources/branching-en.stemmer
new file mode 100644
index 0000000..b25e967
--- /dev/null
+++ b/src/test/resources/regression/sources/branching-en.stemmer
@@ -0,0 +1,5 @@
+# Focused on subtree branching and repeated suffix families
+connect connected connecting connects connection
+collect collected collecting collects collection
+inspect inspected inspecting inspects inspection
+direct directed directing directs direction
diff --git a/src/test/resources/regression/sources/mini-en.stemmer b/src/test/resources/regression/sources/mini-en.stemmer
new file mode 100644
index 0000000..3eb2ba2
--- /dev/null
+++ b/src/test/resources/regression/sources/mini-en.stemmer
@@ -0,0 +1,6 @@
+# Basic English sample with remarks and mixed suffix patterns
+run running runs runner
+study studies studying
+city cities
+fly flies flying
+stop stopped stopping stops
diff --git a/tools/generate-regression-artifacts.sh b/tools/generate-regression-artifacts.sh
new file mode 100755
index 0000000..a51b43f
--- /dev/null
+++ b/tools/generate-regression-artifacts.sh
@@ -0,0 +1,256 @@
+#!/usr/bin/env bash
+set -Eeuo pipefail
+
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+PROJECT_DIR="$(cd "${SCRIPT_DIR}/.." && pwd)"
+
+SOURCE_DIR="${PROJECT_DIR}/src/test/resources/regression/sources"
+GOLDEN_DIR="${PROJECT_DIR}/src/test/resources/regression/golden"
+BUILD_DIR="${PROJECT_DIR}/build/tmp/regression-artifacts"
+
+MAIN_CLASS="org.egothor.stemmer.RegressionArtifactGenerator"
+
+usage() {
+ cat <<'EOF'
+Generate deterministic compiled trie regression artifacts and SHA-256 sidecar files.
+
+Usage:
+ generate-regression-artifacts.sh [--clean] [--case <case-id>]...
+
+Options:
+ --clean Remove previously generated temporary files before execution.
+ --case <case-id>  Generate only the selected case. May be repeated.
+ --help Show this help.
+
+Known case identifiers:
+ 01-mini-ranked-store-original
+ 02-mini-unordered-store-original
+ 03-branching-ranked-no-store-original
+
+Notes:
+ - This script expects a helper Java class:
+ org.egothor.stemmer.RegressionArtifactGenerator
+ - The helper should compile the stemmer source into a .gz artifact using the
+ project's real binary writer implementation.
+ - The script writes:
+ src/test/resources/regression/golden/*.gz
+ src/test/resources/regression/golden/*.gz.sha256
+EOF
+}
+
+log() {
+ printf '[INFO] %s\n' "$*"
+}
+
+fail() {
+ printf '[ERROR] %s\n' "$*" >&2
+ exit 1
+}
+
+require_file() {
+ local path="$1"
+ [[ -f "${path}" ]] || fail "Required file not found: ${path}"
+}
+
+compute_sha256() {
+ local file_path="$1"
+ local file_name
+ file_name="$(basename "${file_path}")"
+
+ if command -v sha256sum >/dev/null 2>&1; then
+ local digest
+ digest="$(sha256sum "${file_path}" | awk '{print $1}')"
+ printf '%s %s\n' "${digest}" "${file_name}"
+ return 0
+ fi
+
+ if command -v shasum >/dev/null 2>&1; then
+ local digest
+ digest="$(shasum -a 256 "${file_path}" | awk '{print $1}')"
+ printf '%s %s\n' "${digest}" "${file_name}"
+ return 0
+ fi
+
+ if command -v openssl >/dev/null 2>&1; then
+ local digest
+ digest="$(openssl dgst -sha256 "${file_path}" | awk '{print $2}')"
+ printf '%s %s\n' "${digest}" "${file_name}"
+ return 0
+ fi
+
+ fail "No SHA-256 tool available. Install sha256sum, shasum, or openssl."
+}
+
+declare -a REQUESTED_CASES=()
+CLEAN="false"
+
+while [[ $# -gt 0 ]]; do
+ case "$1" in
+ --clean)
+ CLEAN="true"
+ shift
+ ;;
+ --case)
+ [[ $# -ge 2 ]] || fail "Missing value for --case."
+ REQUESTED_CASES+=("$2")
+ shift 2
+ ;;
+ --help|-h)
+ usage
+ exit 0
+ ;;
+ *)
+ fail "Unknown argument: $1"
+ ;;
+ esac
+done
+
+mkdir -p "${GOLDEN_DIR}"
+mkdir -p "${BUILD_DIR}"
+
+if [[ "${CLEAN}" == "true" ]]; then
+ log "Cleaning temporary directory: ${BUILD_DIR}"
+ rm -rf "${BUILD_DIR}"
+ mkdir -p "${BUILD_DIR}"
+fi
+
+declare -a CASE_IDS=()
+declare -A CASE_SOURCE=()
+declare -A CASE_STORE_ORIGINAL=()
+declare -A CASE_REDUCTION_MODE=()
+declare -A CASE_ARTIFACT=()
+
+CASE_IDS+=("01-mini-ranked-store-original")
+CASE_SOURCE["01-mini-ranked-store-original"]="${SOURCE_DIR}/mini-en.stemmer"
+CASE_STORE_ORIGINAL["01-mini-ranked-store-original"]="true"
+CASE_REDUCTION_MODE["01-mini-ranked-store-original"]="MERGE_SUBTREES_WITH_EQUIVALENT_RANKED_GET_ALL_RESULTS"
+CASE_ARTIFACT["01-mini-ranked-store-original"]="${GOLDEN_DIR}/mini-en-ranked-storeorig.gz"
+
+CASE_IDS+=("02-mini-unordered-store-original")
+CASE_SOURCE["02-mini-unordered-store-original"]="${SOURCE_DIR}/mini-en.stemmer"
+CASE_STORE_ORIGINAL["02-mini-unordered-store-original"]="true"
+CASE_REDUCTION_MODE["02-mini-unordered-store-original"]="MERGE_SUBTREES_WITH_EQUIVALENT_UNORDERED_GET_ALL_RESULTS"
+CASE_ARTIFACT["02-mini-unordered-store-original"]="${GOLDEN_DIR}/mini-en-unordered-storeorig.gz"
+
+CASE_IDS+=("03-branching-ranked-no-store-original")
+CASE_SOURCE["03-branching-ranked-no-store-original"]="${SOURCE_DIR}/branching-en.stemmer"
+CASE_STORE_ORIGINAL["03-branching-ranked-no-store-original"]="false"
+CASE_REDUCTION_MODE["03-branching-ranked-no-store-original"]="MERGE_SUBTREES_WITH_EQUIVALENT_RANKED_GET_ALL_RESULTS"
+CASE_ARTIFACT["03-branching-ranked-no-store-original"]="${GOLDEN_DIR}/branching-en-ranked-no-storeorig.gz"
+
+is_requested_case() {
+ local case_id="$1"
+
+ if [[ ${#REQUESTED_CASES[@]} -eq 0 ]]; then
+ return 0
+ fi
+
+ local requested
+ for requested in "${REQUESTED_CASES[@]}"; do
+ if [[ "${requested}" == "${case_id}" ]]; then
+ return 0
+ fi
+ done
+
+ return 1
+}
+
+validate_requested_cases() {
+ if [[ ${#REQUESTED_CASES[@]} -eq 0 ]]; then
+ return 0
+ fi
+
+ local requested
+ local known
+ local found
+
+ for requested in "${REQUESTED_CASES[@]}"; do
+ found="false"
+ for known in "${CASE_IDS[@]}"; do
+ if [[ "${requested}" == "${known}" ]]; then
+ found="true"
+ break
+ fi
+ done
+ [[ "${found}" == "true" ]] || fail "Unknown case identifier: ${requested}"
+ done
+}
+
+run_generator() {
+ local input_file="$1"
+ local output_file="$2"
+ local store_original="$3"
+ local reduction_mode="$4"
+
+ "${PROJECT_DIR}/gradlew" \
+ --no-daemon \
+ -q \
+ testClasses \
+ regressionArtifactGenerator \
+ -PregressionInput="${input_file}" \
+ -PregressionOutput="${output_file}" \
+ -PregressionStoreOriginal="${store_original}" \
+ -PregressionReductionMode="${reduction_mode}"
+}
+
+# Fallback path when the project does not expose a generic run task.
+run_generator_with_javaexec_fallback() {
+ local input_file="$1"
+ local output_file="$2"
+ local store_original="$3"
+ local reduction_mode="$4"
+
+ "${PROJECT_DIR}/gradlew" \
+ --no-daemon \
+ -q \
+ testClasses \
+ -PregressionGeneratorMainClass="${MAIN_CLASS}" \
+ -PregressionGeneratorArgs="--input=${input_file} --output=${output_file} --store-original=${store_original} --reduction-mode=${reduction_mode}" \
+ regressionArtifactGenerator
+}
+
+generate_case() {
+ local case_id="$1"
+ local source_file="${CASE_SOURCE[${case_id}]}"
+ local artifact_file="${CASE_ARTIFACT[${case_id}]}"
+ local sha_file="${artifact_file}.sha256"
+ local store_original="${CASE_STORE_ORIGINAL[${case_id}]}"
+ local reduction_mode="${CASE_REDUCTION_MODE[${case_id}]}"
+ local temp_output="${BUILD_DIR}/$(basename "${artifact_file}")"
+
+ require_file "${source_file}"
+
+ log "Generating case: ${case_id}"
+ log " source: ${source_file}"
+ log " artifact: ${artifact_file}"
+ log " reduction mode: ${reduction_mode}"
+ log " store original: ${store_original}"
+
+ rm -f "${temp_output}"
+
+ if "${PROJECT_DIR}/gradlew" tasks --all 2>/dev/null | grep -q '^run '; then
+ run_generator "${source_file}" "${temp_output}" "${store_original}" "${reduction_mode}"
+ elif "${PROJECT_DIR}/gradlew" tasks --all 2>/dev/null | grep -q '^regressionArtifactGenerator '; then
+ run_generator_with_javaexec_fallback "${source_file}" "${temp_output}" "${store_original}" "${reduction_mode}"
+ else
+ fail "No supported Gradle execution path found. Expected a 'run' or 'regressionArtifactGenerator' task."
+ fi
+
+ require_file "${temp_output}"
+
+ mv "${temp_output}" "${artifact_file}"
+ compute_sha256 "${artifact_file}" > "${sha_file}"
+
+ log " wrote artifact: ${artifact_file}"
+ log " wrote digest: ${sha_file}"
+}
+
+validate_requested_cases
+
+for case_id in "${CASE_IDS[@]}"; do
+ if is_requested_case "${case_id}"; then
+ generate_case "${case_id}"
+ fi
+done
+
+log "Regression artifacts were generated successfully."