Files
Radixor/tools/generate-regression-artifacts.sh
Leo Galambos ad8fe0ea1b feat: add deterministic compiled-trie artifact regression tooling
test: add deterministic regression coverage for compiled trie artifacts
test: add golden artifact resources and SHA-256 sidecar validation
test: add compiled trie artifact generator utility for regression preparation
build: add Gradle task for regression artifact generation
chore: add bash script to generate golden compiled trie regression files
fix: normalize SHA-256 sidecar output to use artifact basename only
fix: harden test resource loading for regression classpath access
fix: reconstruct stems from patch commands in golden artifact semantic probes
2026-04-14 19:12:51 +02:00

257 lines
7.6 KiB
Bash
Executable File

#!/usr/bin/env bash
#
# Generates the golden compiled-trie regression artifacts (*.gz) and their
# SHA-256 sidecar files by driving the project's Gradle wrapper.
#
# Strict mode: abort on errors (-e), on unset variables (-u), on failures
# anywhere in a pipeline (pipefail), and let ERR traps propagate into
# functions (-E).
set -Eeuo pipefail

# Absolute directory containing this script. CDPATH is cleared for the `cd`
# because, when CDPATH is set and the script is invoked through a relative
# path, `cd` may resolve a different directory and echo it to stdout, which
# would corrupt the captured value. `--` protects directory names that begin
# with a dash.
SCRIPT_DIR="$(CDPATH='' cd -- "$(dirname -- "${BASH_SOURCE[0]}")" && pwd)"

# Project root: the parent directory of the script directory.
PROJECT_DIR="$(CDPATH='' cd -- "${SCRIPT_DIR}/.." && pwd)"

# Input stemmer sources, committed golden artifacts, and scratch output.
SOURCE_DIR="${PROJECT_DIR}/src/test/resources/regression/sources"
GOLDEN_DIR="${PROJECT_DIR}/src/test/resources/regression/golden"
BUILD_DIR="${PROJECT_DIR}/build/tmp/regression-artifacts"

# Fully qualified name of the Java helper passed to Gradle by the fallback
# execution path.
MAIN_CLASS="org.egothor.stemmer.RegressionArtifactGenerator"
# Print the command-line help text to stdout, one entry per output line.
usage() {
  local -a help_lines=(
    'Generate deterministic compiled trie regression artifacts and SHA-256 sidecar files.'
    'Usage:'
    'generate-regression-artifacts.sh [--clean] [--case <id>]...'
    'Options:'
    '--clean Remove previously generated temporary files before execution.'
    '--case <id> Generate only the selected case. May be repeated.'
    '--help Show this help.'
    'Known case identifiers:'
    '01-mini-ranked-store-original'
    '02-mini-unordered-store-original'
    '03-branching-ranked-no-store-original'
    'Notes:'
    '- This script expects a helper Java class:'
    'org.egothor.stemmer.RegressionArtifactGenerator'
    '- The helper should compile the stemmer source into a .gz artifact using the'
    "project's real binary writer implementation."
    '- The script writes:'
    'src/test/resources/regression/golden/*.gz'
    'src/test/resources/regression/golden/*.gz.sha256'
  )
  printf '%s\n' "${help_lines[@]}"
}
# Write an informational message to stdout.
# Arguments: all arguments are joined into a single message line.
log() {
  local message="$*"
  printf '[INFO] %s\n' "${message}"
}
# Report a fatal error on stderr and terminate the script with status 1.
# Arguments: all arguments are joined into a single message line.
fail() {
  local reason="$*"
  printf '[ERROR] %s\n' "${reason}" 1>&2
  exit 1
}
# Abort through fail unless the given path is an existing regular file.
# Arguments: $1 - path that must exist.
require_file() {
  local candidate="$1"
  if [[ ! -f "${candidate}" ]]; then
    fail "Required file not found: ${candidate}"
  fi
}
# Print the SHA-256 sidecar line for a file: "<hex digest> <basename>".
#
# The first available tool among sha256sum, shasum and openssl is used.
# The file is always fed through stdin so the tool output never embeds the
# file name: GNU sha256sum prefixes the digest with a backslash when the name
# contains a backslash or newline, and the openssl output puts the path in
# front of the digest, which breaks field-based parsing for paths containing
# spaces.
#
# Arguments: $1 - path of the file to hash.
# Outputs:   one line on stdout.
# Exits through fail when no tool is available or the digest is malformed.
compute_sha256() {
  local file_path="$1"
  local file_name
  local digest

  file_name="$(basename "${file_path}")"

  if command -v sha256sum >/dev/null 2>&1; then
    digest="$(sha256sum < "${file_path}" | awk '{print $1}')"
  elif command -v shasum >/dev/null 2>&1; then
    digest="$(shasum -a 256 < "${file_path}" | awk '{print $1}')"
  elif command -v openssl >/dev/null 2>&1; then
    # The digest is the last field regardless of the label openssl prints
    # before it.
    digest="$(openssl dgst -sha256 < "${file_path}" | awk '{print $NF}')"
  else
    fail "No SHA-256 tool available. Install sha256sum, shasum, or openssl."
  fi

  # Never write a sidecar that does not contain a well-formed digest.
  [[ "${digest}" =~ ^[0-9a-fA-F]{64}$ ]] \
    || fail "Unexpected SHA-256 digest for ${file_path}: '${digest}'"

  printf '%s %s\n' "${digest}" "${file_name}"
}
# Command-line parsing.
#   CLEAN           - "true" once --clean has been seen, otherwise "false".
#   REQUESTED_CASES - case identifiers collected from every --case option;
#                     left empty when no --case option is given.
CLEAN="false"
declare -a REQUESTED_CASES=()

while (( $# > 0 )); do
  case "$1" in
    -h | --help)
      usage
      exit 0
      ;;
    --clean)
      CLEAN="true"
      shift
      ;;
    --case)
      # The identifier is the next word; refuse a dangling --case.
      if (( $# < 2 )); then
        fail "Missing value for --case."
      fi
      REQUESTED_CASES+=("$2")
      shift 2
      ;;
    *)
      fail "Unknown argument: $1"
      ;;
  esac
done
# Make sure the golden output directory and the scratch directory exist.
mkdir -p "${GOLDEN_DIR}"
mkdir -p "${BUILD_DIR}"

# With --clean, discard whatever the scratch directory contained and start
# from an empty one.
case "${CLEAN}" in
  true)
    log "Cleaning temporary directory: ${BUILD_DIR}"
    rm -rf "${BUILD_DIR}"
    mkdir -p "${BUILD_DIR}"
    ;;
esac
# Registry of regression cases.
#   CASE_IDS            - case identifiers in generation order.
#   CASE_SOURCE         - id -> stemmer source file under SOURCE_DIR.
#   CASE_STORE_ORIGINAL - id -> "true"/"false" flag handed to the generator.
#   CASE_REDUCTION_MODE - id -> reduction mode name handed to the generator.
#   CASE_ARTIFACT       - id -> golden artifact path under GOLDEN_DIR.
declare -a CASE_IDS=()
declare -A CASE_SOURCE=()
declare -A CASE_STORE_ORIGINAL=()
declare -A CASE_REDUCTION_MODE=()
declare -A CASE_ARTIFACT=()

# Add one case to the registry.
# Arguments: $1 id, $2 source file name, $3 store-original flag,
#            $4 reduction mode, $5 artifact file name.
_register_case() {
  local id="$1"
  CASE_IDS+=("${id}")
  CASE_SOURCE["${id}"]="${SOURCE_DIR}/$2"
  CASE_STORE_ORIGINAL["${id}"]="$3"
  CASE_REDUCTION_MODE["${id}"]="$4"
  CASE_ARTIFACT["${id}"]="${GOLDEN_DIR}/$5"
}

_register_case "01-mini-ranked-store-original" \
  "mini-en.stemmer" \
  "true" \
  "MERGE_SUBTREES_WITH_EQUIVALENT_RANKED_GET_ALL_RESULTS" \
  "mini-en-ranked-storeorig.gz"

_register_case "02-mini-unordered-store-original" \
  "mini-en.stemmer" \
  "true" \
  "MERGE_SUBTREES_WITH_EQUIVALENT_UNORDERED_GET_ALL_RESULTS" \
  "mini-en-unordered-storeorig.gz"

_register_case "03-branching-ranked-no-store-original" \
  "branching-en.stemmer" \
  "false" \
  "MERGE_SUBTREES_WITH_EQUIVALENT_RANKED_GET_ALL_RESULTS" \
  "branching-en-ranked-no-storeorig.gz"
# Tell whether a case should be generated in this run.
# Globals:   REQUESTED_CASES (read).
# Arguments: $1 - case identifier.
# Returns:   0 when no --case filter was given or the identifier is listed
#            in REQUESTED_CASES, 1 otherwise.
is_requested_case() {
  local wanted="$1"
  local candidate

  # An empty filter selects every case.
  (( ${#REQUESTED_CASES[@]} > 0 )) || return 0

  for candidate in "${REQUESTED_CASES[@]}"; do
    [[ "${candidate}" != "${wanted}" ]] || return 0
  done
  return 1
}
# Verify that every identifier given through --case is a registered case.
# Globals: REQUESTED_CASES (read), CASE_IDS (read).
# Returns 0 when the filter is empty or fully known; otherwise exits through
# fail on the first unknown identifier.
validate_requested_cases() {
  local candidate
  local registered

  (( ${#REQUESTED_CASES[@]} > 0 )) || return 0

  for candidate in "${REQUESTED_CASES[@]}"; do
    for registered in "${CASE_IDS[@]}"; do
      if [[ "${registered}" == "${candidate}" ]]; then
        # Known identifier: move on to the next requested one.
        continue 2
      fi
    done
    fail "Unknown case identifier: ${candidate}"
  done
  return 0
}
# Run the generator through the Gradle wrapper, handing each setting over as
# its own -P project property.
# Globals:   PROJECT_DIR (read).
# Arguments: $1 input stemmer file, $2 output artifact file,
#            $3 store-original flag, $4 reduction mode.
# Returns:   the exit status of the Gradle wrapper.
run_generator() {
  local input_file="$1"
  local output_file="$2"
  local store_original="$3"
  local reduction_mode="$4"

  local -a gradle_args=(
    --no-daemon
    -q
    testClasses
    regressionArtifactGenerator
    "-PregressionInput=${input_file}"
    "-PregressionOutput=${output_file}"
    "-PregressionStoreOriginal=${store_original}"
    "-PregressionReductionMode=${reduction_mode}"
  )

  "${PROJECT_DIR}/gradlew" "${gradle_args[@]}"
}
# Fallback path when the project does not expose a generic run task.
#
# Runs the regressionArtifactGenerator Gradle task, passing the helper main
# class and one space-separated argument string as project properties.
# Globals:   PROJECT_DIR (read), MAIN_CLASS (read).
# Arguments: $1 input stemmer file, $2 output artifact file,
#            $3 store-original flag, $4 reduction mode.
# Returns:   the exit status of the Gradle wrapper.
# NOTE(review): the generator arguments travel as a single space-separated
# string, so paths containing spaces are presumably not safe here - confirm
# against how the Gradle task splits regressionGeneratorArgs.
run_generator_with_javaexec_fallback() {
  local input_file="$1"
  local output_file="$2"
  local store_original="$3"
  local reduction_mode="$4"

  local generator_args="--input=${input_file} --output=${output_file}"
  generator_args+=" --store-original=${store_original}"
  generator_args+=" --reduction-mode=${reduction_mode}"

  local -a gradle_args=(
    --no-daemon
    -q
    testClasses
    "-PregressionGeneratorMainClass=${MAIN_CLASS}"
    "-PregressionGeneratorArgs=${generator_args}"
    regressionArtifactGenerator
  )

  "${PROJECT_DIR}/gradlew" "${gradle_args[@]}"
}
# Generate the golden artifact and SHA-256 sidecar file for one case.
#
# The generator writes into BUILD_DIR first; the result is moved to its
# golden location only after the output file is confirmed to exist, and the
# sidecar is written from the moved artifact.
#
# Globals:   CASE_SOURCE, CASE_ARTIFACT, CASE_STORE_ORIGINAL,
#            CASE_REDUCTION_MODE, BUILD_DIR, PROJECT_DIR (all read).
# Arguments: $1 - case identifier used as key into the CASE_* maps.
# Outputs:   progress lines through log; <artifact> and <artifact>.sha256.
# Exits through fail when the source file is missing, Gradle cannot list its
# tasks, no supported task is found, or the generator produced no output.
generate_case() {
  local case_id="$1"
  local source_file="${CASE_SOURCE[${case_id}]}"
  local artifact_file="${CASE_ARTIFACT[${case_id}]}"
  local sha_file="${artifact_file}.sha256"
  local store_original="${CASE_STORE_ORIGINAL[${case_id}]}"
  local reduction_mode="${CASE_REDUCTION_MODE[${case_id}]}"
  # Same file name as the golden artifact, but inside the scratch directory.
  local temp_output="${BUILD_DIR}/${artifact_file##*/}"
  local gradle_tasks

  require_file "${source_file}"

  log "Generating case: ${case_id}"
  log " source: ${source_file}"
  log " artifact: ${artifact_file}"
  log " reduction mode: ${reduction_mode}"
  log " store original: ${store_original}"

  # Drop any stale output so the existence check below proves a fresh run.
  rm -f "${temp_output}"

  # Capture the task listing once instead of piping it into `grep -q`.
  # Under `set -o pipefail` a pipeline whose reader exits on an early match
  # can report failure (the writer is killed by SIGPIPE), which would make an
  # existing task look missing. Capturing also halves the Gradle start-ups
  # and lets a broken Gradle invocation be reported as such.
  gradle_tasks="$("${PROJECT_DIR}/gradlew" tasks --all 2>/dev/null)" \
    || fail "Unable to list Gradle tasks using ${PROJECT_DIR}/gradlew."

  # A task is listed as "name - description" or, without a description, as
  # the bare name, so accept both a following space and end of line.
  # NOTE(review): both paths end up invoking regressionArtifactGenerator; the
  # 'run' probe only selects how the arguments are passed - confirm this is
  # the intended selection rule.
  if grep -Eq '^run( |$)' <<<"${gradle_tasks}"; then
    run_generator \
      "${source_file}" "${temp_output}" "${store_original}" "${reduction_mode}"
  elif grep -Eq '^regressionArtifactGenerator( |$)' <<<"${gradle_tasks}"; then
    run_generator_with_javaexec_fallback \
      "${source_file}" "${temp_output}" "${store_original}" "${reduction_mode}"
  else
    fail "No supported Gradle execution path found. Expected a 'run' or 'regressionArtifactGenerator' task."
  fi

  require_file "${temp_output}"
  mv "${temp_output}" "${artifact_file}"
  compute_sha256 "${artifact_file}" > "${sha_file}"

  log " wrote artifact: ${artifact_file}"
  log " wrote digest: ${sha_file}"
}
# Main flow: reject unknown --case values first, then generate every
# registered case selected by the filter, in registration order.
validate_requested_cases

for case_id in "${CASE_IDS[@]}"; do
  # Skip unselected cases. generate_case stays a plain statement (not part of
  # an && / || list) so that `set -e` remains in effect inside it.
  is_requested_case "${case_id}" || continue
  generate_case "${case_id}"
done

log "Regression artifacts were generated successfully."