test: add deterministic regression coverage for compiled trie artifacts test: add golden artifact resources and SHA-256 sidecar validation test: add compiled trie artifact generator utility for regression preparation build: add Gradle task for regression artifact generation chore: add bash script to generate golden compiled trie regression files fix: normalize SHA-256 sidecar output to use artifact basename only fix: harden test resource loading for regression classpath access fix: reconstruct stems from patch commands in golden artifact semantic probes
257 lines
7.6 KiB
Bash
Executable File
257 lines
7.6 KiB
Bash
Executable File
#!/usr/bin/env bash
|
|
set -Eeuo pipefail
|
|
|
|
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
|
PROJECT_DIR="$(cd "${SCRIPT_DIR}/.." && pwd)"
|
|
|
|
SOURCE_DIR="${PROJECT_DIR}/src/test/resources/regression/sources"
|
|
GOLDEN_DIR="${PROJECT_DIR}/src/test/resources/regression/golden"
|
|
BUILD_DIR="${PROJECT_DIR}/build/tmp/regression-artifacts"
|
|
|
|
MAIN_CLASS="org.egothor.stemmer.RegressionArtifactGenerator"
|
|
|
|
usage() {
|
|
cat <<'EOF'
|
|
Generate deterministic compiled trie regression artifacts and SHA-256 sidecar files.
|
|
|
|
Usage:
|
|
generate-regression-artifacts.sh [--clean] [--case <id>]...
|
|
|
|
Options:
|
|
--clean Remove previously generated temporary files before execution.
|
|
--case <id> Generate only the selected case. May be repeated.
|
|
--help Show this help.
|
|
|
|
Known case identifiers:
|
|
01-mini-ranked-store-original
|
|
02-mini-unordered-store-original
|
|
03-branching-ranked-no-store-original
|
|
|
|
Notes:
|
|
- This script expects a helper Java class:
|
|
org.egothor.stemmer.RegressionArtifactGenerator
|
|
- The helper should compile the stemmer source into a .gz artifact using the
|
|
project's real binary writer implementation.
|
|
- The script writes:
|
|
src/test/resources/regression/golden/*.gz
|
|
src/test/resources/regression/golden/*.gz.sha256
|
|
EOF
|
|
}
|
|
|
|
log() {
|
|
printf '[INFO] %s\n' "$*"
|
|
}
|
|
|
|
fail() {
|
|
printf '[ERROR] %s\n' "$*" >&2
|
|
exit 1
|
|
}
|
|
|
|
require_file() {
|
|
local path="$1"
|
|
[[ -f "${path}" ]] || fail "Required file not found: ${path}"
|
|
}
|
|
|
|
compute_sha256() {
|
|
local file_path="$1"
|
|
local file_name
|
|
file_name="$(basename "${file_path}")"
|
|
|
|
if command -v sha256sum >/dev/null 2>&1; then
|
|
local digest
|
|
digest="$(sha256sum "${file_path}" | awk '{print $1}')"
|
|
printf '%s %s\n' "${digest}" "${file_name}"
|
|
return 0
|
|
fi
|
|
|
|
if command -v shasum >/dev/null 2>&1; then
|
|
local digest
|
|
digest="$(shasum -a 256 "${file_path}" | awk '{print $1}')"
|
|
printf '%s %s\n' "${digest}" "${file_name}"
|
|
return 0
|
|
fi
|
|
|
|
if command -v openssl >/dev/null 2>&1; then
|
|
local digest
|
|
digest="$(openssl dgst -sha256 "${file_path}" | awk '{print $2}')"
|
|
printf '%s %s\n' "${digest}" "${file_name}"
|
|
return 0
|
|
fi
|
|
|
|
fail "No SHA-256 tool available. Install sha256sum, shasum, or openssl."
|
|
}
|
|
|
|
declare -a REQUESTED_CASES=()
|
|
CLEAN="false"
|
|
|
|
while [[ $# -gt 0 ]]; do
|
|
case "$1" in
|
|
--clean)
|
|
CLEAN="true"
|
|
shift
|
|
;;
|
|
--case)
|
|
[[ $# -ge 2 ]] || fail "Missing value for --case."
|
|
REQUESTED_CASES+=("$2")
|
|
shift 2
|
|
;;
|
|
--help|-h)
|
|
usage
|
|
exit 0
|
|
;;
|
|
*)
|
|
fail "Unknown argument: $1"
|
|
;;
|
|
esac
|
|
done
|
|
|
|
mkdir -p "${GOLDEN_DIR}"
|
|
mkdir -p "${BUILD_DIR}"
|
|
|
|
if [[ "${CLEAN}" == "true" ]]; then
|
|
log "Cleaning temporary directory: ${BUILD_DIR}"
|
|
rm -rf "${BUILD_DIR}"
|
|
mkdir -p "${BUILD_DIR}"
|
|
fi
|
|
|
|
declare -a CASE_IDS=()
|
|
declare -A CASE_SOURCE=()
|
|
declare -A CASE_STORE_ORIGINAL=()
|
|
declare -A CASE_REDUCTION_MODE=()
|
|
declare -A CASE_ARTIFACT=()
|
|
|
|
CASE_IDS+=("01-mini-ranked-store-original")
|
|
CASE_SOURCE["01-mini-ranked-store-original"]="${SOURCE_DIR}/mini-en.stemmer"
|
|
CASE_STORE_ORIGINAL["01-mini-ranked-store-original"]="true"
|
|
CASE_REDUCTION_MODE["01-mini-ranked-store-original"]="MERGE_SUBTREES_WITH_EQUIVALENT_RANKED_GET_ALL_RESULTS"
|
|
CASE_ARTIFACT["01-mini-ranked-store-original"]="${GOLDEN_DIR}/mini-en-ranked-storeorig.gz"
|
|
|
|
CASE_IDS+=("02-mini-unordered-store-original")
|
|
CASE_SOURCE["02-mini-unordered-store-original"]="${SOURCE_DIR}/mini-en.stemmer"
|
|
CASE_STORE_ORIGINAL["02-mini-unordered-store-original"]="true"
|
|
CASE_REDUCTION_MODE["02-mini-unordered-store-original"]="MERGE_SUBTREES_WITH_EQUIVALENT_UNORDERED_GET_ALL_RESULTS"
|
|
CASE_ARTIFACT["02-mini-unordered-store-original"]="${GOLDEN_DIR}/mini-en-unordered-storeorig.gz"
|
|
|
|
CASE_IDS+=("03-branching-ranked-no-store-original")
|
|
CASE_SOURCE["03-branching-ranked-no-store-original"]="${SOURCE_DIR}/branching-en.stemmer"
|
|
CASE_STORE_ORIGINAL["03-branching-ranked-no-store-original"]="false"
|
|
CASE_REDUCTION_MODE["03-branching-ranked-no-store-original"]="MERGE_SUBTREES_WITH_EQUIVALENT_RANKED_GET_ALL_RESULTS"
|
|
CASE_ARTIFACT["03-branching-ranked-no-store-original"]="${GOLDEN_DIR}/branching-en-ranked-no-storeorig.gz"
|
|
|
|
is_requested_case() {
|
|
local case_id="$1"
|
|
|
|
if [[ ${#REQUESTED_CASES[@]} -eq 0 ]]; then
|
|
return 0
|
|
fi
|
|
|
|
local requested
|
|
for requested in "${REQUESTED_CASES[@]}"; do
|
|
if [[ "${requested}" == "${case_id}" ]]; then
|
|
return 0
|
|
fi
|
|
done
|
|
|
|
return 1
|
|
}
|
|
|
|
validate_requested_cases() {
|
|
if [[ ${#REQUESTED_CASES[@]} -eq 0 ]]; then
|
|
return 0
|
|
fi
|
|
|
|
local requested
|
|
local known
|
|
local found
|
|
|
|
for requested in "${REQUESTED_CASES[@]}"; do
|
|
found="false"
|
|
for known in "${CASE_IDS[@]}"; do
|
|
if [[ "${requested}" == "${known}" ]]; then
|
|
found="true"
|
|
break
|
|
fi
|
|
done
|
|
[[ "${found}" == "true" ]] || fail "Unknown case identifier: ${requested}"
|
|
done
|
|
}
|
|
|
|
run_generator() {
|
|
local input_file="$1"
|
|
local output_file="$2"
|
|
local store_original="$3"
|
|
local reduction_mode="$4"
|
|
|
|
"${PROJECT_DIR}/gradlew" \
|
|
--no-daemon \
|
|
-q \
|
|
testClasses \
|
|
regressionArtifactGenerator \
|
|
-PregressionInput="${input_file}" \
|
|
-PregressionOutput="${output_file}" \
|
|
-PregressionStoreOriginal="${store_original}" \
|
|
-PregressionReductionMode="${reduction_mode}"
|
|
}
|
|
|
|
# Fallback path when the project does not expose a generic run task.
|
|
run_generator_with_javaexec_fallback() {
|
|
local input_file="$1"
|
|
local output_file="$2"
|
|
local store_original="$3"
|
|
local reduction_mode="$4"
|
|
|
|
"${PROJECT_DIR}/gradlew" \
|
|
--no-daemon \
|
|
-q \
|
|
testClasses \
|
|
-PregressionGeneratorMainClass="${MAIN_CLASS}" \
|
|
-PregressionGeneratorArgs="--input=${input_file} --output=${output_file} --store-original=${store_original} --reduction-mode=${reduction_mode}" \
|
|
regressionArtifactGenerator
|
|
}
|
|
|
|
generate_case() {
|
|
local case_id="$1"
|
|
local source_file="${CASE_SOURCE[${case_id}]}"
|
|
local artifact_file="${CASE_ARTIFACT[${case_id}]}"
|
|
local sha_file="${artifact_file}.sha256"
|
|
local store_original="${CASE_STORE_ORIGINAL[${case_id}]}"
|
|
local reduction_mode="${CASE_REDUCTION_MODE[${case_id}]}"
|
|
local temp_output="${BUILD_DIR}/$(basename "${artifact_file}")"
|
|
|
|
require_file "${source_file}"
|
|
|
|
log "Generating case: ${case_id}"
|
|
log " source: ${source_file}"
|
|
log " artifact: ${artifact_file}"
|
|
log " reduction mode: ${reduction_mode}"
|
|
log " store original: ${store_original}"
|
|
|
|
rm -f "${temp_output}"
|
|
|
|
if "${PROJECT_DIR}/gradlew" tasks --all 2>/dev/null | grep -q '^run '; then
|
|
run_generator "${source_file}" "${temp_output}" "${store_original}" "${reduction_mode}"
|
|
elif "${PROJECT_DIR}/gradlew" tasks --all 2>/dev/null | grep -q '^regressionArtifactGenerator '; then
|
|
run_generator_with_javaexec_fallback "${source_file}" "${temp_output}" "${store_original}" "${reduction_mode}"
|
|
else
|
|
fail "No supported Gradle execution path found. Expected a 'run' or 'regressionArtifactGenerator' task."
|
|
fi
|
|
|
|
require_file "${temp_output}"
|
|
|
|
mv "${temp_output}" "${artifact_file}"
|
|
compute_sha256 "${artifact_file}" > "${sha_file}"
|
|
|
|
log " wrote artifact: ${artifact_file}"
|
|
log " wrote digest: ${sha_file}"
|
|
}
|
|
|
|
validate_requested_cases
|
|
|
|
for case_id in "${CASE_IDS[@]}"; do
|
|
if is_requested_case "${case_id}"; then
|
|
generate_case "${case_id}"
|
|
fi
|
|
done
|
|
|
|
log "Regression artifacts were generated successfully."
|