feat: add jqwik property-based coverage for trie and patch invariants
test: add property-based tests for FrequencyTrie determinism across repeated compilation
test: verify semantic alignment of get(), getAll(), and getEntries()
test: verify binary serialization and compressed persistence round-trip stability
test: verify builder reconstruction preserves observable trie behavior
test: add property-based tests for PatchCommandEncoder encode/apply round-trip and determinism
test: add generated stemmer-trie properties ensuring returned patches reconstruct only acceptable stems
test: introduce bounded reusable jqwik generators and scenario builders for maintainable property coverage
build: add jqwik to test dependencies and integrate it with the existing JUnit Platform setup
test: replace Jupiter display and tag annotations in jqwik suites with jqwik-native metadata to remove discovery warnings
This commit is contained in:
@@ -3,20 +3,20 @@
|
|||||||
<classpathentry kind="src" output="bin/main" path="src/main/java">
|
<classpathentry kind="src" output="bin/main" path="src/main/java">
|
||||||
<attributes>
|
<attributes>
|
||||||
<attribute name="gradle_scope" value="main"/>
|
<attribute name="gradle_scope" value="main"/>
|
||||||
<attribute name="gradle_used_by_scope" value="main,test,jmh"/>
|
<attribute name="gradle_used_by_scope" value="main,test"/>
|
||||||
</attributes>
|
</attributes>
|
||||||
</classpathentry>
|
</classpathentry>
|
||||||
<classpathentry kind="src" output="bin/test" path="src/test/java">
|
<classpathentry kind="src" output="bin/test" path="src/test/java">
|
||||||
<attributes>
|
<attributes>
|
||||||
<attribute name="gradle_scope" value="test"/>
|
<attribute name="gradle_scope" value="test"/>
|
||||||
<attribute name="gradle_used_by_scope" value="test,jmh"/>
|
<attribute name="gradle_used_by_scope" value="test"/>
|
||||||
<attribute name="test" value="true"/>
|
<attribute name="test" value="true"/>
|
||||||
</attributes>
|
</attributes>
|
||||||
</classpathentry>
|
</classpathentry>
|
||||||
<classpathentry kind="src" output="bin/main" path="src/main/resources">
|
<classpathentry kind="src" output="bin/main" path="src/main/resources">
|
||||||
<attributes>
|
<attributes>
|
||||||
<attribute name="gradle_scope" value="main"/>
|
<attribute name="gradle_scope" value="main"/>
|
||||||
<attribute name="gradle_used_by_scope" value="main,test,jmh"/>
|
<attribute name="gradle_used_by_scope" value="main,test"/>
|
||||||
</attributes>
|
</attributes>
|
||||||
</classpathentry>
|
</classpathentry>
|
||||||
<classpathentry kind="src" output="bin/jmh" path="src/jmh/java">
|
<classpathentry kind="src" output="bin/jmh" path="src/jmh/java">
|
||||||
@@ -36,7 +36,7 @@
|
|||||||
<classpathentry kind="src" output="bin/test" path="src/test/resources">
|
<classpathentry kind="src" output="bin/test" path="src/test/resources">
|
||||||
<attributes>
|
<attributes>
|
||||||
<attribute name="gradle_scope" value="test"/>
|
<attribute name="gradle_scope" value="test"/>
|
||||||
<attribute name="gradle_used_by_scope" value="test,jmh"/>
|
<attribute name="gradle_used_by_scope" value="test"/>
|
||||||
<attribute name="test" value="true"/>
|
<attribute name="test" value="true"/>
|
||||||
</attributes>
|
</attributes>
|
||||||
</classpathentry>
|
</classpathentry>
|
||||||
|
|||||||
3
.gitignore
vendored
3
.gitignore
vendored
@@ -90,6 +90,9 @@ local.properties
|
|||||||
# PMD plugin conf
|
# PMD plugin conf
|
||||||
.pmd
|
.pmd
|
||||||
|
|
||||||
|
# jqwik local db
|
||||||
|
.jqwik-database
|
||||||
|
|
||||||
##---------------------------------------------------------------------------------------- Gradle
|
##---------------------------------------------------------------------------------------- Gradle
|
||||||
.gradle
|
.gradle
|
||||||
**/build/
|
**/build/
|
||||||
|
|||||||
@@ -70,6 +70,7 @@ dependencies {
|
|||||||
|
|
||||||
testImplementation libs.mockito.core
|
testImplementation libs.mockito.core
|
||||||
testImplementation libs.mockito.junit.jupiter
|
testImplementation libs.mockito.junit.jupiter
|
||||||
|
testImplementation libs.jqwik
|
||||||
|
|
||||||
mockitoAgent(libs.mockito.core) {
|
mockitoAgent(libs.mockito.core) {
|
||||||
transitive = false
|
transitive = false
|
||||||
|
|||||||
@@ -7,6 +7,11 @@ com.google.code.gson:gson:2.13.2=pmd
|
|||||||
com.google.errorprone:error_prone_annotations:2.41.0=pmd
|
com.google.errorprone:error_prone_annotations:2.41.0=pmd
|
||||||
net.bytebuddy:byte-buddy-agent:1.17.7=jmhRuntimeClasspath,testCompileClasspath,testRuntimeClasspath
|
net.bytebuddy:byte-buddy-agent:1.17.7=jmhRuntimeClasspath,testCompileClasspath,testRuntimeClasspath
|
||||||
net.bytebuddy:byte-buddy:1.17.7=jmhRuntimeClasspath,testCompileClasspath,testRuntimeClasspath
|
net.bytebuddy:byte-buddy:1.17.7=jmhRuntimeClasspath,testCompileClasspath,testRuntimeClasspath
|
||||||
|
net.jqwik:jqwik-api:1.9.3=testCompileClasspath,testRuntimeClasspath
|
||||||
|
net.jqwik:jqwik-engine:1.9.3=testRuntimeClasspath
|
||||||
|
net.jqwik:jqwik-time:1.9.3=testCompileClasspath,testRuntimeClasspath
|
||||||
|
net.jqwik:jqwik-web:1.9.3=testCompileClasspath,testRuntimeClasspath
|
||||||
|
net.jqwik:jqwik:1.9.3=testCompileClasspath,testRuntimeClasspath
|
||||||
net.sf.jopt-simple:jopt-simple:4.9=pitest
|
net.sf.jopt-simple:jopt-simple:4.9=pitest
|
||||||
net.sf.jopt-simple:jopt-simple:5.0.4=jmh,jmhCompileClasspath,jmhRuntimeClasspath
|
net.sf.jopt-simple:jopt-simple:5.0.4=jmh,jmhCompileClasspath,jmhRuntimeClasspath
|
||||||
net.sf.saxon:Saxon-HE:12.9=pmd
|
net.sf.saxon:Saxon-HE:12.9=pmd
|
||||||
@@ -19,7 +24,7 @@ org.apache.commons:commons-lang3:3.18.0=pitest
|
|||||||
org.apache.commons:commons-lang3:3.20.0=pmd
|
org.apache.commons:commons-lang3:3.20.0=pmd
|
||||||
org.apache.commons:commons-math3:3.6.1=jmh,jmhCompileClasspath,jmhRuntimeClasspath
|
org.apache.commons:commons-math3:3.6.1=jmh,jmhCompileClasspath,jmhRuntimeClasspath
|
||||||
org.apache.commons:commons-text:1.14.0=pitest
|
org.apache.commons:commons-text:1.14.0=pitest
|
||||||
org.apiguardian:apiguardian-api:1.1.2=testCompileClasspath
|
org.apiguardian:apiguardian-api:1.1.2=testCompileClasspath,testRuntimeClasspath
|
||||||
org.checkerframework:checker-qual:3.52.1=pmd
|
org.checkerframework:checker-qual:3.52.1=pmd
|
||||||
org.jacoco:org.jacoco.agent:0.8.14=jacocoAgent,jacocoAnt
|
org.jacoco:org.jacoco.agent:0.8.14=jacocoAgent,jacocoAnt
|
||||||
org.jacoco:org.jacoco.ant:0.8.14=jacocoAnt
|
org.jacoco:org.jacoco.ant:0.8.14=jacocoAnt
|
||||||
|
|||||||
@@ -1,12 +1,14 @@
|
|||||||
#
|
#
|
||||||
# After changing dependency versions:
|
# After changing dependency versions:
|
||||||
#
|
#
|
||||||
|
# unlock temporarily: LockMode.STRICT -> LockMode.LENIENT
|
||||||
|
#
|
||||||
|
# refresh verification metadata:
|
||||||
|
# ./gradlew --write-verification-metadata sha256 test jmh distZip cyclonedxBom
|
||||||
|
#
|
||||||
# run:
|
# run:
|
||||||
# ./gradlew --write-locks classes testClasses jmh distZip cyclonedxBom
|
# ./gradlew --write-locks classes testClasses jmh distZip cyclonedxBom
|
||||||
#
|
#
|
||||||
# if needed, refresh verification metadata:
|
|
||||||
# ./gradlew --write-verification-metadata sha256 test jmh distZip cyclonedxBom
|
|
||||||
#
|
|
||||||
# (optional - for Eclipse IDE)
|
# (optional - for Eclipse IDE)
|
||||||
# insert trusted-artifacts into gradle/verification-metadata.xml/verification-metadata/configuration:
|
# insert trusted-artifacts into gradle/verification-metadata.xml/verification-metadata/configuration:
|
||||||
# <trusted-artifacts>
|
# <trusted-artifacts>
|
||||||
@@ -21,6 +23,7 @@
|
|||||||
[versions]
|
[versions]
|
||||||
junit = "5.14.3"
|
junit = "5.14.3"
|
||||||
mockito = "5.23.0"
|
mockito = "5.23.0"
|
||||||
|
jqwik = "1.9.3"
|
||||||
|
|
||||||
[libraries]
|
[libraries]
|
||||||
junit-bom = { module = "org.junit:junit-bom", version.ref = "junit" }
|
junit-bom = { module = "org.junit:junit-bom", version.ref = "junit" }
|
||||||
@@ -29,3 +32,5 @@ junit-platform-launcher = { module = "org.junit.platform:junit-platform-launcher
|
|||||||
|
|
||||||
mockito-core = { module = "org.mockito:mockito-core", version.ref = "mockito" }
|
mockito-core = { module = "org.mockito:mockito-core", version.ref = "mockito" }
|
||||||
mockito-junit-jupiter = { module = "org.mockito:mockito-junit-jupiter", version.ref = "mockito" }
|
mockito-junit-jupiter = { module = "org.mockito:mockito-junit-jupiter", version.ref = "mockito" }
|
||||||
|
|
||||||
|
jqwik = { module = "net.jqwik:jqwik", version.ref = "jqwik" }
|
||||||
|
|||||||
@@ -568,6 +568,46 @@
|
|||||||
<sha256 value="1af699f8d9ddab67f9a0d202fbd7915eb0362a5a6dfd5ffc54cafa3465c9cb0a" origin="Generated by Gradle"/>
|
<sha256 value="1af699f8d9ddab67f9a0d202fbd7915eb0362a5a6dfd5ffc54cafa3465c9cb0a" origin="Generated by Gradle"/>
|
||||||
</artifact>
|
</artifact>
|
||||||
</component>
|
</component>
|
||||||
|
<component group="net.jqwik" name="jqwik" version="1.9.3">
|
||||||
|
<artifact name="jqwik-1.9.3.jar">
|
||||||
|
<sha256 value="562931e1667308180056a8ce85791f71ab8c37ca8efc2006a163ba5d650e5f73" origin="Generated by Gradle"/>
|
||||||
|
</artifact>
|
||||||
|
<artifact name="jqwik-1.9.3.module">
|
||||||
|
<sha256 value="681316f856db4ea3cac8fcced811127fc1d7016875e5b50aa4a55024513a93d7" origin="Generated by Gradle"/>
|
||||||
|
</artifact>
|
||||||
|
</component>
|
||||||
|
<component group="net.jqwik" name="jqwik-api" version="1.9.3">
|
||||||
|
<artifact name="jqwik-api-1.9.3.jar">
|
||||||
|
<sha256 value="4bce7e80beb6d9d7092a799fa8a509d76cc31dbb20c938a9952965c15d1dd9b2" origin="Generated by Gradle"/>
|
||||||
|
</artifact>
|
||||||
|
<artifact name="jqwik-api-1.9.3.module">
|
||||||
|
<sha256 value="69984416ea2e9f7fde40cfac983d2f540d3a37e9766fd3b0a06fada8f9b4cff2" origin="Generated by Gradle"/>
|
||||||
|
</artifact>
|
||||||
|
</component>
|
||||||
|
<component group="net.jqwik" name="jqwik-engine" version="1.9.3">
|
||||||
|
<artifact name="jqwik-engine-1.9.3.jar">
|
||||||
|
<sha256 value="b85592ee78e30239ccfdca7a134f918ee94ebec51ad29a313fc9a676d97b3ede" origin="Generated by Gradle"/>
|
||||||
|
</artifact>
|
||||||
|
<artifact name="jqwik-engine-1.9.3.module">
|
||||||
|
<sha256 value="2c68479ebda9e334bc9033abd2ef227353808f20114f197947b5c7b9646ab8e5" origin="Generated by Gradle"/>
|
||||||
|
</artifact>
|
||||||
|
</component>
|
||||||
|
<component group="net.jqwik" name="jqwik-time" version="1.9.3">
|
||||||
|
<artifact name="jqwik-time-1.9.3.jar">
|
||||||
|
<sha256 value="9fd09021d8f03d44990457bf3095cf0aaf34d2785d1108ff22590286c233b3e5" origin="Generated by Gradle"/>
|
||||||
|
</artifact>
|
||||||
|
<artifact name="jqwik-time-1.9.3.module">
|
||||||
|
<sha256 value="c2b056576c8767bfcd7efdd982890fbc71e608fb5c9c80fc145cfee6adeeaa24" origin="Generated by Gradle"/>
|
||||||
|
</artifact>
|
||||||
|
</component>
|
||||||
|
<component group="net.jqwik" name="jqwik-web" version="1.9.3">
|
||||||
|
<artifact name="jqwik-web-1.9.3.jar">
|
||||||
|
<sha256 value="6aee9d583c1ff9efe319b2fa0bc9d75fc616de6d1f240ddbd2af9eabda483dbe" origin="Generated by Gradle"/>
|
||||||
|
</artifact>
|
||||||
|
<artifact name="jqwik-web-1.9.3.module">
|
||||||
|
<sha256 value="38c86130c8b86c1657b4f8256e065ee08551f7c5ce728d1a5be8f63133b14554" origin="Generated by Gradle"/>
|
||||||
|
</artifact>
|
||||||
|
</component>
|
||||||
<component group="net.sf.jopt-simple" name="jopt-simple" version="4.9">
|
<component group="net.sf.jopt-simple" name="jopt-simple" version="4.9">
|
||||||
<artifact name="jopt-simple-4.9.jar">
|
<artifact name="jopt-simple-4.9.jar">
|
||||||
<sha256 value="26c5856e954b5f864db76f13b86919b59c6eecf9fd930b96baa8884626baf2f5" origin="Generated by Gradle"/>
|
<sha256 value="26c5856e954b5f864db76f13b86919b59c6eecf9fd930b96baa8884626baf2f5" origin="Generated by Gradle"/>
|
||||||
|
|||||||
218
src/test/java/org/egothor/stemmer/FrequencyTrieProperties.java
Normal file
218
src/test/java/org/egothor/stemmer/FrequencyTrieProperties.java
Normal file
@@ -0,0 +1,218 @@
|
|||||||
|
/*******************************************************************************
|
||||||
|
* Copyright (C) 2026, Leo Galambos
|
||||||
|
* All rights reserved.
|
||||||
|
*
|
||||||
|
* Redistribution and use in source and binary forms, with or without
|
||||||
|
* modification, are permitted provided that the following conditions are met:
|
||||||
|
*
|
||||||
|
* 1. Redistributions of source code must retain the above copyright notice,
|
||||||
|
* this list of conditions and the following disclaimer.
|
||||||
|
*
|
||||||
|
* 2. Redistributions in binary form must reproduce the above copyright notice,
|
||||||
|
* this list of conditions and the following disclaimer in the documentation
|
||||||
|
* and/or other materials provided with the distribution.
|
||||||
|
*
|
||||||
|
* 3. Neither the name of the copyright holder nor the names of its contributors
|
||||||
|
* may be used to endorse or promote products derived from this software
|
||||||
|
* without specific prior written permission.
|
||||||
|
*
|
||||||
|
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||||
|
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||||
|
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
|
||||||
|
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||||
|
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||||
|
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||||
|
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||||
|
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||||
|
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||||
|
* POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
******************************************************************************/
|
||||||
|
package org.egothor.stemmer;
|
||||||
|
|
||||||
|
import static org.junit.jupiter.api.Assertions.assertArrayEquals;
|
||||||
|
import static org.junit.jupiter.api.Assertions.assertEquals;
|
||||||
|
import static org.junit.jupiter.api.Assertions.assertIterableEquals;
|
||||||
|
import static org.junit.jupiter.api.Assertions.assertNull;
|
||||||
|
import static org.junit.jupiter.api.Assertions.assertTrue;
|
||||||
|
|
||||||
|
import java.io.ByteArrayInputStream;
|
||||||
|
import java.io.ByteArrayOutputStream;
|
||||||
|
import java.io.DataInputStream;
|
||||||
|
import java.io.DataOutputStream;
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.io.UncheckedIOException;
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
|
import net.jqwik.api.ForAll;
|
||||||
|
import net.jqwik.api.Label;
|
||||||
|
import net.jqwik.api.Property;
|
||||||
|
import net.jqwik.api.Tag;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Property-based tests for the compiled trie abstraction.
|
||||||
|
*
|
||||||
|
* <p>
|
||||||
|
* These properties focus on deterministic compilation, observable lookup
|
||||||
|
* alignment, binary persistence stability, and safe reconstruction back into a
|
||||||
|
* writable builder. Together they guard the most valuable invariants of the
|
||||||
|
* core algorithm without overfitting to particular fixture data.
|
||||||
|
*/
|
||||||
|
@Label("FrequencyTrie properties")
|
||||||
|
@Tag("unit")
|
||||||
|
@Tag("property")
|
||||||
|
@Tag("trie")
|
||||||
|
class FrequencyTrieProperties extends PropertyBasedTestSupport {
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Binary codec used by generic trie round-trip assertions.
|
||||||
|
*/
|
||||||
|
private static final FrequencyTrie.ValueStreamCodec<String> STRING_CODEC = new FrequencyTrie.ValueStreamCodec<>() {
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void write(final DataOutputStream dataOutput, final String value) throws IOException {
|
||||||
|
dataOutput.writeUTF(value);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String read(final DataInputStream dataInput) throws IOException {
|
||||||
|
return dataInput.readUTF();
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Verifies that compiling the same insertion scenario repeatedly yields the
|
||||||
|
* same observable lookups.
|
||||||
|
*
|
||||||
|
* @param scenario generated trie scenario
|
||||||
|
* @param reductionMode reduction mode
|
||||||
|
*/
|
||||||
|
@Property(tries = 80)
|
||||||
|
@Label("compilation should be deterministic for the same insertion scenario")
|
||||||
|
void compilationShouldBeDeterministicForTheSameInsertionScenario(
|
||||||
|
@ForAll("trieScenarios") final TrieScenario scenario, @ForAll final ReductionMode reductionMode) {
|
||||||
|
final FrequencyTrie<String> first = buildTrie(scenario, reductionMode);
|
||||||
|
final FrequencyTrie<String> second = buildTrie(scenario, reductionMode);
|
||||||
|
|
||||||
|
for (String key : scenario.observedKeys()) {
|
||||||
|
assertTrieStateEquals(first, second, key);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Verifies that {@link FrequencyTrie#get(String)},
|
||||||
|
* {@link FrequencyTrie#getAll(String)}, and
|
||||||
|
* {@link FrequencyTrie#getEntries(String)} remain aligned for every probed key.
|
||||||
|
*
|
||||||
|
* @param scenario generated trie scenario
|
||||||
|
* @param reductionMode reduction mode
|
||||||
|
*/
|
||||||
|
@Property(tries = 80)
|
||||||
|
@Label("get, getAll, and getEntries should stay semantically aligned")
|
||||||
|
void getGetAllAndGetEntriesShouldStaySemanticallyAligned(@ForAll("trieScenarios") final TrieScenario scenario,
|
||||||
|
@ForAll final ReductionMode reductionMode) {
|
||||||
|
final FrequencyTrie<String> trie = buildTrie(scenario, reductionMode);
|
||||||
|
|
||||||
|
for (String key : scenario.observedKeys()) {
|
||||||
|
final String preferred = trie.get(key);
|
||||||
|
final String[] allValues = trie.getAll(key);
|
||||||
|
final List<ValueCount<String>> entries = trie.getEntries(key);
|
||||||
|
|
||||||
|
assertEquals(allValues.length, entries.size(), "getAll() and getEntries() must have equal cardinality.");
|
||||||
|
|
||||||
|
if (allValues.length == 0) {
|
||||||
|
assertNull(preferred, "get() must return null when no terminal value exists.");
|
||||||
|
assertTrue(entries.isEmpty(), "getEntries() must be empty when getAll() is empty.");
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
assertEquals(allValues[0], preferred, "get() must expose the preferred first getAll() value.");
|
||||||
|
|
||||||
|
int previousCount = Integer.MAX_VALUE;
|
||||||
|
for (int index = 0; index < entries.size(); index++) {
|
||||||
|
final ValueCount<String> entry = entries.get(index);
|
||||||
|
assertEquals(allValues[index], entry.value(), "entry ordering must match getAll() ordering.");
|
||||||
|
assertTrue(entry.count() >= 1, "stored frequencies must remain positive.");
|
||||||
|
assertTrue(entry.count() <= previousCount, "entry counts must be ordered descending.");
|
||||||
|
previousCount = entry.count();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Verifies that binary serialization and deserialization preserve all
|
||||||
|
* observable lookup semantics for generated scenarios.
|
||||||
|
*
|
||||||
|
* @param scenario generated trie scenario
|
||||||
|
* @param reductionMode reduction mode
|
||||||
|
*/
|
||||||
|
@Property(tries = 40)
|
||||||
|
@Label("binary round-trip should preserve observable trie semantics")
|
||||||
|
void binaryRoundTripShouldPreserveObservableTrieSemantics(@ForAll("trieScenarios") final TrieScenario scenario,
|
||||||
|
@ForAll final ReductionMode reductionMode) {
|
||||||
|
final FrequencyTrie<String> original = buildTrie(scenario, reductionMode);
|
||||||
|
final FrequencyTrie<String> roundTripped = roundTrip(original);
|
||||||
|
|
||||||
|
for (String key : scenario.observedKeys()) {
|
||||||
|
assertTrieStateEquals(original, roundTripped, key);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Verifies that reconstructing a writable builder from a compiled trie and
|
||||||
|
* recompiling it preserves observable lookup semantics.
|
||||||
|
*
|
||||||
|
* @param scenario generated trie scenario
|
||||||
|
* @param reductionMode reduction mode
|
||||||
|
*/
|
||||||
|
@Property(tries = 60)
|
||||||
|
@Label("builder reconstruction should preserve observable trie semantics")
|
||||||
|
void builderReconstructionShouldPreserveObservableTrieSemantics(
|
||||||
|
@ForAll("trieScenarios") final TrieScenario scenario, @ForAll final ReductionMode reductionMode) {
|
||||||
|
final FrequencyTrie<String> original = buildTrie(scenario, reductionMode);
|
||||||
|
final FrequencyTrie<String> rebuilt = FrequencyTrieBuilders
|
||||||
|
.copyOf(original, STRING_ARRAY_FACTORY, reductionMode).build();
|
||||||
|
|
||||||
|
for (String key : scenario.observedKeys()) {
|
||||||
|
assertEquals(original.get(key), rebuilt.get(key), "preferred lookup must survive reconstruction.");
|
||||||
|
assertArrayEquals(original.getAll(key), rebuilt.getAll(key),
|
||||||
|
"complete ordered result set must survive reconstruction.");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Asserts full observable trie equality for one key.
|
||||||
|
*
|
||||||
|
* @param expected expected trie
|
||||||
|
* @param actual actual trie
|
||||||
|
* @param key key to probe
|
||||||
|
*/
|
||||||
|
private static void assertTrieStateEquals(final FrequencyTrie<String> expected, final FrequencyTrie<String> actual,
|
||||||
|
final String key) {
|
||||||
|
assertEquals(expected.get(key), actual.get(key), "preferred lookup drifted.");
|
||||||
|
assertArrayEquals(expected.getAll(key), actual.getAll(key), "ordered result set drifted.");
|
||||||
|
assertIterableEquals(expected.getEntries(key), actual.getEntries(key), "entry list drifted.");
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Round-trips one trie through its binary representation.
|
||||||
|
*
|
||||||
|
* @param trie trie to persist and reload
|
||||||
|
* @return reloaded trie
|
||||||
|
*/
|
||||||
|
private static FrequencyTrie<String> roundTrip(final FrequencyTrie<String> trie) {
|
||||||
|
try {
|
||||||
|
final ByteArrayOutputStream byteArrayOutputStream = new ByteArrayOutputStream();
|
||||||
|
try (DataOutputStream dataOutputStream = new DataOutputStream(byteArrayOutputStream)) {
|
||||||
|
trie.writeTo(dataOutputStream, STRING_CODEC);
|
||||||
|
}
|
||||||
|
|
||||||
|
try (DataInputStream dataInputStream = new DataInputStream(
|
||||||
|
new ByteArrayInputStream(byteArrayOutputStream.toByteArray()))) {
|
||||||
|
return FrequencyTrie.readFrom(dataInputStream, STRING_ARRAY_FACTORY, STRING_CODEC);
|
||||||
|
}
|
||||||
|
} catch (IOException exception) {
|
||||||
|
throw new UncheckedIOException("Unexpected binary round-trip failure.", exception);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -0,0 +1,93 @@
|
|||||||
|
/*******************************************************************************
|
||||||
|
* Copyright (C) 2026, Leo Galambos
|
||||||
|
* All rights reserved.
|
||||||
|
*
|
||||||
|
* Redistribution and use in source and binary forms, with or without
|
||||||
|
* modification, are permitted provided that the following conditions are met:
|
||||||
|
*
|
||||||
|
* 1. Redistributions of source code must retain the above copyright notice,
|
||||||
|
* this list of conditions and the following disclaimer.
|
||||||
|
*
|
||||||
|
* 2. Redistributions in binary form must reproduce the above copyright notice,
|
||||||
|
* this list of conditions and the following disclaimer in the documentation
|
||||||
|
* and/or other materials provided with the distribution.
|
||||||
|
*
|
||||||
|
* 3. Neither the name of the copyright holder nor the names of its contributors
|
||||||
|
* may be used to endorse or promote products derived from this software
|
||||||
|
* without specific prior written permission.
|
||||||
|
*
|
||||||
|
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||||
|
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||||
|
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
|
||||||
|
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||||
|
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||||
|
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||||
|
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||||
|
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||||
|
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||||
|
* POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
******************************************************************************/
|
||||||
|
package org.egothor.stemmer;
|
||||||
|
|
||||||
|
import static org.junit.jupiter.api.Assertions.assertEquals;
|
||||||
|
import static org.junit.jupiter.api.Assertions.assertNotNull;
|
||||||
|
|
||||||
|
import net.jqwik.api.ForAll;
|
||||||
|
import net.jqwik.api.Label;
|
||||||
|
import net.jqwik.api.Property;
|
||||||
|
import net.jqwik.api.Tag;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Property-based tests for {@link PatchCommandEncoder}.
|
||||||
|
*
|
||||||
|
* <p>
|
||||||
|
* These properties protect the most important behavioral contract of the patch
|
||||||
|
* language: encoding must be deterministic and applying an encoded patch must
|
||||||
|
* reconstruct the exact requested target.
|
||||||
|
*/
|
||||||
|
@Label("PatchCommandEncoder properties")
|
||||||
|
@Tag("unit")
|
||||||
|
@Tag("property")
|
||||||
|
@Tag("patch")
|
||||||
|
class PatchCommandEncoderProperties extends PropertyBasedTestSupport {
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Verifies that encoding followed by application reconstructs the original
|
||||||
|
* target word for bounded generated inputs.
|
||||||
|
*
|
||||||
|
* @param source source word
|
||||||
|
* @param target target word
|
||||||
|
*/
|
||||||
|
@Property(tries = 200)
|
||||||
|
@Label("encode followed by apply should reconstruct the target word")
|
||||||
|
void encodeFollowedByApplyShouldReconstructTheTargetWord(@ForAll("words") final String source,
|
||||||
|
@ForAll("words") final String target) {
|
||||||
|
final PatchCommandEncoder encoder = new PatchCommandEncoder();
|
||||||
|
final String patch = encoder.encode(source, target);
|
||||||
|
|
||||||
|
assertNotNull(patch, "patch generation must succeed for non-null inputs.");
|
||||||
|
assertEquals(target, PatchCommandEncoder.apply(source, patch),
|
||||||
|
"applying the encoded patch must reconstruct the target word.");
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Verifies that encoding is deterministic for the same source-target pair, both
|
||||||
|
* within one encoder instance and across fresh instances.
|
||||||
|
*
|
||||||
|
* @param source source word
|
||||||
|
* @param target target word
|
||||||
|
*/
|
||||||
|
@Property(tries = 150)
|
||||||
|
@Label("encode should be deterministic for one source-target pair")
|
||||||
|
void encodeShouldBeDeterministicForOneSourceTargetPair(@ForAll("words") final String source,
|
||||||
|
@ForAll("words") final String target) {
|
||||||
|
final PatchCommandEncoder sharedEncoder = new PatchCommandEncoder();
|
||||||
|
final String first = sharedEncoder.encode(source, target);
|
||||||
|
final String second = sharedEncoder.encode(source, target);
|
||||||
|
final String fresh = new PatchCommandEncoder().encode(source, target);
|
||||||
|
|
||||||
|
assertEquals(first, second, "one encoder instance must produce stable output.");
|
||||||
|
assertEquals(first, fresh, "fresh encoder instances must produce the same patch output.");
|
||||||
|
}
|
||||||
|
}
|
||||||
326
src/test/java/org/egothor/stemmer/PropertyBasedTestSupport.java
Normal file
326
src/test/java/org/egothor/stemmer/PropertyBasedTestSupport.java
Normal file
@@ -0,0 +1,326 @@
|
|||||||
|
/*******************************************************************************
|
||||||
|
* Copyright (C) 2026, Leo Galambos
|
||||||
|
* All rights reserved.
|
||||||
|
*
|
||||||
|
* Redistribution and use in source and binary forms, with or without
|
||||||
|
* modification, are permitted provided that the following conditions are met:
|
||||||
|
*
|
||||||
|
* 1. Redistributions of source code must retain the above copyright notice,
|
||||||
|
* this list of conditions and the following disclaimer.
|
||||||
|
*
|
||||||
|
* 2. Redistributions in binary form must reproduce the above copyright notice,
|
||||||
|
* this list of conditions and the following disclaimer in the documentation
|
||||||
|
* and/or other materials provided with the distribution.
|
||||||
|
*
|
||||||
|
* 3. Neither the name of the copyright holder nor the names of its contributors
|
||||||
|
* may be used to endorse or promote products derived from this software
|
||||||
|
* without specific prior written permission.
|
||||||
|
*
|
||||||
|
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||||
|
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||||
|
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
|
||||||
|
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||||
|
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||||
|
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||||
|
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||||
|
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||||
|
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||||
|
* POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
******************************************************************************/
|
||||||
|
package org.egothor.stemmer;
|
||||||
|
|
||||||
|
import java.util.LinkedHashSet;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.Objects;
|
||||||
|
import java.util.Set;
|
||||||
|
import java.util.function.IntFunction;
|
||||||
|
|
||||||
|
import net.jqwik.api.Arbitraries;
|
||||||
|
import net.jqwik.api.Arbitrary;
|
||||||
|
import net.jqwik.api.Combinators;
|
||||||
|
import net.jqwik.api.Provide;
|
||||||
|
import net.jqwik.api.arbitraries.ListArbitrary;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Shared jqwik generators and helpers for property-based tests covering the
|
||||||
|
* Radixor algorithmic core.
|
||||||
|
*
|
||||||
|
* <p>
|
||||||
|
* The generated domains are intentionally bounded to keep CI execution time
|
||||||
|
* predictable while still exploring a broad range of trie shapes, duplicate
|
||||||
|
* insertions, missing lookups, and patch-command transformations.
|
||||||
|
*/
|
||||||
|
abstract class PropertyBasedTestSupport {
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Shared array factory for string tries.
|
||||||
|
*/
|
||||||
|
protected static final IntFunction<String[]> STRING_ARRAY_FACTORY = String[]::new;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Provides bounded lowercase words suitable for trie keys, stems, and patch
|
||||||
|
* encoder inputs.
|
||||||
|
*
|
||||||
|
* @return bounded word generator
|
||||||
|
*/
|
||||||
|
@Provide
|
||||||
|
protected Arbitrary<String> words() {
|
||||||
|
return Arbitraries.strings().withChars('a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l')
|
||||||
|
.ofMinLength(0).ofMaxLength(12);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Provides non-empty lowercase words suitable for dictionary variants and
|
||||||
|
* stems.
|
||||||
|
*
|
||||||
|
* @return bounded non-empty word generator
|
||||||
|
*/
|
||||||
|
@Provide
|
||||||
|
protected Arbitrary<String> nonEmptyWords() {
|
||||||
|
return Arbitraries.strings().withChars('a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l')
|
||||||
|
.ofMinLength(1).ofMaxLength(12);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Provides bounded insertion scenarios for trie-focused properties.
|
||||||
|
*
|
||||||
|
* @return trie scenario generator
|
||||||
|
*/
|
||||||
|
@Provide
|
||||||
|
protected Arbitrary<TrieScenario> trieScenarios() {
|
||||||
|
final Arbitrary<TrieInsertion> insertionArbitrary = Combinators
|
||||||
|
.combine(words(), nonEmptyWords(), Arbitraries.integers().between(1, 5)).as(TrieInsertion::new);
|
||||||
|
|
||||||
|
final ListArbitrary<TrieInsertion> insertions = insertionArbitrary.list().ofMinSize(1).ofMaxSize(24);
|
||||||
|
final Arbitrary<List<String>> observedKeys = words().list().ofMinSize(0).ofMaxSize(16);
|
||||||
|
|
||||||
|
return Combinators.combine(insertions, observedKeys)
|
||||||
|
.as((scenarioInsertions, additionalObservedKeys) -> new TrieScenario(scenarioInsertions,
|
||||||
|
mergeObservedKeys(scenarioInsertions, additionalObservedKeys)));
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Provides bounded stemmer scenarios where each variant word maps to one or
|
||||||
|
* more acceptable stems.
|
||||||
|
*
|
||||||
|
* @return stemmer scenario generator
|
||||||
|
*/
|
||||||
|
@Provide
|
||||||
|
protected Arbitrary<StemmerScenario> stemmerScenarios() {
|
||||||
|
final Arbitrary<StemmerEntry> entryArbitrary = Combinators
|
||||||
|
.combine(nonEmptyWords(), nonEmptyWords().set().ofMinSize(1).ofMaxSize(4)).as((stem, variants) -> {
|
||||||
|
final LinkedHashSet<String> normalizedVariants = new LinkedHashSet<>(variants);
|
||||||
|
normalizedVariants.add(stem);
|
||||||
|
return new StemmerEntry(stem, normalizedVariants);
|
||||||
|
});
|
||||||
|
|
||||||
|
return entryArbitrary.list().ofMinSize(1).ofMaxSize(10).map(StemmerScenario::new);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Builds a compiled trie from one generated scenario.
|
||||||
|
*
|
||||||
|
* @param scenario trie scenario
|
||||||
|
* @param reductionMode reduction mode
|
||||||
|
* @return compiled trie
|
||||||
|
*/
|
||||||
|
protected FrequencyTrie<String> buildTrie(final TrieScenario scenario, final ReductionMode reductionMode) {
|
||||||
|
Objects.requireNonNull(scenario, "scenario");
|
||||||
|
Objects.requireNonNull(reductionMode, "reductionMode");
|
||||||
|
|
||||||
|
final FrequencyTrie.Builder<String> builder = new FrequencyTrie.Builder<>(STRING_ARRAY_FACTORY, reductionMode);
|
||||||
|
for (TrieInsertion insertion : scenario.insertions()) {
|
||||||
|
builder.put(insertion.key(), insertion.value(), insertion.count());
|
||||||
|
}
|
||||||
|
return builder.build();
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Builds a patch-command trie from one generated stemmer scenario.
|
||||||
|
*
|
||||||
|
* @param scenario stemmer scenario
|
||||||
|
* @param reductionMode reduction mode
|
||||||
|
* @param storeOriginal whether original stems should be stored using the
|
||||||
|
* canonical no-op patch
|
||||||
|
* @return compiled patch-command trie
|
||||||
|
*/
|
||||||
|
protected FrequencyTrie<String> buildStemmerTrie(final StemmerScenario scenario, final ReductionMode reductionMode,
|
||||||
|
final boolean storeOriginal) {
|
||||||
|
Objects.requireNonNull(scenario, "scenario");
|
||||||
|
Objects.requireNonNull(reductionMode, "reductionMode");
|
||||||
|
|
||||||
|
final FrequencyTrie.Builder<String> builder = new FrequencyTrie.Builder<>(STRING_ARRAY_FACTORY, reductionMode);
|
||||||
|
final PatchCommandEncoder encoder = new PatchCommandEncoder();
|
||||||
|
|
||||||
|
for (StemmerEntry entry : scenario.entries()) {
|
||||||
|
if (storeOriginal) {
|
||||||
|
builder.put(entry.stem(), PatchCommandEncoder.NOOP_PATCH);
|
||||||
|
}
|
||||||
|
for (String variant : entry.variants()) {
|
||||||
|
if (!variant.equals(entry.stem())) {
|
||||||
|
builder.put(variant, encoder.encode(variant, entry.stem()));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return builder.build();
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Merges observed lookup keys while preserving order and keeping scenario keys
|
||||||
|
* relevant to actual trie content.
|
||||||
|
*
|
||||||
|
* @param insertions inserted trie mappings
|
||||||
|
* @param additionalObservedKeys extra lookup probes
|
||||||
|
* @return merged lookup-key set
|
||||||
|
*/
|
||||||
|
private static Set<String> mergeObservedKeys(final List<TrieInsertion> insertions,
|
||||||
|
final List<String> additionalObservedKeys) {
|
||||||
|
final LinkedHashSet<String> observedKeys = new LinkedHashSet<>();
|
||||||
|
for (TrieInsertion insertion : insertions) {
|
||||||
|
observedKeys.add(insertion.key());
|
||||||
|
}
|
||||||
|
observedKeys.addAll(additionalObservedKeys);
|
||||||
|
return observedKeys;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Generated insertion into a trie builder.
|
||||||
|
*
|
||||||
|
* @param key trie key
|
||||||
|
* @param value stored value
|
||||||
|
* @param count positive insertion count
|
||||||
|
*/
|
||||||
|
protected record TrieInsertion(String key, String value, int count) {
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Creates a validated insertion descriptor.
|
||||||
|
*
|
||||||
|
* @param key trie key
|
||||||
|
* @param value stored value
|
||||||
|
* @param count positive insertion count
|
||||||
|
*/
|
||||||
|
public TrieInsertion {
|
||||||
|
Objects.requireNonNull(key, "key");
|
||||||
|
Objects.requireNonNull(value, "value");
|
||||||
|
if (count < 1) {
|
||||||
|
throw new IllegalArgumentException("count must be at least 1.");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Generated trie scenario used by multiple properties.
|
||||||
|
*
|
||||||
|
* @param insertions generated insertions
|
||||||
|
* @param observedKeys lookup probes
|
||||||
|
*/
|
||||||
|
protected record TrieScenario(List<TrieInsertion> insertions, Set<String> observedKeys) {
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Creates a validated trie scenario.
|
||||||
|
*
|
||||||
|
* @param insertions generated insertions
|
||||||
|
* @param observedKeys lookup probes
|
||||||
|
*/
|
||||||
|
public TrieScenario {
|
||||||
|
Objects.requireNonNull(insertions, "insertions");
|
||||||
|
Objects.requireNonNull(observedKeys, "observedKeys");
|
||||||
|
insertions = List.copyOf(insertions);
|
||||||
|
observedKeys = Set.copyOf(observedKeys);
|
||||||
|
if (insertions.isEmpty()) {
|
||||||
|
throw new IllegalArgumentException("insertions must not be empty.");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String toString() {
|
||||||
|
return "TrieScenario[insertions=" + this.insertions.size() + ", observedKeys=" + this.observedKeys.size()
|
||||||
|
+ "]";
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Generated stemmer dictionary line equivalent.
|
||||||
|
*
|
||||||
|
* @param stem canonical stem
|
||||||
|
* @param variants variants accepted for the stem
|
||||||
|
*/
|
||||||
|
protected record StemmerEntry(String stem, Set<String> variants) {
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Creates a validated stemmer entry.
|
||||||
|
*
|
||||||
|
* @param stem canonical stem
|
||||||
|
* @param variants variants accepted for the stem
|
||||||
|
*/
|
||||||
|
public StemmerEntry {
|
||||||
|
Objects.requireNonNull(stem, "stem");
|
||||||
|
Objects.requireNonNull(variants, "variants");
|
||||||
|
variants = Set.copyOf(variants);
|
||||||
|
if (stem.isEmpty()) {
|
||||||
|
throw new IllegalArgumentException("stem must not be empty.");
|
||||||
|
}
|
||||||
|
if (variants.isEmpty()) {
|
||||||
|
throw new IllegalArgumentException("variants must not be empty.");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Generated stemmer scenario used by patch-command trie properties.
|
||||||
|
*
|
||||||
|
* @param entries generated entries
|
||||||
|
*/
|
||||||
|
protected record StemmerScenario(List<StemmerEntry> entries) {
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Creates a validated stemmer scenario.
|
||||||
|
*
|
||||||
|
* @param entries generated entries
|
||||||
|
*/
|
||||||
|
public StemmerScenario {
|
||||||
|
Objects.requireNonNull(entries, "entries");
|
||||||
|
entries = List.copyOf(entries);
|
||||||
|
if (entries.isEmpty()) {
|
||||||
|
throw new IllegalArgumentException("entries must not be empty.");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns all known source words that should be probeable in the resulting
|
||||||
|
* trie.
|
||||||
|
*
|
||||||
|
* @return observed lookup words
|
||||||
|
*/
|
||||||
|
public Set<String> observedWords() {
|
||||||
|
final LinkedHashSet<String> observedWords = new LinkedHashSet<>();
|
||||||
|
for (StemmerEntry entry : this.entries) {
|
||||||
|
observedWords.add(entry.stem());
|
||||||
|
observedWords.addAll(entry.variants());
|
||||||
|
}
|
||||||
|
return observedWords;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns all acceptable stems for one observed word.
|
||||||
|
*
|
||||||
|
* @param word observed word
|
||||||
|
* @return acceptable stems
|
||||||
|
*/
|
||||||
|
public Set<String> acceptableStemsFor(final String word) {
|
||||||
|
final LinkedHashSet<String> stems = new LinkedHashSet<>();
|
||||||
|
for (StemmerEntry entry : this.entries) {
|
||||||
|
if (entry.stem().equals(word) || entry.variants().contains(word)) {
|
||||||
|
stems.add(entry.stem());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return stems;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String toString() {
|
||||||
|
return "StemmerScenario[entries=" + this.entries.size() + "]";
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -0,0 +1,151 @@
|
|||||||
|
/*******************************************************************************
|
||||||
|
* Copyright (C) 2026, Leo Galambos
|
||||||
|
* All rights reserved.
|
||||||
|
*
|
||||||
|
* Redistribution and use in source and binary forms, with or without
|
||||||
|
* modification, are permitted provided that the following conditions are met:
|
||||||
|
*
|
||||||
|
* 1. Redistributions of source code must retain the above copyright notice,
|
||||||
|
* this list of conditions and the following disclaimer.
|
||||||
|
*
|
||||||
|
* 2. Redistributions in binary form must reproduce the above copyright notice,
|
||||||
|
* this list of conditions and the following disclaimer in the documentation
|
||||||
|
* and/or other materials provided with the distribution.
|
||||||
|
*
|
||||||
|
* 3. Neither the name of the copyright holder nor the names of its contributors
|
||||||
|
* may be used to endorse or promote products derived from this software
|
||||||
|
* without specific prior written permission.
|
||||||
|
*
|
||||||
|
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||||
|
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||||
|
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
|
||||||
|
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||||
|
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||||
|
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||||
|
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||||
|
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||||
|
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||||
|
* POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
******************************************************************************/
|
||||||
|
package org.egothor.stemmer;
|
||||||
|
|
||||||
|
import static org.junit.jupiter.api.Assertions.assertArrayEquals;
|
||||||
|
import static org.junit.jupiter.api.Assertions.assertEquals;
|
||||||
|
import static org.junit.jupiter.api.Assertions.assertTrue;
|
||||||
|
|
||||||
|
import java.io.ByteArrayInputStream;
|
||||||
|
import java.io.ByteArrayOutputStream;
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.io.UncheckedIOException;
|
||||||
|
import java.util.LinkedHashSet;
|
||||||
|
import java.util.Set;
|
||||||
|
|
||||||
|
import net.jqwik.api.ForAll;
|
||||||
|
import net.jqwik.api.Label;
|
||||||
|
import net.jqwik.api.Property;
|
||||||
|
import net.jqwik.api.Tag;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Property-based tests for patch-command stemmer tries.
|
||||||
|
*
|
||||||
|
* <p>
|
||||||
|
* These properties verify the most important semantic contract of compiled
|
||||||
|
* stemmer dictionaries: every patch returned for a known input word must decode
|
||||||
|
* to one of the acceptable stems declared by the source scenario, and binary
|
||||||
|
* persistence must not alter that behavior.
|
||||||
|
*/
|
||||||
|
@Label("Stemmer patch trie properties")
|
||||||
|
@Tag("unit")
|
||||||
|
@Tag("property")
|
||||||
|
@Tag("stemming")
|
||||||
|
class StemmerPatchTrieProperties extends PropertyBasedTestSupport {
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Verifies that every returned patch reconstructs only acceptable stems for the
|
||||||
|
* observed word set represented by one generated stemmer scenario.
|
||||||
|
*
|
||||||
|
* @param scenario generated stemmer scenario
|
||||||
|
* @param reductionMode reduction mode
|
||||||
|
*/
|
||||||
|
@Property(tries = 60)
|
||||||
|
@Label("returned patches should reconstruct only acceptable stems")
|
||||||
|
void returnedPatchesShouldReconstructOnlyAcceptableStems(@ForAll("stemmerScenarios") final StemmerScenario scenario,
|
||||||
|
@ForAll final ReductionMode reductionMode) {
|
||||||
|
final FrequencyTrie<String> trie = buildStemmerTrie(scenario, reductionMode, true);
|
||||||
|
|
||||||
|
for (String observedWord : scenario.observedWords()) {
|
||||||
|
final Set<String> acceptableStems = scenario.acceptableStemsFor(observedWord);
|
||||||
|
final String preferredPatch = trie.get(observedWord);
|
||||||
|
final String[] allPatches = trie.getAll(observedWord);
|
||||||
|
|
||||||
|
assertTrue(preferredPatch != null && !preferredPatch.isEmpty(),
|
||||||
|
"preferred patch must exist for an observed word.");
|
||||||
|
assertTrue(allPatches.length >= 1, "at least one patch must exist for an observed word.");
|
||||||
|
assertTrue(acceptableStems.contains(PatchCommandEncoder.apply(observedWord, preferredPatch)),
|
||||||
|
"preferred patch reconstructed an unexpected stem.");
|
||||||
|
|
||||||
|
final Set<String> producedStems = applyAll(observedWord, allPatches);
|
||||||
|
assertTrue(acceptableStems.containsAll(producedStems),
|
||||||
|
"getAll() must not expose a patch that reconstructs an undeclared stem.");
|
||||||
|
|
||||||
|
if (acceptableStems.contains(observedWord)) {
|
||||||
|
assertTrue(producedStems.contains(observedWord),
|
||||||
|
"storeOriginal semantics must preserve the original stem among returned results.");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Verifies that GZip-compressed binary persistence preserves patch-command trie
|
||||||
|
* lookups.
|
||||||
|
*
|
||||||
|
* @param scenario generated stemmer scenario
|
||||||
|
* @param reductionMode reduction mode
|
||||||
|
*/
|
||||||
|
@Property(tries = 30)
|
||||||
|
@Label("binary persistence should preserve patch-command trie lookups")
|
||||||
|
void binaryPersistenceShouldPreservePatchCommandTrieLookups(
|
||||||
|
@ForAll("stemmerScenarios") final StemmerScenario scenario, @ForAll final ReductionMode reductionMode) {
|
||||||
|
final FrequencyTrie<String> original = buildStemmerTrie(scenario, reductionMode, true);
|
||||||
|
final FrequencyTrie<String> roundTripped = roundTripCompressed(original);
|
||||||
|
|
||||||
|
for (String observedWord : scenario.observedWords()) {
|
||||||
|
assertEquals(original.get(observedWord), roundTripped.get(observedWord),
|
||||||
|
"preferred patch lookup drifted after persistence.");
|
||||||
|
assertArrayEquals(original.getAll(observedWord), roundTripped.getAll(observedWord),
|
||||||
|
"complete patch result set drifted after persistence.");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Applies all returned patches to the supplied source word.
|
||||||
|
*
|
||||||
|
* @param source source word
|
||||||
|
* @param patches returned patches
|
||||||
|
* @return decoded stem set
|
||||||
|
*/
|
||||||
|
private static Set<String> applyAll(final String source, final String[] patches) {
|
||||||
|
final LinkedHashSet<String> stems = new LinkedHashSet<>();
|
||||||
|
for (String patch : patches) {
|
||||||
|
stems.add(PatchCommandEncoder.apply(source, patch));
|
||||||
|
}
|
||||||
|
return stems;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Round-trips one patch-command trie through the compressed binary helper.
|
||||||
|
*
|
||||||
|
* @param trie trie to persist and reload
|
||||||
|
* @return reloaded trie
|
||||||
|
*/
|
||||||
|
private static FrequencyTrie<String> roundTripCompressed(final FrequencyTrie<String> trie) {
|
||||||
|
try {
|
||||||
|
final ByteArrayOutputStream byteArrayOutputStream = new ByteArrayOutputStream();
|
||||||
|
StemmerPatchTrieBinaryIO.write(trie, byteArrayOutputStream);
|
||||||
|
return StemmerPatchTrieBinaryIO.read(new ByteArrayInputStream(byteArrayOutputStream.toByteArray()));
|
||||||
|
} catch (IOException exception) {
|
||||||
|
throw new UncheckedIOException("Unexpected compressed binary round-trip failure.", exception);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
Reference in New Issue
Block a user