feat: add jqwik property-based coverage for trie and patch invariants

test: add property-based tests for FrequencyTrie determinism across repeated compilation
test: verify semantic alignment of get(), getAll(), and getEntries()
test: verify binary serialization and compressed persistence round-trip stability
test: verify builder reconstruction preserves observable trie behavior
test: add property-based tests for PatchCommandEncoder encode/apply round-trip and determinism
test: add generated stemmer-trie properties ensuring returned patches reconstruct only acceptable stems
test: introduce bounded reusable jqwik generators and scenario builders for maintainable property coverage
build: add jqwik to test dependencies and integrate it with the existing JUnit Platform setup
test: replace Jupiter display and tag annotations in jqwik suites with jqwik-native metadata to remove discovery warnings
This commit is contained in:
2026-04-16 19:40:29 +02:00
parent 953ce2226a
commit 594abe2c4b
10 changed files with 850 additions and 8 deletions

View File

@@ -0,0 +1,218 @@
/*******************************************************************************
* Copyright (C) 2026, Leo Galambos
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* 3. Neither the name of the copyright holder nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
******************************************************************************/
package org.egothor.stemmer;
import static org.junit.jupiter.api.Assertions.assertArrayEquals;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertIterableEquals;
import static org.junit.jupiter.api.Assertions.assertNull;
import static org.junit.jupiter.api.Assertions.assertTrue;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.IOException;
import java.io.UncheckedIOException;
import java.util.List;
import net.jqwik.api.ForAll;
import net.jqwik.api.Label;
import net.jqwik.api.Property;
import net.jqwik.api.Tag;
/**
 * jqwik property suite for the compiled {@link FrequencyTrie}.
 *
 * <p>
 * Each property compiles a generated insertion scenario and then probes the
 * scenario's observed keys. The suite checks four invariants: compiling the
 * same scenario twice yields equal lookups, the three lookup methods agree with
 * each other, a binary write/read cycle keeps lookups unchanged, and a builder
 * reconstructed from a compiled trie recompiles to the same preferred value and
 * ordered value array.
 */
@Label("FrequencyTrie properties")
@Tag("unit")
@Tag("property")
@Tag("trie")
class FrequencyTrieProperties extends PropertyBasedTestSupport {

    /**
     * Value codec for string tries; every value is written with
     * {@link DataOutputStream#writeUTF(String)} and read back with
     * {@link DataInputStream#readUTF()}.
     */
    private static final FrequencyTrie.ValueStreamCodec<String> STRING_CODEC = new FrequencyTrie.ValueStreamCodec<>() {
        @Override
        public void write(final DataOutputStream sink, final String text) throws IOException {
            sink.writeUTF(text);
        }

        @Override
        public String read(final DataInputStream source) throws IOException {
            return source.readUTF();
        }
    };

    /**
     * Compiles one scenario twice and requires both tries to answer every observed
     * key identically.
     *
     * @param scenario      generated trie scenario
     * @param reductionMode reduction mode
     */
    @Property(tries = 80)
    @Label("compilation should be deterministic for the same insertion scenario")
    void compilationShouldBeDeterministicForTheSameInsertionScenario(
            @ForAll("trieScenarios") final TrieScenario scenario, @ForAll final ReductionMode reductionMode) {
        final FrequencyTrie<String> firstCompilation = buildTrie(scenario, reductionMode);
        final FrequencyTrie<String> secondCompilation = buildTrie(scenario, reductionMode);
        scenario.observedKeys().forEach(key -> assertTrieStateEquals(firstCompilation, secondCompilation, key));
    }

    /**
     * Requires {@link FrequencyTrie#get(String)},
     * {@link FrequencyTrie#getAll(String)}, and
     * {@link FrequencyTrie#getEntries(String)} to describe the same result for
     * every observed key.
     *
     * @param scenario      generated trie scenario
     * @param reductionMode reduction mode
     */
    @Property(tries = 80)
    @Label("get, getAll, and getEntries should stay semantically aligned")
    void getGetAllAndGetEntriesShouldStaySemanticallyAligned(@ForAll("trieScenarios") final TrieScenario scenario,
            @ForAll final ReductionMode reductionMode) {
        final FrequencyTrie<String> trie = buildTrie(scenario, reductionMode);
        for (String key : scenario.observedKeys()) {
            assertLookupsAligned(trie, key);
        }
    }

    /**
     * Writes a compiled trie to its binary form, reads it back, and requires the
     * reloaded trie to answer every observed key like the original.
     *
     * @param scenario      generated trie scenario
     * @param reductionMode reduction mode
     */
    @Property(tries = 40)
    @Label("binary round-trip should preserve observable trie semantics")
    void binaryRoundTripShouldPreserveObservableTrieSemantics(@ForAll("trieScenarios") final TrieScenario scenario,
            @ForAll final ReductionMode reductionMode) {
        final FrequencyTrie<String> original = buildTrie(scenario, reductionMode);
        final FrequencyTrie<String> reloaded = roundTrip(original);
        scenario.observedKeys().forEach(key -> assertTrieStateEquals(original, reloaded, key));
    }

    /**
     * Rebuilds a writable builder from a compiled trie, compiles it again, and
     * requires the preferred value and the ordered value array to be unchanged for
     * every observed key.
     *
     * @param scenario      generated trie scenario
     * @param reductionMode reduction mode
     */
    @Property(tries = 60)
    @Label("builder reconstruction should preserve observable trie semantics")
    void builderReconstructionShouldPreserveObservableTrieSemantics(
            @ForAll("trieScenarios") final TrieScenario scenario, @ForAll final ReductionMode reductionMode) {
        final FrequencyTrie<String> original = buildTrie(scenario, reductionMode);
        final FrequencyTrie<String> recompiled = FrequencyTrieBuilders
                .copyOf(original, STRING_ARRAY_FACTORY, reductionMode).build();
        for (String key : scenario.observedKeys()) {
            final String expectedPreferred = original.get(key);
            assertEquals(expectedPreferred, recompiled.get(key), "preferred lookup must survive reconstruction.");
            final String[] expectedValues = original.getAll(key);
            assertArrayEquals(expectedValues, recompiled.getAll(key),
                    "complete ordered result set must survive reconstruction.");
        }
    }

    /**
     * Checks the three lookup views of one trie against each other for one key.
     *
     * @param trie trie under test
     * @param key  key to probe
     */
    private static void assertLookupsAligned(final FrequencyTrie<String> trie, final String key) {
        final String preferred = trie.get(key);
        final String[] values = trie.getAll(key);
        final List<ValueCount<String>> entries = trie.getEntries(key);

        assertEquals(values.length, entries.size(), "getAll() and getEntries() must have equal cardinality.");

        // A key without values must look empty through every lookup method.
        if (values.length == 0) {
            assertNull(preferred, "get() must return null when no terminal value exists.");
            assertTrue(entries.isEmpty(), "getEntries() must be empty when getAll() is empty.");
            return;
        }

        assertEquals(values[0], preferred, "get() must expose the preferred first getAll() value.");

        // Walk both views in lockstep; counts must never increase along the list.
        int ceiling = Integer.MAX_VALUE;
        int position = 0;
        for (ValueCount<String> entry : entries) {
            assertEquals(values[position], entry.value(), "entry ordering must match getAll() ordering.");
            assertTrue(entry.count() >= 1, "stored frequencies must remain positive.");
            assertTrue(entry.count() <= ceiling, "entry counts must be ordered descending.");
            ceiling = entry.count();
            position++;
        }
    }

    /**
     * Compares preferred value, ordered value array, and entry list of two tries
     * for one key.
     *
     * @param expected expected trie
     * @param actual   actual trie
     * @param key      key to probe
     */
    private static void assertTrieStateEquals(final FrequencyTrie<String> expected, final FrequencyTrie<String> actual,
            final String key) {
        final String expectedPreferred = expected.get(key);
        assertEquals(expectedPreferred, actual.get(key), "preferred lookup drifted.");
        final String[] expectedValues = expected.getAll(key);
        assertArrayEquals(expectedValues, actual.getAll(key), "ordered result set drifted.");
        final List<ValueCount<String>> expectedEntries = expected.getEntries(key);
        assertIterableEquals(expectedEntries, actual.getEntries(key), "entry list drifted.");
    }

    /**
     * Persists one trie into an in-memory buffer and loads it again.
     *
     * @param trie trie to persist and reload
     * @return reloaded trie
     * @throws UncheckedIOException if writing or reading the buffer fails
     */
    private static FrequencyTrie<String> roundTrip(final FrequencyTrie<String> trie) {
        try {
            return deserialize(serialize(trie));
        } catch (IOException exception) {
            throw new UncheckedIOException("Unexpected binary round-trip failure.", exception);
        }
    }

    /**
     * Writes one trie into a byte array using {@link #STRING_CODEC}.
     *
     * @param trie trie to write
     * @return binary representation
     * @throws IOException if writing fails
     */
    private static byte[] serialize(final FrequencyTrie<String> trie) throws IOException {
        final ByteArrayOutputStream buffer = new ByteArrayOutputStream();
        try (DataOutputStream sink = new DataOutputStream(buffer)) {
            trie.writeTo(sink, STRING_CODEC);
        }
        return buffer.toByteArray();
    }

    /**
     * Reads one trie from a byte array using {@link #STRING_CODEC}.
     *
     * @param bytes binary representation
     * @return loaded trie
     * @throws IOException if reading fails
     */
    private static FrequencyTrie<String> deserialize(final byte[] bytes) throws IOException {
        try (DataInputStream source = new DataInputStream(new ByteArrayInputStream(bytes))) {
            return FrequencyTrie.readFrom(source, STRING_ARRAY_FACTORY, STRING_CODEC);
        }
    }
}

View File

@@ -0,0 +1,93 @@
/*******************************************************************************
* Copyright (C) 2026, Leo Galambos
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* 3. Neither the name of the copyright holder nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
******************************************************************************/
package org.egothor.stemmer;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertNotNull;
import net.jqwik.api.ForAll;
import net.jqwik.api.Label;
import net.jqwik.api.Property;
import net.jqwik.api.Tag;
/**
 * jqwik property suite for {@link PatchCommandEncoder}.
 *
 * <p>
 * Two contracts are checked on generated word pairs: applying an encoded patch
 * to its source yields the requested target, and encoding the same pair
 * repeatedly always yields the same patch string.
 */
@Label("PatchCommandEncoder properties")
@Tag("unit")
@Tag("property")
@Tag("patch")
class PatchCommandEncoderProperties extends PropertyBasedTestSupport {

    /**
     * Encodes a patch from {@code source} to {@code target} and requires that
     * applying it to {@code source} gives back {@code target}.
     *
     * @param source source word
     * @param target target word
     */
    @Property(tries = 200)
    @Label("encode followed by apply should reconstruct the target word")
    void encodeFollowedByApplyShouldReconstructTheTargetWord(@ForAll("words") final String source,
            @ForAll("words") final String target) {
        final String patch = new PatchCommandEncoder().encode(source, target);
        assertNotNull(patch, "patch generation must succeed for non-null inputs.");

        final String reconstructed = PatchCommandEncoder.apply(source, patch);
        assertEquals(target, reconstructed, "applying the encoded patch must reconstruct the target word.");
    }

    /**
     * Encodes the same pair twice on one encoder and once on a fresh encoder, and
     * requires all three patches to be equal.
     *
     * @param source source word
     * @param target target word
     */
    @Property(tries = 150)
    @Label("encode should be deterministic for one source-target pair")
    void encodeShouldBeDeterministicForOneSourceTargetPair(@ForAll("words") final String source,
            @ForAll("words") final String target) {
        final PatchCommandEncoder reusedEncoder = new PatchCommandEncoder();
        final String initialPatch = reusedEncoder.encode(source, target);
        final String repeatedPatch = reusedEncoder.encode(source, target);

        final PatchCommandEncoder freshEncoder = new PatchCommandEncoder();
        final String freshPatch = freshEncoder.encode(source, target);

        assertEquals(initialPatch, repeatedPatch, "one encoder instance must produce stable output.");
        assertEquals(initialPatch, freshPatch, "fresh encoder instances must produce the same patch output.");
    }
}

View File

@@ -0,0 +1,326 @@
/*******************************************************************************
* Copyright (C) 2026, Leo Galambos
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* 3. Neither the name of the copyright holder nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
******************************************************************************/
package org.egothor.stemmer;
import java.util.Collections;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Objects;
import java.util.Set;
import java.util.function.IntFunction;
import net.jqwik.api.Arbitraries;
import net.jqwik.api.Arbitrary;
import net.jqwik.api.Combinators;
import net.jqwik.api.Provide;
import net.jqwik.api.arbitraries.ListArbitrary;
/**
 * Shared jqwik generators and helpers for property-based tests covering the
 * Radixor algorithmic core.
 *
 * <p>
 * The generated domains are intentionally bounded (twelve-letter alphabet,
 * words of at most twelve characters, small list and set sizes) to keep
 * execution time predictable while still covering duplicate insertions, missing
 * lookups, and patch-command transformations.
 *
 * <p>
 * The scenario records keep their sets in insertion order so that iterating a
 * generated scenario visits its elements in the order they were supplied.
 */
abstract class PropertyBasedTestSupport {

    /**
     * Shared array factory for string tries.
     */
    protected static final IntFunction<String[]> STRING_ARRAY_FACTORY = String[]::new;

    /**
     * Upper length bound shared by all generated words.
     */
    private static final int MAX_WORD_LENGTH = 12;

    /**
     * Provides bounded lowercase words, including the empty word, suitable for
     * trie keys and patch encoder inputs.
     *
     * @return bounded word generator
     */
    @Provide
    protected Arbitrary<String> words() {
        return boundedWords(0);
    }

    /**
     * Provides bounded non-empty lowercase words suitable for dictionary variants
     * and stems.
     *
     * @return bounded non-empty word generator
     */
    @Provide
    protected Arbitrary<String> nonEmptyWords() {
        return boundedWords(1);
    }

    /**
     * Provides bounded insertion scenarios for trie-focused properties.
     *
     * <p>
     * Each scenario holds 1 to 24 insertions with counts from 1 to 5. Its observed
     * keys are all inserted keys followed by up to 16 additional generated probes.
     *
     * @return trie scenario generator
     */
    @Provide
    protected Arbitrary<TrieScenario> trieScenarios() {
        final Arbitrary<TrieInsertion> insertionArbitrary = Combinators
                .combine(words(), nonEmptyWords(), Arbitraries.integers().between(1, 5)).as(TrieInsertion::new);
        final ListArbitrary<TrieInsertion> insertions = insertionArbitrary.list().ofMinSize(1).ofMaxSize(24);
        final Arbitrary<List<String>> observedKeys = words().list().ofMinSize(0).ofMaxSize(16);
        return Combinators.combine(insertions, observedKeys)
                .as((scenarioInsertions, additionalObservedKeys) -> new TrieScenario(scenarioInsertions,
                        mergeObservedKeys(scenarioInsertions, additionalObservedKeys)));
    }

    /**
     * Provides bounded stemmer scenarios where each variant word maps to one or
     * more acceptable stems.
     *
     * <p>
     * Each scenario holds 1 to 10 entries. Every entry combines one stem with 1 to
     * 4 generated variants, and the stem itself is always added to its variant set.
     *
     * @return stemmer scenario generator
     */
    @Provide
    protected Arbitrary<StemmerScenario> stemmerScenarios() {
        final Arbitrary<StemmerEntry> entryArbitrary = Combinators
                .combine(nonEmptyWords(), nonEmptyWords().set().ofMinSize(1).ofMaxSize(4)).as((stem, variants) -> {
                    final LinkedHashSet<String> normalizedVariants = new LinkedHashSet<>(variants);
                    normalizedVariants.add(stem);
                    return new StemmerEntry(stem, normalizedVariants);
                });
        return entryArbitrary.list().ofMinSize(1).ofMaxSize(10).map(StemmerScenario::new);
    }

    /**
     * Builds a compiled trie from one generated scenario by replaying its
     * insertions in list order.
     *
     * @param scenario      trie scenario
     * @param reductionMode reduction mode
     * @return compiled trie
     * @throws NullPointerException if any argument is {@code null}
     */
    protected FrequencyTrie<String> buildTrie(final TrieScenario scenario, final ReductionMode reductionMode) {
        Objects.requireNonNull(scenario, "scenario");
        Objects.requireNonNull(reductionMode, "reductionMode");
        final FrequencyTrie.Builder<String> builder = new FrequencyTrie.Builder<>(STRING_ARRAY_FACTORY, reductionMode);
        for (TrieInsertion insertion : scenario.insertions()) {
            builder.put(insertion.key(), insertion.value(), insertion.count());
        }
        return builder.build();
    }

    /**
     * Builds a patch-command trie from one generated stemmer scenario.
     *
     * <p>
     * For every entry, each variant that differs from the stem is stored under the
     * variant with the patch that encodes variant-to-stem. Variants are visited in
     * the entry's set order.
     *
     * @param scenario      stemmer scenario
     * @param reductionMode reduction mode
     * @param storeOriginal whether each stem is additionally stored under itself
     *                      using {@link PatchCommandEncoder#NOOP_PATCH}
     * @return compiled patch-command trie
     * @throws NullPointerException if {@code scenario} or {@code reductionMode} is
     *                              {@code null}
     */
    protected FrequencyTrie<String> buildStemmerTrie(final StemmerScenario scenario, final ReductionMode reductionMode,
            final boolean storeOriginal) {
        Objects.requireNonNull(scenario, "scenario");
        Objects.requireNonNull(reductionMode, "reductionMode");
        final FrequencyTrie.Builder<String> builder = new FrequencyTrie.Builder<>(STRING_ARRAY_FACTORY, reductionMode);
        final PatchCommandEncoder encoder = new PatchCommandEncoder();
        for (StemmerEntry entry : scenario.entries()) {
            if (storeOriginal) {
                builder.put(entry.stem(), PatchCommandEncoder.NOOP_PATCH);
            }
            for (String variant : entry.variants()) {
                // The stem is always part of its own variant set; it needs no patch.
                if (!variant.equals(entry.stem())) {
                    builder.put(variant, encoder.encode(variant, entry.stem()));
                }
            }
        }
        return builder.build();
    }

    /**
     * Creates the shared bounded word generator over the letters {@code a} to
     * {@code l}.
     *
     * @param minLength minimum generated word length
     * @return bounded word generator
     */
    private static Arbitrary<String> boundedWords(final int minLength) {
        return Arbitraries.strings().withChars('a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l')
                .ofMinLength(minLength).ofMaxLength(MAX_WORD_LENGTH);
    }

    /**
     * Merges observed lookup keys while preserving order: inserted keys first, in
     * insertion order, followed by additional probes not already present.
     *
     * @param insertions             inserted trie mappings
     * @param additionalObservedKeys extra lookup probes
     * @return merged lookup-key set
     */
    private static Set<String> mergeObservedKeys(final List<TrieInsertion> insertions,
            final List<String> additionalObservedKeys) {
        final LinkedHashSet<String> observedKeys = new LinkedHashSet<>();
        for (TrieInsertion insertion : insertions) {
            observedKeys.add(insertion.key());
        }
        observedKeys.addAll(additionalObservedKeys);
        return observedKeys;
    }

    /**
     * Creates an unmodifiable defensive copy of a set that keeps the iteration
     * order of the source.
     *
     * <p>
     * {@link Set#copyOf(java.util.Collection)} is not used here because the
     * iteration order of the set it returns is unspecified, which would discard the
     * order callers establish with {@link LinkedHashSet}.
     *
     * @param <T>    element type
     * @param source set to copy
     * @param name   component name used in the failure message
     * @return unmodifiable copy with the source's iteration order
     * @throws NullPointerException if {@code source} is {@code null} or contains a
     *                              {@code null} element
     */
    private static <T> Set<T> orderedUnmodifiableCopy(final Set<? extends T> source, final String name) {
        final Set<T> copy = new LinkedHashSet<>(source);
        // Stay as strict about null elements as Set.copyOf was.
        if (copy.contains(null)) {
            throw new NullPointerException(name + " must not contain null elements.");
        }
        return Collections.unmodifiableSet(copy);
    }

    /**
     * Generated insertion into a trie builder.
     *
     * @param key   trie key
     * @param value stored value
     * @param count positive insertion count
     */
    protected record TrieInsertion(String key, String value, int count) {
        /**
         * Creates a validated insertion descriptor.
         *
         * @param key   trie key
         * @param value stored value
         * @param count positive insertion count
         * @throws NullPointerException     if {@code key} or {@code value} is
         *                                  {@code null}
         * @throws IllegalArgumentException if {@code count} is less than 1
         */
        public TrieInsertion {
            Objects.requireNonNull(key, "key");
            Objects.requireNonNull(value, "value");
            if (count < 1) {
                throw new IllegalArgumentException("count must be at least 1.");
            }
        }
    }

    /**
     * Generated trie scenario used by multiple properties.
     *
     * @param insertions   generated insertions, immutable
     * @param observedKeys lookup probes, unmodifiable and in the supplied order
     */
    protected record TrieScenario(List<TrieInsertion> insertions, Set<String> observedKeys) {
        /**
         * Creates a validated trie scenario from defensive copies of both arguments.
         *
         * @param insertions   generated insertions
         * @param observedKeys lookup probes
         * @throws NullPointerException     if an argument or one of its elements is
         *                                  {@code null}
         * @throws IllegalArgumentException if {@code insertions} is empty
         */
        public TrieScenario {
            Objects.requireNonNull(insertions, "insertions");
            Objects.requireNonNull(observedKeys, "observedKeys");
            insertions = List.copyOf(insertions);
            observedKeys = orderedUnmodifiableCopy(observedKeys, "observedKeys");
            if (insertions.isEmpty()) {
                throw new IllegalArgumentException("insertions must not be empty.");
            }
        }

        /**
         * Returns a compact summary that reports only the element counts.
         *
         * @return summary text
         */
        @Override
        public String toString() {
            return "TrieScenario[insertions=" + this.insertions.size() + ", observedKeys=" + this.observedKeys.size()
                    + "]";
        }
    }

    /**
     * Generated stemmer dictionary line equivalent.
     *
     * @param stem     canonical stem
     * @param variants variants accepted for the stem, unmodifiable and in the
     *                 supplied order
     */
    protected record StemmerEntry(String stem, Set<String> variants) {
        /**
         * Creates a validated stemmer entry from a defensive copy of the variants.
         *
         * @param stem     canonical stem
         * @param variants variants accepted for the stem
         * @throws NullPointerException     if an argument or a variant is
         *                                  {@code null}
         * @throws IllegalArgumentException if {@code stem} or {@code variants} is
         *                                  empty
         */
        public StemmerEntry {
            Objects.requireNonNull(stem, "stem");
            Objects.requireNonNull(variants, "variants");
            variants = orderedUnmodifiableCopy(variants, "variants");
            if (stem.isEmpty()) {
                throw new IllegalArgumentException("stem must not be empty.");
            }
            if (variants.isEmpty()) {
                throw new IllegalArgumentException("variants must not be empty.");
            }
        }
    }

    /**
     * Generated stemmer scenario used by patch-command trie properties.
     *
     * @param entries generated entries, immutable
     */
    protected record StemmerScenario(List<StemmerEntry> entries) {
        /**
         * Creates a validated stemmer scenario from a defensive copy of the entries.
         *
         * @param entries generated entries
         * @throws NullPointerException     if {@code entries} or one of its elements
         *                                  is {@code null}
         * @throws IllegalArgumentException if {@code entries} is empty
         */
        public StemmerScenario {
            Objects.requireNonNull(entries, "entries");
            entries = List.copyOf(entries);
            if (entries.isEmpty()) {
                throw new IllegalArgumentException("entries must not be empty.");
            }
        }

        /**
         * Returns every stem and variant of the scenario, in entry order and without
         * duplicates.
         *
         * @return observed lookup words
         */
        public Set<String> observedWords() {
            final LinkedHashSet<String> observedWords = new LinkedHashSet<>();
            for (StemmerEntry entry : this.entries) {
                observedWords.add(entry.stem());
                observedWords.addAll(entry.variants());
            }
            return observedWords;
        }

        /**
         * Returns the stems of all entries whose stem equals the word or whose
         * variants contain it.
         *
         * @param word observed word
         * @return acceptable stems, empty if no entry mentions the word
         */
        public Set<String> acceptableStemsFor(final String word) {
            final LinkedHashSet<String> stems = new LinkedHashSet<>();
            for (StemmerEntry entry : this.entries) {
                if (entry.stem().equals(word) || entry.variants().contains(word)) {
                    stems.add(entry.stem());
                }
            }
            return stems;
        }

        /**
         * Returns a compact summary that reports only the entry count.
         *
         * @return summary text
         */
        @Override
        public String toString() {
            return "StemmerScenario[entries=" + this.entries.size() + "]";
        }
    }
}

View File

@@ -0,0 +1,151 @@
/*******************************************************************************
* Copyright (C) 2026, Leo Galambos
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* 3. Neither the name of the copyright holder nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
******************************************************************************/
package org.egothor.stemmer;
import static org.junit.jupiter.api.Assertions.assertArrayEquals;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertTrue;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.UncheckedIOException;
import java.util.LinkedHashSet;
import java.util.Set;
import net.jqwik.api.ForAll;
import net.jqwik.api.Label;
import net.jqwik.api.Property;
import net.jqwik.api.Tag;
/**
 * jqwik property suite for patch-command stemmer tries.
 *
 * <p>
 * The properties compile generated stemmer scenarios with original stems
 * stored. They check that every patch returned for an observed word decodes to
 * a stem the scenario declares acceptable for that word, and that a round-trip
 * through {@link StemmerPatchTrieBinaryIO} leaves lookups unchanged.
 */
@Label("Stemmer patch trie properties")
@Tag("unit")
@Tag("property")
@Tag("stemming")
class StemmerPatchTrieProperties extends PropertyBasedTestSupport {

    /**
     * Requires, for every observed word of the scenario, that the preferred patch
     * and all returned patches decode only to acceptable stems.
     *
     * @param scenario      generated stemmer scenario
     * @param reductionMode reduction mode
     */
    @Property(tries = 60)
    @Label("returned patches should reconstruct only acceptable stems")
    void returnedPatchesShouldReconstructOnlyAcceptableStems(@ForAll("stemmerScenarios") final StemmerScenario scenario,
            @ForAll final ReductionMode reductionMode) {
        final FrequencyTrie<String> trie = buildStemmerTrie(scenario, reductionMode, true);
        for (String observedWord : scenario.observedWords()) {
            assertOnlyAcceptableStems(scenario, trie, observedWord);
        }
    }

    /**
     * Requires that persisting a patch-command trie through
     * {@link StemmerPatchTrieBinaryIO} and loading it again keeps the preferred
     * patch and the complete patch array of every observed word.
     *
     * @param scenario      generated stemmer scenario
     * @param reductionMode reduction mode
     */
    @Property(tries = 30)
    @Label("binary persistence should preserve patch-command trie lookups")
    void binaryPersistenceShouldPreservePatchCommandTrieLookups(
            @ForAll("stemmerScenarios") final StemmerScenario scenario, @ForAll final ReductionMode reductionMode) {
        final FrequencyTrie<String> original = buildStemmerTrie(scenario, reductionMode, true);
        final FrequencyTrie<String> reloaded = roundTripCompressed(original);
        for (String observedWord : scenario.observedWords()) {
            final String expectedPreferred = original.get(observedWord);
            assertEquals(expectedPreferred, reloaded.get(observedWord),
                    "preferred patch lookup drifted after persistence.");
            final String[] expectedPatches = original.getAll(observedWord);
            assertArrayEquals(expectedPatches, reloaded.getAll(observedWord),
                    "complete patch result set drifted after persistence.");
        }
    }

    /**
     * Checks the patches stored for one observed word against the stems the
     * scenario accepts for it.
     *
     * @param scenario     scenario that declares the acceptable stems
     * @param trie         compiled patch-command trie built from the scenario
     * @param observedWord word to probe
     */
    private static void assertOnlyAcceptableStems(final StemmerScenario scenario, final FrequencyTrie<String> trie,
            final String observedWord) {
        final Set<String> acceptableStems = scenario.acceptableStemsFor(observedWord);
        final String preferredPatch = trie.get(observedWord);
        final String[] allPatches = trie.getAll(observedWord);

        final boolean hasPreferredPatch = preferredPatch != null && !preferredPatch.isEmpty();
        assertTrue(hasPreferredPatch, "preferred patch must exist for an observed word.");
        assertTrue(allPatches.length >= 1, "at least one patch must exist for an observed word.");

        final String preferredStem = PatchCommandEncoder.apply(observedWord, preferredPatch);
        assertTrue(acceptableStems.contains(preferredStem), "preferred patch reconstructed an unexpected stem.");

        final Set<String> producedStems = applyAll(observedWord, allPatches);
        assertTrue(acceptableStems.containsAll(producedStems),
                "getAll() must not expose a patch that reconstructs an undeclared stem.");

        // A word that is itself a stem was stored under itself, so it must decode to itself.
        if (acceptableStems.contains(observedWord)) {
            assertTrue(producedStems.contains(observedWord),
                    "storeOriginal semantics must preserve the original stem among returned results.");
        }
    }

    /**
     * Decodes every patch against the source word and collects the distinct
     * results in first-seen order.
     *
     * @param source  source word
     * @param patches returned patches
     * @return decoded stem set
     */
    private static Set<String> applyAll(final String source, final String[] patches) {
        final Set<String> decoded = new LinkedHashSet<>();
        for (int index = 0; index < patches.length; index++) {
            decoded.add(PatchCommandEncoder.apply(source, patches[index]));
        }
        return decoded;
    }

    /**
     * Writes one patch-command trie into an in-memory buffer with
     * {@link StemmerPatchTrieBinaryIO} and reads it back.
     *
     * @param trie trie to persist and reload
     * @return reloaded trie
     * @throws UncheckedIOException if writing or reading the buffer fails
     */
    private static FrequencyTrie<String> roundTripCompressed(final FrequencyTrie<String> trie) {
        final ByteArrayOutputStream buffer = new ByteArrayOutputStream();
        try {
            StemmerPatchTrieBinaryIO.write(trie, buffer);
            final ByteArrayInputStream persisted = new ByteArrayInputStream(buffer.toByteArray());
            return StemmerPatchTrieBinaryIO.read(persisted);
        } catch (IOException exception) {
            throw new UncheckedIOException("Unexpected compressed binary round-trip failure.", exception);
        }
    }
}