From dadab5514e44bf1a03eb25c16d7e361e013233e0 Mon Sep 17 00:00:00 2001 From: Leo Galambos Date: Sat, 16 May 2026 03:24:07 +0200 Subject: [PATCH] feat: implement dense-child optimized trie lookup and enterprise test/CI profile hardening --- .github/workflows/build.yml | 12 +- .github/workflows/pages.yml | 19 +- README.md | 3 + build.gradle | 161 +++++- docs/lookup-edge-optimization.md | 193 +++++++ docs/programmatic-loading-and-building.md | 37 ++ docs/programmatic-usage.md | 1 + docs/quality-and-operations.md | 21 + docs/quick-start.md | 30 + docs/reports.md | 2 +- docs/test-taxonomy-and-filtering.md | 216 ++++++++ gradle/maven-pom.gradle | 2 +- mkdocs.yml | 2 + .../org/egothor/stemmer/FrequencyTrie.java | 524 +++++++++++------- .../stemmer/StemmerPatchTrieBinaryIO.java | 71 +++ .../stemmer/StemmerPatchTrieLoader.java | 55 +- .../egothor/stemmer/trie/CompiledNode.java | 260 ++++++++- .../stemmer/CompileIntegrationTest.java | 12 +- .../java/org/egothor/stemmer/CompileTest.java | 5 +- .../CompiledTrieArtifactRegressionTest.java | 3 +- .../stemmer/DiacriticStripperTest.java | 3 +- .../stemmer/FrequencyTrieBuildersTest.java | 2 +- .../stemmer/FrequencyTrieProperties.java | 4 +- .../egothor/stemmer/FrequencyTrieTest.java | 306 ++++++++++ .../FuzzStemmerAndTrieCompilationTest.java | 3 +- .../PatchCommandEncoderProperties.java | 4 +- .../stemmer/PatchCommandEncoderTest.java | 6 +- .../stemmer/StemmerDictionaryParserTest.java | 1 + .../StemmerKnowledgeExperimentTest.java | 2 +- .../stemmer/StemmerPatchTrieBinaryIOTest.java | 153 ++++- .../stemmer/StemmerPatchTrieLoaderTest.java | 71 ++- .../stemmer/StemmerPatchTrieProperties.java | 5 +- .../org/egothor/stemmer/TrieMetadataTest.java | 2 + .../stemmer/WordTraversalDirectionTest.java | 2 + .../stemmer/trie/ChildDescriptorTest.java | 2 +- .../trie/CompiledNodeAndNodeDataTest.java | 135 ++++- .../trie/DominantLocalDescriptorTest.java | 2 +- .../stemmer/trie/LocalValueSummaryTest.java | 2 +- .../egothor/stemmer/trie/MutableNodeTest.java | 2 +- .../trie/RankedLocalDescriptorTest.java | 2 +- .../egothor/stemmer/trie/ReducedNodeTest.java | 2 +- .../stemmer/trie/ReductionContextTest.java | 2 +- .../stemmer/trie/ReductionSignatureTest.java | 2 +- .../trie/UnorderedLocalDescriptorTest.java | 2 +- 44 files changed, 2052 insertions(+), 294 deletions(-) create mode 100644 docs/lookup-edge-optimization.md create mode 100644 docs/test-taxonomy-and-filtering.md diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 9d6de74..eb5a5b6 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -51,7 +51,7 @@ jobs: test -f gradle/verification-metadata.xml - name: Execute build, tests, PMD, coverage, Javadoc, distribution packaging, and SBOM generation - run: ./gradlew --no-daemon clean build pmdMain javadoc jacocoTestReport distZip cyclonedxBom + run: ./gradlew --no-daemon clean ciRelease distZip pmdMain javadoc jacocoCiReleaseReport cyclonedxBom - name: Upload SBOM if: always() @@ -70,8 +70,8 @@ jobs: with: name: test-reports path: | - build/reports/tests/test - build/test-results/test + build/reports/tests + build/test-results if-no-files-found: warn retention-days: 14 @@ -90,8 +90,8 @@ jobs: with: name: coverage-reports path: | - build/reports/jacoco/test/html - build/reports/jacoco/test/jacocoTestReport.xml + build/reports/jacoco/jacocoCiReleaseReport/html + build/reports/jacoco/jacocoCiReleaseReport/jacocoCiReleaseReport.xml if-no-files-found: warn retention-days: 14 @@ -160,7 +160,7 @@ jobs: env: SIGNING_KEY: ${{ secrets.SIGNING_KEY }} SIGNING_PASSWORD: ${{ secrets.SIGNING_PASSWORD }} - run: ./gradlew --no-daemon clean build pmdMain javadoc jacocoTestReport cyclonedxBom centralBundle + run: ./gradlew --no-daemon clean ciRelease distZip pmdMain javadoc jacocoCiReleaseReport cyclonedxBom centralBundle - name: Generate release changelog shell: bash diff --git a/.github/workflows/pages.yml b/.github/workflows/pages.yml index 513a1c6..f1447d5 100644 --- a/.github/workflows/pages.yml +++ b/.github/workflows/pages.yml @@ -70,7 +70,7 @@ jobs: test -f gradle/verification-metadata.xml - name: Build reports for publication - run: ./gradlew --no-daemon clean build pmdMain javadoc jacocoTestReport pitest jmh cyclonedxBom + run: ./gradlew --no-daemon clean ciRelease pmdMain javadoc jacocoCiReleaseReport pitest jmh cyclonedxBom - name: Prepare gh-pages worktree shell: bash @@ -93,6 +93,9 @@ jobs: run: | set -euo pipefail + TEST_REPORT_DIR="build/reports/tests/ciRelease" + JACOCO_REPORT_DIR="build/reports/jacoco/jacocoCiReleaseReport" + SITE_DIR=".gh-pages" RUN_DIR="${SITE_DIR}/builds/${GITHUB_RUN_NUMBER}" RUN_METRICS_DIR="${RUN_DIR}/metrics" @@ -106,14 +109,14 @@ jobs: cp -R build/docs/javadoc "${RUN_DIR}/javadoc" cp -R build/docs/javadoc "${LATEST_DIR}/javadoc" - cp -R build/reports/tests/test "${RUN_DIR}/test" - cp -R build/reports/tests/test "${LATEST_DIR}/test" + cp -R "${TEST_REPORT_DIR}" "${RUN_DIR}/test" + cp -R "${TEST_REPORT_DIR}" "${LATEST_DIR}/test" cp -R build/reports/pmd "${RUN_DIR}/pmd" cp -R build/reports/pmd "${LATEST_DIR}/pmd" - cp -R build/reports/jacoco/test/html "${RUN_DIR}/coverage" - cp -R build/reports/jacoco/test/html "${LATEST_DIR}/coverage" + cp -R "${JACOCO_REPORT_DIR}/html" "${RUN_DIR}/coverage" + cp -R "${JACOCO_REPORT_DIR}/html" "${LATEST_DIR}/coverage" cp -R build/reports/pitest "${RUN_DIR}/pitest" cp -R build/reports/pitest "${LATEST_DIR}/pitest" @@ -178,7 +181,7 @@ jobs: python3 \ ./tools/generate-pages-badges.py \ - --jacoco-xml build/reports/jacoco/test/jacocoTestReport.xml \ + --jacoco-xml "${JACOCO_REPORT_DIR}/jacocoCiReleaseReport.xml" \ --pit-xml build/reports/pitest/mutations.xml \ --jmh-csv build/reports/jmh/jmh-results.csv \ --run-metrics-dir "${RUN_METRICS_DIR}" \ @@ -228,7 +231,7 @@ jobs:

Build ${GITHUB_RUN_NUMBER} from commit ${GITHUB_SHA}

*/ -@Tag("unit") +@Tag("compat") @Tag("regression") @Tag("determinism") @Tag("serialization") +@Tag("trie") @TestInstance(TestInstance.Lifecycle.PER_CLASS) final class CompiledTrieArtifactRegressionTest { diff --git a/src/test/java/org/egothor/stemmer/DiacriticStripperTest.java b/src/test/java/org/egothor/stemmer/DiacriticStripperTest.java index 66a8086..6f87781 100644 --- a/src/test/java/org/egothor/stemmer/DiacriticStripperTest.java +++ b/src/test/java/org/egothor/stemmer/DiacriticStripperTest.java @@ -41,7 +41,8 @@ import org.junit.jupiter.api.Test; * Unit tests for {@link DiacriticStripper}. */ @Tag("unit") -@Tag("diacritics") +@Tag("diacritic") +@Tag("stemmer") @DisplayName("DiacriticStripper") class DiacriticStripperTest { diff --git a/src/test/java/org/egothor/stemmer/FrequencyTrieBuildersTest.java b/src/test/java/org/egothor/stemmer/FrequencyTrieBuildersTest.java index 51a23c0..c850346 100644 --- a/src/test/java/org/egothor/stemmer/FrequencyTrieBuildersTest.java +++ b/src/test/java/org/egothor/stemmer/FrequencyTrieBuildersTest.java @@ -59,7 +59,7 @@ import org.junit.jupiter.api.Test; */ @DisplayName("FrequencyTrieBuilders") @Tag("unit") -@Tag("builder") +@Tag("construction") @Tag("frequency-trie") class FrequencyTrieBuildersTest { diff --git a/src/test/java/org/egothor/stemmer/FrequencyTrieProperties.java b/src/test/java/org/egothor/stemmer/FrequencyTrieProperties.java index e0ef4a9..00e898c 100644 --- a/src/test/java/org/egothor/stemmer/FrequencyTrieProperties.java +++ b/src/test/java/org/egothor/stemmer/FrequencyTrieProperties.java @@ -47,7 +47,7 @@ import java.util.List; import net.jqwik.api.ForAll; import net.jqwik.api.Label; import net.jqwik.api.Property; -import net.jqwik.api.Tag; +import org.junit.jupiter.api.Tag; /** * Property-based tests for the compiled trie abstraction. @@ -59,9 +59,9 @@ import net.jqwik.api.Tag; * core algorithm without overfitting to particular fixture data. */ @Label("FrequencyTrie properties") -@Tag("unit") @Tag("property") @Tag("trie") +@Tag("frequency-trie") class FrequencyTrieProperties extends PropertyBasedTestSupport { /** diff --git a/src/test/java/org/egothor/stemmer/FrequencyTrieTest.java b/src/test/java/org/egothor/stemmer/FrequencyTrieTest.java index 6835799..7afbe15 100644 --- a/src/test/java/org/egothor/stemmer/FrequencyTrieTest.java +++ b/src/test/java/org/egothor/stemmer/FrequencyTrieTest.java @@ -33,6 +33,7 @@ package org.egothor.stemmer; import static org.junit.jupiter.api.Assertions.assertAll; import static org.junit.jupiter.api.Assertions.assertArrayEquals; import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertFalse; import static org.junit.jupiter.api.Assertions.assertNotNull; import static org.junit.jupiter.api.Assertions.assertNull; import static org.junit.jupiter.api.Assertions.assertSame; @@ -379,6 +380,24 @@ class FrequencyTrieTest { assertThrows(UnsupportedOperationException.class, () -> entries.add(new ValueCount("z", 1))); } + /** + * Verifies that {@link FrequencyTrie#getEntries(String)} short-circuits to a one-item immutable list. + */ + @Test + @DisplayName("getEntries returns a one-item list for single stored values") + void getEntriesReturnsSingleItemListForSingleStoredValue() { + final FrequencyTrie.Builder builder = rankedBuilder(); + + builder.put("gamma", "only"); + + final FrequencyTrie trie = builder.build(); + + final List> entries = trie.getEntries("gamma"); + + assertAll(() -> assertEquals(List.of(new ValueCount("only", 1)), entries), + () -> assertThrows(UnsupportedOperationException.class, () -> entries.add(new ValueCount("z", 1)))); + } + /** * Verifies that equal frequencies prefer the shorter string representation. */ @@ -755,6 +774,115 @@ class FrequencyTrieTest { .readFrom(new ByteArrayInputStream(serializedEmptyTrie), String[]::new, null))); } + /** + * Verifies that reading a compiled trie with a negative max-expanded override + * smaller than -1 is rejected. + */ + @Test + @Tag("persistence") + @DisplayName("readFrom rejects invalid maxExpandedIndex override") + void readFromRejectsInvalidMaxExpandedIndexOverride() { + final byte[] bytes = createSerializedStream(0x45475452, 1, 1, 0, new NodeWriter[] { dataOutput -> { + dataOutput.writeInt(0); + dataOutput.writeInt(0); + } }); + + final IllegalArgumentException exception = assertThrows(IllegalArgumentException.class, + () -> FrequencyTrie.readFrom(new ByteArrayInputStream(bytes), String[]::new, STRING_CODEC, -2)); + + assertEquals("maxExpandedIndex must be >= -1.", exception.getMessage()); + } + + /** + * Verifies that the max-expanded override controls dense lookup materialization + * while preserving lookup semantics. + */ + @Test + @Tag("persistence") + @DisplayName("readFrom respects dense lookup max-expanded index override") + void readFromRespectsDenseLookupMaxExpandedIndexOverride() throws IOException { + final FrequencyTrie.Builder builder = rankedBuilder(); + + builder.put("a", "a"); + builder.put("b", "b"); + builder.put("c", "c"); + builder.put("d", "d"); + + final FrequencyTrie original = builder.build(); + final ByteArrayOutputStream outputStream = new ByteArrayOutputStream(); + original.writeTo(outputStream, STRING_CODEC); + final byte[] serializedTrie = outputStream.toByteArray(); + + final FrequencyTrie defaultDense = FrequencyTrie.readFrom(new ByteArrayInputStream(serializedTrie), String[]::new, + STRING_CODEC); + final FrequencyTrie defaultDenseByNegative = FrequencyTrie.readFrom(new ByteArrayInputStream(serializedTrie), + String[]::new, STRING_CODEC, -1); + final FrequencyTrie disabledDense = FrequencyTrie.readFrom(new ByteArrayInputStream(serializedTrie), String[]::new, + STRING_CODEC, 0); + + assertAll( + () -> assertTrue(defaultDense.root().hasDenseLookup(), + "Default read should enable dense lookup for compact first-level edges."), + () -> assertTrue(defaultDenseByNegative.root().hasDenseLookup(), + "Negative override should use the default dense lookup span."), + () -> assertFalse(disabledDense.root().hasDenseLookup(), + "Zero override should disable dense lookup tables."), + () -> assertEquals(original.get("a"), disabledDense.get("a")), + () -> assertEquals(original.get("b"), disabledDense.get("b")), + () -> assertEquals(original.get("c"), disabledDense.get("c")), + () -> assertEquals(original.get("d"), disabledDense.get("d")), + () -> assertEquals(original.get("z"), disabledDense.get("z"))); + } + + /** + * Verifies that cyclic serialized node references are rejected as invalid + * serialization. + */ + @Test + @Tag("persistence") + @DisplayName("readFrom rejects cyclic serialized node references") + void readFromRejectsCyclicSerializedNodeReferences() { + final byte[] bytes = createSerializedStream(0x45475452, 1, 2, 0, new NodeWriter[] { + dataOutput -> { + dataOutput.writeInt(1); + dataOutput.writeChar('b'); + dataOutput.writeInt(1); + dataOutput.writeInt(0); + }, + dataOutput -> { + dataOutput.writeInt(1); + dataOutput.writeChar('a'); + dataOutput.writeInt(0); + dataOutput.writeInt(0); + } }); + + final IOException exception = assertThrows(IOException.class, + () -> FrequencyTrie.readFrom(new ByteArrayInputStream(bytes), String[]::new, STRING_CODEC)); + + assertTrue(exception.getMessage().contains("cyclic reference detected")); + } + + /** + * Verifies that child node references outside the valid serialized range are + * rejected. + */ + @Test + @Tag("persistence") + @DisplayName("readFrom rejects invalid child node identifiers") + void readFromRejectsInvalidChildNodeId() { + final byte[] bytes = createSerializedStream(0x45475452, 1, 1, 0, new NodeWriter[] { dataOutput -> { + dataOutput.writeInt(1); + dataOutput.writeChar('a'); + dataOutput.writeInt(3); + dataOutput.writeInt(0); + } }); + + final IOException exception = assertThrows(IOException.class, + () -> FrequencyTrie.readFrom(new ByteArrayInputStream(bytes), String[]::new, STRING_CODEC)); + + assertTrue(exception.getMessage().contains("Invalid child node id")); + } + /** * Verifies that deserialization rejects an invalid stream magic header. */ @@ -785,6 +913,27 @@ class FrequencyTrieTest { assertTrue(exception.getMessage().contains("Unsupported trie stream version")); } + /** + * Verifies that the latest stream version validates textual metadata blocks. + */ + @Test + @Tag("persistence") + @DisplayName("readFrom rejects invalid textual metadata block") + void readFromRejectsInvalidTextualMetadataBlock() { + final int version = FrequencyTrie.currentFormatVersion(); + final byte[] bytes = createSerializedStream(0x45475452, version, 1, 0, dataOutput -> { + dataOutput.writeUTF("not valid metadata"); + }, new NodeWriter[] { dataOutput -> { + dataOutput.writeInt(0); + dataOutput.writeInt(0); + } }); + + final IOException exception = assertThrows(IOException.class, + () -> FrequencyTrie.readFrom(new ByteArrayInputStream(bytes), String[]::new, STRING_CODEC)); + + assertTrue(exception.getMessage().contains("Invalid metadata block")); + } + /** * Verifies that deserialization rejects a negative node count. */ @@ -862,6 +1011,129 @@ class FrequencyTrieTest { assertTrue(exception.getMessage().contains("Non-positive stored count")); } + /** + * Verifies that legacy version 1 metadata uses compatibility defaults. + */ + @Test + @Tag("persistence") + @DisplayName("readFrom supports legacy version 1 metadata") + void readFromSupportsLegacyVersionOneMetadata() throws IOException { + final byte[] bytes = createSerializedStream(0x45475452, 1, 1, 0, new NodeWriter[] { dataOutput -> { + dataOutput.writeInt(0); + dataOutput.writeInt(0); + } }); + + final FrequencyTrie trie = FrequencyTrie.readFrom(new ByteArrayInputStream(bytes), String[]::new, STRING_CODEC); + + assertEquals(TrieMetadata.legacy(1, WordTraversalDirection.BACKWARD), trie.metadata()); + } + + /** + * Verifies that legacy version 2 metadata stores traversal direction and uses + * compatibility defaults for other values. + */ + @Test + @Tag("persistence") + @DisplayName("readFrom supports legacy version 2 metadata") + void readFromSupportsLegacyVersionTwoMetadata() throws IOException { + final byte[] bytes = createSerializedStream(0x45475452, 2, 1, 0, + dataOutput -> dataOutput.writeInt(WordTraversalDirection.FORWARD.ordinal()), new NodeWriter[] { dataOutput -> { + dataOutput.writeInt(0); + dataOutput.writeInt(0); + } }); + + final FrequencyTrie trie = FrequencyTrie.readFrom(new ByteArrayInputStream(bytes), String[]::new, STRING_CODEC); + + assertEquals(TrieMetadata.legacy(2, WordTraversalDirection.FORWARD), trie.metadata()); + } + + /** + * Verifies that version 3 metadata includes reduction and diacritic + * processing settings. + */ + @Test + @Tag("persistence") + @DisplayName("readFrom parses version 3 metadata") + void readFromParsesVersionThreeMetadata() throws IOException { + final ReductionSettings reductionSettings = new ReductionSettings( + ReductionMode.MERGE_SUBTREES_WITH_EQUIVALENT_UNORDERED_GET_ALL_RESULTS, 81, 4); + + final byte[] bytes = createSerializedStream(0x45475452, 3, 1, 0, + dataOutput -> { + dataOutput.writeInt(WordTraversalDirection.BACKWARD.ordinal()); + dataOutput.writeInt(reductionSettings.reductionMode().ordinal()); + dataOutput.writeInt(reductionSettings.dominantWinnerMinPercent()); + dataOutput.writeInt(reductionSettings.dominantWinnerOverSecondRatio()); + dataOutput.writeInt(DiacriticProcessingMode.REMOVE.ordinal()); + }, + new NodeWriter[] { dataOutput -> { + dataOutput.writeInt(0); + dataOutput.writeInt(0); + } }); + + final FrequencyTrie trie = FrequencyTrie.readFrom(new ByteArrayInputStream(bytes), String[]::new, STRING_CODEC); + final TrieMetadata metadata = trie.metadata(); + + assertAll(() -> assertEquals(3, metadata.formatVersion()), + () -> assertEquals(WordTraversalDirection.BACKWARD, metadata.traversalDirection()), + () -> assertEquals(reductionSettings, metadata.reductionSettings()), + () -> assertEquals(DiacriticProcessingMode.REMOVE, metadata.diacriticProcessingMode()), + () -> assertEquals(CaseProcessingMode.LOWERCASE_WITH_LOCALE_ROOT, metadata.caseProcessingMode())); + } + + /** + * Verifies that version 4 metadata additionally stores case-processing mode. + */ + @Test + @Tag("persistence") + @DisplayName("readFrom parses version 4 case processing metadata") + void readFromParsesVersionFourCaseMetadata() throws IOException { + final ReductionSettings reductionSettings = new ReductionSettings( + ReductionMode.MERGE_SUBTREES_WITH_EQUIVALENT_RANKED_GET_ALL_RESULTS, 75, 3); + + final byte[] bytes = createSerializedStream(0x45475452, 4, 1, 0, + dataOutput -> { + dataOutput.writeInt(WordTraversalDirection.FORWARD.ordinal()); + dataOutput.writeInt(reductionSettings.reductionMode().ordinal()); + dataOutput.writeInt(reductionSettings.dominantWinnerMinPercent()); + dataOutput.writeInt(reductionSettings.dominantWinnerOverSecondRatio()); + dataOutput.writeInt(DiacriticProcessingMode.AS_IS.ordinal()); + dataOutput.writeInt(CaseProcessingMode.AS_IS.ordinal()); + }, + new NodeWriter[] { dataOutput -> { + dataOutput.writeInt(0); + dataOutput.writeInt(0); + } }); + + final FrequencyTrie trie = FrequencyTrie.readFrom(new ByteArrayInputStream(bytes), String[]::new, STRING_CODEC); + final TrieMetadata metadata = trie.metadata(); + + assertAll(() -> assertEquals(4, metadata.formatVersion()), + () -> assertEquals(WordTraversalDirection.FORWARD, metadata.traversalDirection()), + () -> assertEquals(reductionSettings, metadata.reductionSettings()), + () -> assertEquals(DiacriticProcessingMode.AS_IS, metadata.diacriticProcessingMode()), + () -> assertEquals(CaseProcessingMode.AS_IS, metadata.caseProcessingMode())); + } + + /** + * Verifies that invalid legacy metadata ordinals are rejected by validation. + */ + @Test + @Tag("persistence") + @DisplayName("readFrom rejects invalid metadata ordinal in legacy stream") + void readFromRejectsInvalidLegacyMetadataOrdinal() { + final byte[] bytes = createSerializedStream(0x45475452, 2, 1, 0, + dataOutput -> dataOutput.writeInt(999), new NodeWriter[] { dataOutput -> { + dataOutput.writeInt(0); + dataOutput.writeInt(0); + } }); + + final IOException exception = assertThrows(IOException.class, + () -> FrequencyTrie.readFrom(new ByteArrayInputStream(bytes), String[]::new, STRING_CODEC)); + + assertTrue(exception.getMessage().contains("Invalid traversal direction ordinal")); + } + /** * Writes one node body into a synthetic serialized trie stream. */ @@ -889,6 +1161,24 @@ class FrequencyTrieTest { */ private static byte[] createSerializedStream(final int magic, final int version, final int nodeCount, final int rootNodeId, final NodeWriter[] nodes) { + return createSerializedStream(magic, version, nodeCount, rootNodeId, dataOutput -> { + // legacy and text-based versions write their metadata differently. + }, nodes); + } + + /** + * Writes a synthetic serialized trie stream with a metadata writer hook. + * + * @param magic stream magic + * @param version stream version + * @param nodeCount declared node count + * @param rootNodeId declared root node identifier + * @param metadata version-specific metadata writer + * @param nodes node body writers + * @return serialized bytes + */ + private static byte[] createSerializedStream(final int magic, final int version, final int nodeCount, + final int rootNodeId, final MetadataWriter metadata, final NodeWriter[] nodes) { try { final ByteArrayOutputStream byteArrayOutputStream = new ByteArrayOutputStream(); final DataOutputStream dataOutputStream = new DataOutputStream(byteArrayOutputStream); @@ -897,6 +1187,7 @@ class FrequencyTrieTest { dataOutputStream.writeInt(version); dataOutputStream.writeInt(nodeCount); dataOutputStream.writeInt(rootNodeId); + metadata.write(dataOutputStream); for (NodeWriter node : nodes) { node.write(dataOutputStream); @@ -908,4 +1199,19 @@ class FrequencyTrieTest { throw new IllegalStateException("Unexpected I/O while building synthetic trie stream.", exception); } } + + /** + * Writes one synthetic metadata block. + */ + @FunctionalInterface + private interface MetadataWriter { + + /** + * Writes metadata bytes for one stream version. + * + * @param dataOutput output stream + * @throws IOException if writing fails + */ + void write(DataOutputStream dataOutput) throws IOException; + } } diff --git a/src/test/java/org/egothor/stemmer/FuzzStemmerAndTrieCompilationTest.java b/src/test/java/org/egothor/stemmer/FuzzStemmerAndTrieCompilationTest.java index 933a749..a0e11b0 100644 --- a/src/test/java/org/egothor/stemmer/FuzzStemmerAndTrieCompilationTest.java +++ b/src/test/java/org/egothor/stemmer/FuzzStemmerAndTrieCompilationTest.java @@ -65,10 +65,9 @@ import org.junit.jupiter.api.io.TempDir; * stems declared by the source dictionary. */ @DisplayName("Deterministic fuzz-style trie and stemmer compilation") -@Tag("unit") @Tag("fuzz") @Tag("trie") -@Tag("stemming") +@Tag("stemmer") class FuzzStemmerAndTrieCompilationTest { /** diff --git a/src/test/java/org/egothor/stemmer/PatchCommandEncoderProperties.java b/src/test/java/org/egothor/stemmer/PatchCommandEncoderProperties.java index 65655c9..1c409b6 100644 --- a/src/test/java/org/egothor/stemmer/PatchCommandEncoderProperties.java +++ b/src/test/java/org/egothor/stemmer/PatchCommandEncoderProperties.java @@ -36,7 +36,7 @@ import static org.junit.jupiter.api.Assertions.assertNotNull; import net.jqwik.api.ForAll; import net.jqwik.api.Label; import net.jqwik.api.Property; -import net.jqwik.api.Tag; +import org.junit.jupiter.api.Tag; /** * Property-based tests for {@link PatchCommandEncoder}. @@ -47,9 +47,9 @@ import net.jqwik.api.Tag; * reconstruct the exact requested target. */ @Label("PatchCommandEncoder properties") -@Tag("unit") @Tag("property") @Tag("patch") +@Tag("stemmer") class PatchCommandEncoderProperties extends PropertyBasedTestSupport { /** diff --git a/src/test/java/org/egothor/stemmer/PatchCommandEncoderTest.java b/src/test/java/org/egothor/stemmer/PatchCommandEncoderTest.java index c8decdc..b608ef7 100644 --- a/src/test/java/org/egothor/stemmer/PatchCommandEncoderTest.java +++ b/src/test/java/org/egothor/stemmer/PatchCommandEncoderTest.java @@ -241,7 +241,7 @@ class PatchCommandEncoderTest { */ @Nested @DisplayName("construction") - @Tag("constructor") + @Tag("construction") class ConstructionTests { /** @@ -326,7 +326,7 @@ class PatchCommandEncoderTest { */ @Nested @DisplayName("encode(String, String)") - @Tag("encode") + @Tag("encoding") class EncodeTests { /** @@ -658,7 +658,7 @@ class PatchCommandEncoderTest { */ @Nested @DisplayName("reversed-word processing") - @Tag("reverse") + @Tag("normalization") class ReversedWordProcessingTests { /** diff --git a/src/test/java/org/egothor/stemmer/StemmerDictionaryParserTest.java b/src/test/java/org/egothor/stemmer/StemmerDictionaryParserTest.java index 353c29c..bbf9267 100644 --- a/src/test/java/org/egothor/stemmer/StemmerDictionaryParserTest.java +++ b/src/test/java/org/egothor/stemmer/StemmerDictionaryParserTest.java @@ -75,6 +75,7 @@ import org.junit.jupiter.api.io.TempDir; @DisplayName("StemmerDictionaryParser") @Tag("unit") @Tag("parser") +@Tag("stemmer") class StemmerDictionaryParserTest { /** diff --git a/src/test/java/org/egothor/stemmer/StemmerKnowledgeExperimentTest.java b/src/test/java/org/egothor/stemmer/StemmerKnowledgeExperimentTest.java index 7a233d8..ad2031f 100644 --- a/src/test/java/org/egothor/stemmer/StemmerKnowledgeExperimentTest.java +++ b/src/test/java/org/egothor/stemmer/StemmerKnowledgeExperimentTest.java @@ -54,9 +54,9 @@ import org.junit.jupiter.api.io.TempDir; /** * Tests for {@link StemmerKnowledgeExperiment}. */ -@Tag("unit") @Tag("integration") @Tag("stemmer") +@Tag("trie") final class StemmerKnowledgeExperimentTest { /** diff --git a/src/test/java/org/egothor/stemmer/StemmerPatchTrieBinaryIOTest.java b/src/test/java/org/egothor/stemmer/StemmerPatchTrieBinaryIOTest.java index a7171c0..bbcca83 100644 --- a/src/test/java/org/egothor/stemmer/StemmerPatchTrieBinaryIOTest.java +++ b/src/test/java/org/egothor/stemmer/StemmerPatchTrieBinaryIOTest.java @@ -38,6 +38,8 @@ import static org.junit.jupiter.api.Assertions.assertSame; import static org.junit.jupiter.api.Assertions.assertThrows; import static org.junit.jupiter.api.Assertions.assertTrue; import static org.mockito.ArgumentMatchers.any; +import static org.mockito.ArgumentMatchers.anyInt; +import static org.mockito.ArgumentMatchers.eq; import static org.mockito.Mockito.mock; import static org.mockito.Mockito.mockStatic; import static org.mockito.Mockito.verify; @@ -91,6 +93,8 @@ import org.mockito.MockedStatic; @Tag("unit") @Tag("io") @Tag("persistence") +@Tag("serialization") +@Tag("trie") @DisplayName("StemmerPatchTrieBinaryIO") class StemmerPatchTrieBinaryIOTest { @@ -299,9 +303,19 @@ class StemmerPatchTrieBinaryIOTest { "read(Path) must reject null path."), () -> assertThrows(NullPointerException.class, () -> StemmerPatchTrieBinaryIO.read((String) null), "read(String) must reject null file name."), + () -> assertThrows(NullPointerException.class, + () -> StemmerPatchTrieBinaryIO.read((Path) null, FrequencyTrie.DEFAULT_MAX_EXPANDED_INDEX), + "read(Path, int) must reject null path."), + () -> assertThrows(NullPointerException.class, + () -> StemmerPatchTrieBinaryIO.read((String) null, + FrequencyTrie.DEFAULT_MAX_EXPANDED_INDEX), + "read(String, int) must reject null file name."), () -> assertThrows(NullPointerException.class, () -> StemmerPatchTrieBinaryIO.read((ByteArrayInputStream) null), - "read(InputStream) must reject null input stream.")); + "read(InputStream) must reject null input stream."), + () -> assertThrows(NullPointerException.class, + () -> StemmerPatchTrieBinaryIO.read((ByteArrayInputStream) null, FrequencyTrie.DEFAULT_MAX_EXPANDED_INDEX), + "read(InputStream, int) must reject null input stream.")); } /** @@ -385,6 +399,143 @@ class StemmerPatchTrieBinaryIOTest { } } + /** + * Verifies that stream overload with dense span override delegates to the + * four-argument readFrom method. + */ + @SuppressWarnings("unchecked") + @Test + @DisplayName("Should delegate stream read with dense span override") + void shouldDelegateInputStreamReadWithDenseSpanOverride() throws IOException { + final FrequencyTrie expectedTrie = mock(FrequencyTrie.class); + final byte[] gzipPayload = gzip("binary-content-with-max-expanded-index"); + + try (@SuppressWarnings("rawtypes") + MockedStatic mockedStatic = mockStatic(FrequencyTrie.class)) { + mockedStatic.when(() -> FrequencyTrie.readFrom(any(DataInputStream.class), any(), + any(FrequencyTrie.ValueStreamCodec.class), anyInt())).thenReturn(expectedTrie); + + final FrequencyTrie actualTrie = StemmerPatchTrieBinaryIO + .read(new ByteArrayInputStream(gzipPayload), 17); + + assertSame(expectedTrie, actualTrie, + "read(InputStream, int) must return the trie produced by FrequencyTrie.readFrom(...)."); + + mockedStatic.verify(() -> FrequencyTrie.readFrom(any(DataInputStream.class), any(), + any(FrequencyTrie.ValueStreamCodec.class), eq(17))); + } + } + + /** + * Verifies that path overload with dense span override delegates to the + * same method overload with the override parameter. + */ + @SuppressWarnings("unchecked") + @Test + @DisplayName("Should delegate path read with dense span override") + void shouldDelegatePathReadWithDenseSpanOverride() throws IOException { + final FrequencyTrie expectedTrie = mock(FrequencyTrie.class); + final Path sourceFile = temporaryDirectory.resolve("input-max-expanded.bin.gz"); + Files.write(sourceFile, gzip("path-based-max-expanded-index")); + + try (@SuppressWarnings("rawtypes") + MockedStatic mockedStatic = mockStatic(FrequencyTrie.class)) { + mockedStatic.when(() -> FrequencyTrie.readFrom(any(DataInputStream.class), any(), + any(FrequencyTrie.ValueStreamCodec.class), anyInt())).thenReturn(expectedTrie); + + final FrequencyTrie actualTrie = StemmerPatchTrieBinaryIO.read(sourceFile, 0); + + assertSame(expectedTrie, actualTrie, + "read(Path, int) must return the trie produced by FrequencyTrie.readFrom(...)."); + + mockedStatic.verify(() -> FrequencyTrie.readFrom(any(DataInputStream.class), any(), + any(FrequencyTrie.ValueStreamCodec.class), eq(0))); + } + } + + /** + * Verifies that string path overload with dense span override delegates to the + * same method overload with the override parameter. + */ + @SuppressWarnings("unchecked") + @Test + @DisplayName("Should delegate file name read with dense span override") + void shouldDelegateStringReadWithDenseSpanOverride() throws IOException { + final FrequencyTrie expectedTrie = mock(FrequencyTrie.class); + final Path sourceFile = temporaryDirectory.resolve("input-string-max-expanded.bin.gz"); + Files.write(sourceFile, gzip("string-based-max-expanded-index")); + + try (@SuppressWarnings("rawtypes") + MockedStatic mockedStatic = mockStatic(FrequencyTrie.class)) { + mockedStatic.when(() -> FrequencyTrie.readFrom(any(DataInputStream.class), any(), + any(FrequencyTrie.ValueStreamCodec.class), anyInt())).thenReturn(expectedTrie); + + final FrequencyTrie actualTrie = StemmerPatchTrieBinaryIO.read(sourceFile.toString(), 32); + + assertSame(expectedTrie, actualTrie, + "read(String, int) must return the trie produced by FrequencyTrie.readFrom(...)."); + + mockedStatic.verify(() -> FrequencyTrie.readFrom(any(DataInputStream.class), any(), + any(FrequencyTrie.ValueStreamCodec.class), eq(32))); + } + } + + /** + * Verifies that metadata-only read parses and returns the persisted metadata. + */ + @Test + @DisplayName("Should read metadata from gzip payload") + void shouldReadMetadataFromGzipPayload() throws IOException { + final FrequencyTrie.Builder builder = new FrequencyTrie.Builder(String[]::new, + ReductionSettings.withDefaults(ReductionMode.MERGE_SUBTREES_WITH_EQUIVALENT_RANKED_GET_ALL_RESULTS)); + builder.put("run", PatchCommandEncoder.builder().build().encode("running", "run")); + final FrequencyTrie trie = builder.build(); + + final ByteArrayOutputStream outputStream = new ByteArrayOutputStream(); + StemmerPatchTrieBinaryIO.write(trie, outputStream); + + final TrieMetadata metadata = StemmerPatchTrieBinaryIO.readMetadata(new ByteArrayInputStream(outputStream.toByteArray())); + + assertEquals(trie.metadata(), metadata, + "readMetadata(InputStream) must return the same metadata persisted by write()."); + } + + /** + * Verifies that metadata can be read from a binary file path. + */ + @Test + @DisplayName("Should read metadata from file path") + void shouldReadMetadataFromPath() throws IOException { + final FrequencyTrie.Builder builder = new FrequencyTrie.Builder(String[]::new, + ReductionSettings.withDefaults(ReductionMode.MERGE_SUBTREES_WITH_EQUIVALENT_RANKED_GET_ALL_RESULTS)); + builder.put("city", PatchCommandEncoder.builder().build().encode("cities", "city")); + final FrequencyTrie trie = builder.build(); + + final Path sourceFile = temporaryDirectory.resolve("metadata-path.bin.gz"); + StemmerPatchTrieBinaryIO.write(trie, sourceFile); + + final TrieMetadata metadata = StemmerPatchTrieBinaryIO.readMetadata(sourceFile); + assertEquals(trie.metadata(), metadata); + } + + /** + * Verifies that metadata can be read from a binary file name. + */ + @Test + @DisplayName("Should read metadata from file name") + void shouldReadMetadataFromStringPath() throws IOException { + final FrequencyTrie.Builder builder = new FrequencyTrie.Builder(String[]::new, + ReductionSettings.withDefaults(ReductionMode.MERGE_SUBTREES_WITH_EQUIVALENT_RANKED_GET_ALL_RESULTS)); + builder.put("city", PatchCommandEncoder.builder().build().encode("cities", "city")); + final FrequencyTrie trie = builder.build(); + + final Path sourceFile = temporaryDirectory.resolve("metadata-string.bin.gz"); + StemmerPatchTrieBinaryIO.write(trie, sourceFile); + + final TrieMetadata metadata = StemmerPatchTrieBinaryIO.readMetadata(sourceFile.toString()); + assertEquals(trie.metadata(), metadata); + } + /** * Verifies that malformed non-GZip input is reported as an I/O failure. */ diff --git a/src/test/java/org/egothor/stemmer/StemmerPatchTrieLoaderTest.java b/src/test/java/org/egothor/stemmer/StemmerPatchTrieLoaderTest.java index f9f17ae..6778214 100644 --- a/src/test/java/org/egothor/stemmer/StemmerPatchTrieLoaderTest.java +++ b/src/test/java/org/egothor/stemmer/StemmerPatchTrieLoaderTest.java @@ -85,9 +85,10 @@ import org.junit.jupiter.params.provider.MethodSource; *
  • the current bundled language set, including right-to-left metadata
  • * */ -@Tag("unit") @Tag("integration") @Tag("stemmer") +@Tag("io") +@Tag("parser") @TestInstance(TestInstance.Lifecycle.PER_CLASS) final class StemmerPatchTrieLoaderTest { @@ -210,36 +211,43 @@ final class StemmerPatchTrieLoaderTest { Arguments.of("14-load-binary-string", (ExecutableOperation) () -> StemmerPatchTrieLoader.loadBinary((String) null), StemmerPatchTrieLoader.FILENAME_REQUIRED), - Arguments.of("15-load-binary-stream", + Arguments.of("15-load-binary-path-override", + (ExecutableOperation) () -> StemmerPatchTrieLoader.loadBinary((Path) null, FrequencyTrie.DEFAULT_MAX_EXPANDED_INDEX), + "path"), + Arguments.of("16-load-binary-string-override", + (ExecutableOperation) () -> StemmerPatchTrieLoader.loadBinary((String) null, + FrequencyTrie.DEFAULT_MAX_EXPANDED_INDEX), + StemmerPatchTrieLoader.FILENAME_REQUIRED), + Arguments.of("17-load-binary-stream", (ExecutableOperation) () -> StemmerPatchTrieLoader.loadBinary((InputStream) null), "inputStream"), - Arguments.of("16-save-binary-null-trie-path", + Arguments.of("18-save-binary-null-trie-path", (ExecutableOperation) () -> StemmerPatchTrieLoader.saveBinary(null, tempPath()), "trie"), - Arguments.of("17-save-binary-null-path", + Arguments.of("19-save-binary-null-path", (ExecutableOperation) () -> StemmerPatchTrieLoader.saveBinary(trie, (Path) null), "path"), - Arguments.of("18-save-binary-null-trie-string", + Arguments.of("20-save-binary-null-trie-string", (ExecutableOperation) () -> StemmerPatchTrieLoader.saveBinary(null, tempPath().toString()), "trie"), - Arguments.of("19-save-binary-null-string", + Arguments.of("21-save-binary-null-string", (ExecutableOperation) () -> StemmerPatchTrieLoader.saveBinary(trie, (String) null), StemmerPatchTrieLoader.FILENAME_REQUIRED), - Arguments.of("20-load-language-null-metadata", + Arguments.of("22-load-language-null-metadata", (ExecutableOperation) () -> StemmerPatchTrieLoader.load(StemmerPatchTrieLoader.Language.US_UK, true, (TrieMetadata) null), "metadata"), - Arguments.of("21-load-path-null-metadata", + Arguments.of("23-load-path-null-metadata", (ExecutableOperation) () -> StemmerPatchTrieLoader.load(tempPath(), true, (TrieMetadata) null), "metadata"), - Arguments.of("22-load-string-null-metadata", + Arguments.of("24-load-string-null-metadata", (ExecutableOperation) () -> StemmerPatchTrieLoader.load(tempPath().toString(), true, (TrieMetadata) null), "metadata"), - Arguments.of("23-load-binary-metadata-path-null", + Arguments.of("25-load-binary-metadata-path-null", (ExecutableOperation) () -> StemmerPatchTrieLoader.loadBinaryMetadata((Path) null), "path"), - Arguments.of("24-load-binary-metadata-string-null", + Arguments.of("26-load-binary-metadata-string-null", (ExecutableOperation) () -> StemmerPatchTrieLoader.loadBinaryMetadata((String) null), StemmerPatchTrieLoader.FILENAME_REQUIRED), - Arguments.of("25-load-binary-metadata-stream-null", + Arguments.of("27-load-binary-metadata-stream-null", (ExecutableOperation) () -> StemmerPatchTrieLoader.loadBinaryMetadata((InputStream) null), "inputStream")); } @@ -512,6 +520,44 @@ final class StemmerPatchTrieLoaderTest { } } + /** + * Verifies that binary load overloads with an explicit dense lookup span + * preserve trie semantics while honoring the dense-layout override. + */ + @Test + @DisplayName("Binary dense-span override overloads should load equivalent tries") + void shouldLoadBinaryWithDenseSpanOverrideOverloads() throws IOException { + final Path dictionaryFile = writeDictionary(""" + run running runs runner + city cities + study studies studying + """); + final Path binaryFile = tempDir.resolve("stemmer-trie-overrides.bin.gz"); + + final FrequencyTrie original = StemmerPatchTrieLoader.load(dictionaryFile, true, + DEFAULT_REDUCTION_MODE); + + StemmerPatchTrieLoader.saveBinary(original, binaryFile); + + final FrequencyTrie fromPathDefault = StemmerPatchTrieLoader.loadBinary(binaryFile); + final FrequencyTrie fromPathDefaultByNegative = StemmerPatchTrieLoader.loadBinary(binaryFile, + FrequencyTrie.DEFAULT_MAX_EXPANDED_INDEX); + final FrequencyTrie fromPathNoDense = StemmerPatchTrieLoader.loadBinary(binaryFile, 0); + final FrequencyTrie fromStringNoDense = StemmerPatchTrieLoader.loadBinary(binaryFile.toString(), 0); + + assertTriePatchSemanticsEqual(original, fromPathDefault, "run", "running", "runner", "cities", "studying"); + assertTriePatchSemanticsEqual(original, fromPathDefaultByNegative, "run", "running", "runner", "cities", + "studying"); + assertTriePatchSemanticsEqual(original, fromPathNoDense, "run", "running", "runner", "cities", "studying"); + assertTriePatchSemanticsEqual(original, fromStringNoDense, "run", "running", "runner", "cities", + "studying"); + + assertFalse(fromPathNoDense.root().hasDenseLookup(), + "Zero span should disable dense lookup on the loaded root."); + assertFalse(fromStringNoDense.root().hasDenseLookup(), + "Zero span should disable dense lookup on the loaded root."); + } + /** * Writes a dictionary file into the temporary directory. * @@ -530,6 +576,7 @@ final class StemmerPatchTrieLoaderTest { * Bundled dictionary integration tests. */ @Nested + @Tag("slow") @DisplayName("Bundled dictionaries") final class BundledDictionaryTests { diff --git a/src/test/java/org/egothor/stemmer/StemmerPatchTrieProperties.java b/src/test/java/org/egothor/stemmer/StemmerPatchTrieProperties.java index 23f8fc5..27474cc 100644 --- a/src/test/java/org/egothor/stemmer/StemmerPatchTrieProperties.java +++ b/src/test/java/org/egothor/stemmer/StemmerPatchTrieProperties.java @@ -44,7 +44,7 @@ import java.util.Set; import net.jqwik.api.ForAll; import net.jqwik.api.Label; import net.jqwik.api.Property; -import net.jqwik.api.Tag; +import org.junit.jupiter.api.Tag; /** * Property-based tests for patch-command stemmer tries. @@ -56,9 +56,8 @@ import net.jqwik.api.Tag; * persistence must not alter that behavior. */ @Label("Stemmer patch trie properties") -@Tag("unit") @Tag("property") -@Tag("stemming") +@Tag("stemmer") class StemmerPatchTrieProperties extends PropertyBasedTestSupport { /** diff --git a/src/test/java/org/egothor/stemmer/TrieMetadataTest.java b/src/test/java/org/egothor/stemmer/TrieMetadataTest.java index 9d7988a..6d0dfb8 100644 --- a/src/test/java/org/egothor/stemmer/TrieMetadataTest.java +++ b/src/test/java/org/egothor/stemmer/TrieMetadataTest.java @@ -40,6 +40,8 @@ import org.junit.jupiter.api.Tag; import org.junit.jupiter.api.Test; @Tag("unit") +@Tag("metadata") +@Tag("trie") @DisplayName("TrieMetadata") class TrieMetadataTest { diff --git a/src/test/java/org/egothor/stemmer/WordTraversalDirectionTest.java b/src/test/java/org/egothor/stemmer/WordTraversalDirectionTest.java index ce50176..1c8d376 100644 --- a/src/test/java/org/egothor/stemmer/WordTraversalDirectionTest.java +++ b/src/test/java/org/egothor/stemmer/WordTraversalDirectionTest.java @@ -40,6 +40,8 @@ import org.junit.jupiter.api.Tag; import org.junit.jupiter.api.Test; @Tag("unit") +@Tag("core") +@Tag("stemmer") @DisplayName("WordTraversalDirection") class WordTraversalDirectionTest { diff --git a/src/test/java/org/egothor/stemmer/trie/ChildDescriptorTest.java b/src/test/java/org/egothor/stemmer/trie/ChildDescriptorTest.java index 069a4a3..ee4c394 100644 --- a/src/test/java/org/egothor/stemmer/trie/ChildDescriptorTest.java +++ b/src/test/java/org/egothor/stemmer/trie/ChildDescriptorTest.java @@ -45,7 +45,7 @@ import org.junit.jupiter.api.Test; * Unit tests for {@link ChildDescriptor}. */ @Tag("unit") -@Tag("fast") +@Tag("trie") @DisplayName("ChildDescriptor") class ChildDescriptorTest { diff --git a/src/test/java/org/egothor/stemmer/trie/CompiledNodeAndNodeDataTest.java b/src/test/java/org/egothor/stemmer/trie/CompiledNodeAndNodeDataTest.java index 96f7054..b8a9a93 100644 --- a/src/test/java/org/egothor/stemmer/trie/CompiledNodeAndNodeDataTest.java +++ b/src/test/java/org/egothor/stemmer/trie/CompiledNodeAndNodeDataTest.java @@ -31,8 +31,10 @@ package org.egothor.stemmer.trie; import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertFalse; import static org.junit.jupiter.api.Assertions.assertSame; import static org.junit.jupiter.api.Assertions.assertThrows; +import static org.junit.jupiter.api.Assertions.assertTrue; import org.junit.jupiter.api.DisplayName; import org.junit.jupiter.api.Tag; @@ -43,7 +45,6 @@ import org.junit.jupiter.api.Test; * documented backing-array exposure. */ @Tag("unit") -@Tag("fast") @Tag("trie") @DisplayName("CompiledNode and NodeData") class CompiledNodeAndNodeDataTest { @@ -141,4 +142,136 @@ class CompiledNodeAndNodeDataTest { assertSame(orderedValues, node.orderedValues()); assertSame(orderedCounts, node.orderedCounts()); } + + /** + * Verifies that dense lookup is used when the interval is compact. + */ + @Test + @DisplayName("CompiledNode can resolve child via dense lookup table") + void compiledNodeUsesDenseLookupForCompactIntervals() { + @SuppressWarnings("unchecked") + final CompiledNode[] children = new CompiledNode[4]; + children[0] = new CompiledNode<>(new char[0], new CompiledNode[0], new String[0], new int[0]); + children[1] = new CompiledNode<>(new char[0], new CompiledNode[0], new String[0], new int[0]); + children[2] = new CompiledNode<>(new char[0], new CompiledNode[0], new String[0], new int[0]); + children[3] = new CompiledNode<>(new char[0], new CompiledNode[0], new String[0], new int[0]); + + final CompiledNode node = new CompiledNode<>(new char[] { 'a', 'b', 'c', 'd' }, children, + new String[] { "1", "2", "3", "4" }, new int[] { 1, 1, 1, 1 }); + + assertTrue(node.hasDenseLookup()); + + assertSame(children[0], node.findChild('a')); + assertSame(children[3], node.findChild('d')); + assertSame(null, node.findChild('z')); + } + + /** + * Verifies that fallback linear scan is used for small node degree. + */ + @Test + @DisplayName("CompiledNode resolves child by linear scan for small degree") + void compiledNodeUsesLinearScanForSmallDegree() { + @SuppressWarnings("unchecked") + final CompiledNode[] children = new CompiledNode[4]; + final CompiledNode childA = new CompiledNode<>(new char[0], new CompiledNode[0], new String[0], new int[0]); + final CompiledNode childB = new CompiledNode<>(new char[0], new CompiledNode[0], new String[0], new int[0]); + final CompiledNode childC = new CompiledNode<>(new char[0], new CompiledNode[0], new String[0], new int[0]); + final CompiledNode childD = new CompiledNode<>(new char[0], new CompiledNode[0], new String[0], new int[0]); + children[0] = childA; + children[1] = childB; + children[2] = childC; + children[3] = childD; + + final CompiledNode node = new CompiledNode<>(new char[] { 'a', 'z', '中', '你' }, children, + new String[] { "1", "2", "3", "4" }, 0, new int[] { 1, 1, 1, 1 }); + + assertFalse(node.hasDenseLookup()); + + assertSame(childA, node.findChild('a')); + assertSame(childD, node.findChild('你')); + assertSame(null, node.findChild('b')); + } + + /** + * Verifies that fallback binary search is used for larger node degree without + * dense lookup. + */ + @Test + @DisplayName("CompiledNode resolves child by binary search for large degree") + void compiledNodeUsesBinarySearchForLargeDegree() { + @SuppressWarnings("unchecked") + final CompiledNode[] children = new CompiledNode[5]; + final CompiledNode childA = new CompiledNode<>(new char[0], new CompiledNode[0], new String[0], new int[0]); + final CompiledNode childB = new CompiledNode<>(new char[0], new CompiledNode[0], new String[0], new int[0]); + final CompiledNode childC = new CompiledNode<>(new char[0], new CompiledNode[0], new String[0], new int[0]); + final CompiledNode childD = new CompiledNode<>(new char[0], new CompiledNode[0], new String[0], new int[0]); + final CompiledNode childE = new CompiledNode<>(new char[0], new CompiledNode[0], new String[0], new int[0]); + children[0] = childA; + children[1] = childB; + children[2] = childC; + children[3] = childD; + children[4] = childE; + + final CompiledNode node = new CompiledNode<>(new char[] { 'a', 'c', 'k', 't', 'z' }, children, + new String[] { "1", "2", "3", "4", "5" }, 0, new int[] { 1, 1, 1, 1, 1 }); + + assertFalse(node.hasDenseLookup()); + + assertSame(childC, node.findChild('k')); + assertSame(childE, node.findChild('z')); + assertSame(null, node.findChild('x')); + } + + /** + * Verifies the basic node-state helpers that are used by diagnostics and + * behavioral checks. + */ + @Test + @DisplayName("CompiledNode reports leaf, value and edge presence state") + void compiledNodeReportsNodeStateHelpers() { + @SuppressWarnings("unchecked") + final CompiledNode[] childless = new CompiledNode[0]; + final CompiledNode leaf = new CompiledNode<>(new char[0], childless, new String[0], new int[0]); + + assertTrue(leaf.isLeaf()); + assertFalse(leaf.hasChildren()); + assertFalse(leaf.hasValues()); + assertFalse(leaf.hasEdge('a')); + + @SuppressWarnings("unchecked") + final CompiledNode[] child = new CompiledNode[1]; + final String[] orderedValues = new String[] { "leaf" }; + final int[] orderedCounts = new int[] { 1 }; + child[0] = new CompiledNode<>(new char[0], new CompiledNode[0], orderedValues, orderedCounts); + final CompiledNode node = new CompiledNode<>(new char[] { 'a' }, child, orderedValues, orderedCounts); + + assertFalse(node.isLeaf()); + assertTrue(node.hasChildren()); + assertTrue(node.hasValues()); + assertTrue(node.valueCount() > 0); + assertTrue(node.hasEdge('a')); + assertFalse(node.hasEdge('b')); + } + + /** + * Verifies structural equality and hash-code behavior for compiled nodes. + */ + @Test + @DisplayName("CompiledNode equals and hashCode align for identical structure") + void compiledNodeEqualsAndHashCodeAlignForIdenticalStructure() { + @SuppressWarnings("unchecked") + final CompiledNode[] child = new CompiledNode[1]; + final CompiledNode leaf = new CompiledNode<>(new char[0], new CompiledNode[0], new String[] { "v" }, + new int[] { 1 }); + child[0] = leaf; + + final CompiledNode first = new CompiledNode<>(new char[] { 'a' }, child, new String[] { "x" }, + new int[] { 2 }); + final CompiledNode second = new CompiledNode<>(new char[] { 'a' }, child, new String[] { "x" }, + new int[] { 2 }); + + assertEquals(first, second); + assertEquals(first.hashCode(), second.hashCode()); + } } diff --git a/src/test/java/org/egothor/stemmer/trie/DominantLocalDescriptorTest.java b/src/test/java/org/egothor/stemmer/trie/DominantLocalDescriptorTest.java index aaa250c..df9855f 100644 --- a/src/test/java/org/egothor/stemmer/trie/DominantLocalDescriptorTest.java +++ b/src/test/java/org/egothor/stemmer/trie/DominantLocalDescriptorTest.java @@ -41,7 +41,7 @@ import org.junit.jupiter.api.Test; * Unit tests for {@link DominantLocalDescriptor}. */ @Tag("unit") -@Tag("fast") +@Tag("trie") @DisplayName("DominantLocalDescriptor") class DominantLocalDescriptorTest { diff --git a/src/test/java/org/egothor/stemmer/trie/LocalValueSummaryTest.java b/src/test/java/org/egothor/stemmer/trie/LocalValueSummaryTest.java index bf9ab6c..3c9e3bb 100644 --- a/src/test/java/org/egothor/stemmer/trie/LocalValueSummaryTest.java +++ b/src/test/java/org/egothor/stemmer/trie/LocalValueSummaryTest.java @@ -50,7 +50,7 @@ import org.junit.jupiter.api.Test; * Unit tests for {@link LocalValueSummary}. */ @Tag("unit") -@Tag("fast") +@Tag("trie") @DisplayName("LocalValueSummary") class LocalValueSummaryTest { diff --git a/src/test/java/org/egothor/stemmer/trie/MutableNodeTest.java b/src/test/java/org/egothor/stemmer/trie/MutableNodeTest.java index 56dc468..c936e4d 100644 --- a/src/test/java/org/egothor/stemmer/trie/MutableNodeTest.java +++ b/src/test/java/org/egothor/stemmer/trie/MutableNodeTest.java @@ -44,7 +44,7 @@ import org.junit.jupiter.api.Test; * Unit tests for {@link MutableNode}. */ @Tag("unit") -@Tag("fast") +@Tag("trie") @DisplayName("MutableNode") class MutableNodeTest { diff --git a/src/test/java/org/egothor/stemmer/trie/RankedLocalDescriptorTest.java b/src/test/java/org/egothor/stemmer/trie/RankedLocalDescriptorTest.java index 446a0c9..37abf38 100644 --- a/src/test/java/org/egothor/stemmer/trie/RankedLocalDescriptorTest.java +++ b/src/test/java/org/egothor/stemmer/trie/RankedLocalDescriptorTest.java @@ -41,7 +41,7 @@ import org.junit.jupiter.api.Test; * Unit tests for {@link RankedLocalDescriptor}. */ @Tag("unit") -@Tag("fast") +@Tag("trie") @DisplayName("RankedLocalDescriptor") class RankedLocalDescriptorTest { diff --git a/src/test/java/org/egothor/stemmer/trie/ReducedNodeTest.java b/src/test/java/org/egothor/stemmer/trie/ReducedNodeTest.java index b93a01e..976cb54 100644 --- a/src/test/java/org/egothor/stemmer/trie/ReducedNodeTest.java +++ b/src/test/java/org/egothor/stemmer/trie/ReducedNodeTest.java @@ -48,7 +48,7 @@ import org.junit.jupiter.api.Test; * Unit tests for {@link ReducedNode}. */ @Tag("unit") -@Tag("fast") +@Tag("trie") @DisplayName("ReducedNode") class ReducedNodeTest { diff --git a/src/test/java/org/egothor/stemmer/trie/ReductionContextTest.java b/src/test/java/org/egothor/stemmer/trie/ReductionContextTest.java index 96b729c..bff1c75 100644 --- a/src/test/java/org/egothor/stemmer/trie/ReductionContextTest.java +++ b/src/test/java/org/egothor/stemmer/trie/ReductionContextTest.java @@ -47,7 +47,7 @@ import org.junit.jupiter.api.Test; * Unit tests for {@link ReductionContext}. */ @Tag("unit") -@Tag("fast") +@Tag("trie") @DisplayName("ReductionContext") class ReductionContextTest { diff --git a/src/test/java/org/egothor/stemmer/trie/ReductionSignatureTest.java b/src/test/java/org/egothor/stemmer/trie/ReductionSignatureTest.java index d4a8b56..676f354 100644 --- a/src/test/java/org/egothor/stemmer/trie/ReductionSignatureTest.java +++ b/src/test/java/org/egothor/stemmer/trie/ReductionSignatureTest.java @@ -46,7 +46,7 @@ import org.junit.jupiter.api.Test; * Unit tests for {@link ReductionSignature}. */ @Tag("unit") -@Tag("fast") +@Tag("trie") @DisplayName("ReductionSignature") class ReductionSignatureTest { diff --git a/src/test/java/org/egothor/stemmer/trie/UnorderedLocalDescriptorTest.java b/src/test/java/org/egothor/stemmer/trie/UnorderedLocalDescriptorTest.java index 664ce23..f2bdf0a 100644 --- a/src/test/java/org/egothor/stemmer/trie/UnorderedLocalDescriptorTest.java +++ b/src/test/java/org/egothor/stemmer/trie/UnorderedLocalDescriptorTest.java @@ -41,7 +41,7 @@ import org.junit.jupiter.api.Test; * Unit tests for {@link UnorderedLocalDescriptor}. */ @Tag("unit") -@Tag("fast") +@Tag("trie") @DisplayName("UnorderedLocalDescriptor") class UnorderedLocalDescriptorTest {