docs: sync and improvements

This commit is contained in:
2026-04-26 18:55:25 +02:00
parent 48f21cab72
commit 5a511374f3
13 changed files with 130 additions and 21 deletions

View File

@@ -61,6 +61,7 @@ import java.util.logging.Logger;
* --output <file>
* --reduction-mode <mode>
* [--store-original]
* [--right-to-left]
* [--case-processing-mode <mode>]
* [--dominant-winner-min-percent <1..100>]
* [--dominant-winner-over-second-ratio <1..n>]

View File

@@ -85,10 +85,25 @@ final class DiacriticStripper {
registerSingle("Þ", 'T');
}
/**
* Prevents instantiation of this utility class.
*
* @throws AssertionError always
*/
private DiacriticStripper() {
throw new AssertionError("No instances.");
}
/**
* Removes supported diacritic marks and common Latin ligatures from the supplied
* text.
*
* <p>
* The method returns the original {@link String} instance when no replacement is
* required, avoiding an unnecessary allocation on the common ASCII path.
* </p>
*
* @param input text to normalize
* @return normalized text, or {@code input} itself when no replacement was required
*/
/* default */ static String strip(final String input) {
StringBuilder normalized = null;
@@ -116,6 +131,13 @@ final class DiacriticStripper {
return normalized.toString();
}
/**
* Returns the replacement text for one non-ASCII character.
*
* @param source source character
* @return replacement text, or {@code null} when the character should be kept
* unchanged
*/
@SuppressWarnings("PMD.AvoidLiteralsInIfCondition")
private static String replacementFor(final char source) {
if (source <= 0x007F) {
@@ -161,6 +183,12 @@ final class DiacriticStripper {
return ascii.toString();
}
/**
* Registers one-character replacements for a set of source characters.
*
* @param sourceCharacters characters to replace
* @param replacement replacement character
*/
private static void registerSingle(final String sourceCharacters, final char replacement) {
for (int index = 0; index < sourceCharacters.length(); index++) {
DIRECT_REPLACEMENTS[sourceCharacters.charAt(index)] = replacement;

View File

@@ -138,9 +138,9 @@ public final class FrequencyTrie<V> {
/**
* Creates a new compiled trie instance.
*
* @param arrayFactory array factory
* @param root compiled root node
* @param traversalDirection logical key traversal direction
* @param arrayFactory array factory
* @param root compiled root node
* @param metadata trie metadata describing lookup and persistence semantics
* @throws NullPointerException if any argument is {@code null}
*/
private FrequencyTrie(final IntFunction<V[]> arrayFactory, final CompiledNode<V> root,
@@ -922,6 +922,13 @@ public final class FrequencyTrie<V> {
return this;
}
/**
* Applies build-time dictionary-key normalization according to the builder
* configuration.
*
* @param key dictionary key
* @return normalized key for trie insertion
*/
private String normalizeDictionaryKey(final String key) {
String normalized = key;

View File

@@ -737,6 +737,7 @@ public final class PatchCommandEncoder {
* @param targetCharacters target characters
* @param sourceLength source length
* @param targetLength target length
* @param direction traversal direction used to compare characters
*/
private void fillMatrices(final char[] sourceCharacters, final char[] targetCharacters, final int sourceLength,
final int targetLength, final WordTraversalDirection direction) {
@@ -988,6 +989,14 @@ public final class PatchCommandEncoder {
private int replaceCost = 1;
private int matchCost; // = 0
/**
* Creates a builder initialized with the default Egothor-compatible cost model
* and backward traversal.
*
* <p>
* The defaults come from the builder's field initializers (for example a
* replace cost of {@code 1} and a match cost of {@code 0}), so the constructor
* body is intentionally empty.
* </p>
*/
public Builder() {
// Default values are assigned in field initializers.
}
/**
* Sets traversal direction used by the created encoder.
*
@@ -1011,7 +1020,7 @@ public final class PatchCommandEncoder {
}
/**
* Sets cost of an delete operation.
* Sets cost of a delete operation.
*
* @param value cost of the operation
* @return this builder
@@ -1022,7 +1031,7 @@ public final class PatchCommandEncoder {
}
/**
* Sets cost of an replace operation.
* Sets cost of a replace operation.
*
* @param value cost of the operation
* @return this builder
@@ -1033,7 +1042,7 @@ public final class PatchCommandEncoder {
}
/**
* Sets cost of an skip operation.
* Sets cost of a match operation.
*
* @param value cost of the operation
* @return this builder

View File

@@ -217,6 +217,14 @@ public record TrieMetadata(int formatVersion, WordTraversalDirection traversalDi
diacriticProcessingMode, caseProcessingMode);
}
/**
* Returns a required metadata entry from a parsed text block.
*
* @param entries parsed metadata entries
* @param key required entry key
* @return non-blank entry value
* @throws IllegalArgumentException if the entry is absent or blank
*/
private static String requireEntry(final Map<String, String> entries, final String key) {
final String value = entries.get(key);
if (value == null || value.isBlank()) {

View File

@@ -60,11 +60,23 @@ import java.util.Objects;
this.childSignature = childSignature;
}
/**
* Computes the hash code of this descriptor from its edge and its child
* signature.
*
* @return hash code combining {@code edge} and {@code childSignature}
*/
@Override
public int hashCode() {
// Same 31-based fold that Objects.hash(edge, childSignature) performs.
int combined = 1;
combined = 31 * combined + Objects.hashCode(this.edge);
combined = 31 * combined + Objects.hashCode(this.childSignature);
return combined;
}
/**
* Compares this descriptor with another object.
*
* @param other object to compare with
* @return {@code true} when both descriptors represent the same semantic
* reduction identity
*/
@Override
public boolean equals(final Object other) {
if (this == other) {

View File

@@ -53,11 +53,23 @@ import java.util.Objects;
this.dominantValue = dominantValue;
}
/**
* Computes the hash code of this descriptor from its dominant value alone.
*
* @return hash code of {@code dominantValue}, or {@code 0} when it is
*         {@code null}
*/
@Override
public int hashCode() {
final Object value = this.dominantValue;
return value == null ? 0 : value.hashCode();
}
/**
* Compares this descriptor with another object.
*
* @param other object to compare with
* @return {@code true} when both descriptors represent the same semantic
* reduction identity
*/
@Override
public boolean equals(final Object other) {
if (this == other) {

View File

@@ -65,11 +65,23 @@ import java.util.List;
Collections.unmodifiableList(Arrays.asList(Arrays.copyOf(orderedValues, orderedValues.length))));
}
/**
* Returns a hash code consistent with descriptor equality.
*
* <p>
* The value is delegated entirely to the hash code of {@code orderedValues}.
* </p>
*
* @return descriptor hash code
*/
@Override
public int hashCode() {
// NOTE(review): orderedValues appears to be a List, which would make this
// hash sensitive to element order - confirm against the field declaration.
return this.orderedValues.hashCode();
}
/**
* Compares this descriptor with another object.
*
* @param other object to compare with
* @return {@code true} when both descriptors represent the same semantic
* reduction identity
*/
@Override
public boolean equals(final Object other) {
if (this == other) {

View File

@@ -67,11 +67,23 @@ import java.util.Set;
return new UnorderedLocalDescriptor(Collections.unmodifiableSet(distinct));
}
/**
* Returns a hash code consistent with descriptor equality.
*
* <p>
* The value is delegated entirely to the hash code of {@code distinctValues}.
* </p>
*
* @return descriptor hash code
*/
@Override
public int hashCode() {
// NOTE(review): distinctValues appears to be a Set, which would make this
// hash independent of element order - confirm against the field declaration.
return this.distinctValues.hashCode();
}
/**
* Compares this descriptor with another object.
*
* @param other object to compare with
* @return {@code true} when both descriptors represent the same semantic
* reduction identity
*/
@Override
public boolean equals(final Object other) {
if (this == other) {