docs: sync and improvements
This commit is contained in:
@@ -61,6 +61,7 @@ import java.util.logging.Logger;
|
||||
* --output <file>
|
||||
* --reduction-mode <mode>
|
||||
* [--store-original]
|
||||
* [--right-to-left]
|
||||
* [--case-processing-mode <mode>]
|
||||
* [--dominant-winner-min-percent <1..100>]
|
||||
* [--dominant-winner-over-second-ratio <1..n>]
|
||||
|
||||
@@ -85,10 +85,25 @@ final class DiacriticStripper {
|
||||
registerSingle("Þ", 'T');
|
||||
}
|
||||
|
||||
/**
|
||||
* Prevents instantiation: this private constructor always throws {@link AssertionError}.
|
||||
*/
|
||||
private DiacriticStripper() {
|
||||
throw new AssertionError("No instances.");
|
||||
}
|
||||
|
||||
/**
|
||||
* Removes supported diacritic marks and common Latin ligatures from the supplied
|
||||
* text.
|
||||
*
|
||||
* <p>
|
||||
* The method returns the original {@link String} instance when no replacement is
|
||||
* required, avoiding an unnecessary allocation on the common ASCII path.
|
||||
* </p>
|
||||
*
|
||||
* @param input text to normalize
|
||||
* @return normalized text, or {@code input} itself when it is already unchanged
|
||||
*/
|
||||
/* default */ static String strip(final String input) {
|
||||
StringBuilder normalized = null;
|
||||
|
||||
@@ -116,6 +131,13 @@ final class DiacriticStripper {
|
||||
return normalized.toString();
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the replacement text for one non-ASCII character.
|
||||
*
|
||||
* @param source source character
|
||||
* @return replacement text, or {@code null} when the character should be kept
|
||||
* unchanged
|
||||
*/
|
||||
@SuppressWarnings("PMD.AvoidLiteralsInIfCondition")
|
||||
private static String replacementFor(final char source) {
|
||||
if (source <= 0x007F) {
|
||||
@@ -161,6 +183,12 @@ final class DiacriticStripper {
|
||||
return ascii.toString();
|
||||
}
|
||||
|
||||
/**
|
||||
* Registers one-character replacements for a set of source characters.
|
||||
*
|
||||
* @param sourceCharacters characters to replace
|
||||
* @param replacement replacement character
|
||||
*/
|
||||
private static void registerSingle(final String sourceCharacters, final char replacement) {
|
||||
for (int index = 0; index < sourceCharacters.length(); index++) {
|
||||
DIRECT_REPLACEMENTS[sourceCharacters.charAt(index)] = replacement;
|
||||
|
||||
@@ -138,9 +138,9 @@ public final class FrequencyTrie<V> {
|
||||
/**
|
||||
* Creates a new compiled trie instance.
|
||||
*
|
||||
* @param arrayFactory array factory
|
||||
* @param root compiled root node
|
||||
* @param traversalDirection logical key traversal direction
|
||||
* @param arrayFactory array factory
|
||||
* @param root compiled root node
|
||||
* @param metadata trie metadata describing lookup and persistence semantics
|
||||
* @throws NullPointerException if any argument is {@code null}
|
||||
*/
|
||||
private FrequencyTrie(final IntFunction<V[]> arrayFactory, final CompiledNode<V> root,
|
||||
@@ -922,6 +922,13 @@ public final class FrequencyTrie<V> {
|
||||
return this;
|
||||
}
|
||||
|
||||
/**
|
||||
* Applies build-time dictionary-key normalization according to the builder
|
||||
* configuration.
|
||||
*
|
||||
* @param key dictionary key
|
||||
* @return normalized key for trie insertion
|
||||
*/
|
||||
private String normalizeDictionaryKey(final String key) {
|
||||
String normalized = key;
|
||||
|
||||
|
||||
@@ -737,6 +737,7 @@ public final class PatchCommandEncoder {
|
||||
* @param targetCharacters target characters
|
||||
* @param sourceLength source length
|
||||
* @param targetLength target length
|
||||
* @param direction traversal direction used to compare characters
|
||||
*/
|
||||
private void fillMatrices(final char[] sourceCharacters, final char[] targetCharacters, final int sourceLength,
|
||||
final int targetLength, final WordTraversalDirection direction) {
|
||||
@@ -988,6 +989,14 @@ public final class PatchCommandEncoder {
|
||||
private int replaceCost = 1;
|
||||
private int matchCost; // = 0
|
||||
|
||||
/**
|
||||
* Creates a builder initialized with the default Egothor-compatible cost model
|
||||
* and backward traversal.
|
||||
*/
|
||||
public Builder() {
|
||||
// Intentionally empty: defaults come from the field initializers (e.g. replaceCost = 1; matchCost keeps the int default 0).
|
||||
}
|
||||
|
||||
/**
|
||||
* Sets traversal direction used by the created encoder.
|
||||
*
|
||||
@@ -1011,7 +1020,7 @@ public final class PatchCommandEncoder {
|
||||
}
|
||||
|
||||
/**
|
||||
* Sets cost of an delete operation.
|
||||
* Sets cost of a delete operation.
|
||||
*
|
||||
* @param value cost of the operation
|
||||
* @return this builder
|
||||
@@ -1022,7 +1031,7 @@ public final class PatchCommandEncoder {
|
||||
}
|
||||
|
||||
/**
|
||||
* Sets cost of an replace operation.
|
||||
* Sets cost of a replace operation.
|
||||
*
|
||||
* @param value cost of the operation
|
||||
* @return this builder
|
||||
@@ -1033,7 +1042,7 @@ public final class PatchCommandEncoder {
|
||||
}
|
||||
|
||||
/**
|
||||
* Sets cost of an skip operation.
|
||||
* Sets cost of a match operation.
|
||||
*
|
||||
* @param value cost of the operation
|
||||
* @return this builder
|
||||
|
||||
@@ -217,6 +217,14 @@ public record TrieMetadata(int formatVersion, WordTraversalDirection traversalDi
|
||||
diacriticProcessingMode, caseProcessingMode);
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns a required metadata entry from a parsed text block.
|
||||
*
|
||||
* @param entries parsed metadata entries
|
||||
* @param key required entry key
|
||||
* @return non-blank entry value
|
||||
* @throws IllegalArgumentException if the entry is absent or blank
|
||||
*/
|
||||
private static String requireEntry(final Map<String, String> entries, final String key) {
|
||||
final String value = entries.get(key);
|
||||
if (value == null || value.isBlank()) {
|
||||
|
||||
@@ -60,11 +60,23 @@ import java.util.Objects;
|
||||
this.childSignature = childSignature;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns a hash code combining {@code edge} and {@code childSignature}, intended to be consistent with descriptor equality.
|
||||
*
|
||||
* @return descriptor hash code
|
||||
*/
|
||||
@Override
|
||||
public int hashCode() {
|
||||
return Objects.hash(this.edge, this.childSignature);
|
||||
}
|
||||
|
||||
/**
|
||||
* Compares this descriptor with another object.
|
||||
*
|
||||
* @param other object to compare with
|
||||
* @return {@code true} when both descriptors represent the same semantic
|
||||
* reduction identity
|
||||
*/
|
||||
@Override
|
||||
public boolean equals(final Object other) {
|
||||
if (this == other) {
|
||||
|
||||
@@ -53,11 +53,23 @@ import java.util.Objects;
|
||||
this.dominantValue = dominantValue;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the hash code of {@code dominantValue} via {@link Objects#hashCode(Object)} ({@code 0} for {@code null}), intended to be consistent with descriptor equality.
|
||||
*
|
||||
* @return descriptor hash code
|
||||
*/
|
||||
@Override
|
||||
public int hashCode() {
|
||||
return Objects.hashCode(this.dominantValue);
|
||||
}
|
||||
|
||||
/**
|
||||
* Compares this descriptor with another object.
|
||||
*
|
||||
* @param other object to compare with
|
||||
* @return {@code true} when both descriptors represent the same semantic
|
||||
* reduction identity
|
||||
*/
|
||||
@Override
|
||||
public boolean equals(final Object other) {
|
||||
if (this == other) {
|
||||
|
||||
@@ -65,11 +65,23 @@ import java.util.List;
|
||||
Collections.unmodifiableList(Arrays.asList(Arrays.copyOf(orderedValues, orderedValues.length))));
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the hash code of {@code orderedValues} by delegating to its own {@code hashCode()}, intended to be consistent with descriptor equality.
|
||||
*
|
||||
* @return descriptor hash code
|
||||
*/
|
||||
@Override
|
||||
public int hashCode() {
|
||||
return this.orderedValues.hashCode();
|
||||
}
|
||||
|
||||
/**
|
||||
* Compares this descriptor with another object.
|
||||
*
|
||||
* @param other object to compare with
|
||||
* @return {@code true} when both descriptors represent the same semantic
|
||||
* reduction identity
|
||||
*/
|
||||
@Override
|
||||
public boolean equals(final Object other) {
|
||||
if (this == other) {
|
||||
|
||||
@@ -67,11 +67,23 @@ import java.util.Set;
|
||||
return new UnorderedLocalDescriptor(Collections.unmodifiableSet(distinct));
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the hash code of {@code distinctValues} by delegating to its own {@code hashCode()}, intended to be consistent with descriptor equality.
|
||||
*
|
||||
* @return descriptor hash code
|
||||
*/
|
||||
@Override
|
||||
public int hashCode() {
|
||||
return this.distinctValues.hashCode();
|
||||
}
|
||||
|
||||
/**
|
||||
* Compares this descriptor with another object.
|
||||
*
|
||||
* @param other object to compare with
|
||||
* @return {@code true} when both descriptors represent the same semantic
|
||||
* reduction identity
|
||||
*/
|
||||
@Override
|
||||
public boolean equals(final Object other) {
|
||||
if (this == other) {
|
||||
|
||||
Reference in New Issue
Block a user