mirror of https://github.com/apache/lucene.git
LUCENE-2015: add a static foldToASCII method to ASCIIFoldingFilter
git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@922277 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
f44f23026f
commit
24d4565049
|
@ -83,6 +83,9 @@ API Changes
|
|||
with FuzzyQuery to ensure that exact matches are always scored higher,
|
||||
because only the boost will be used in scoring. (Robert Muir)
|
||||
|
||||
* LUCENE-2015: Add a static method foldToASCII to ASCIIFoldingFilter to
|
||||
expose its folding logic. (Cédrik Lime via Robert Muir)
|
||||
|
||||
Bug fixes
|
||||
|
||||
* LUCENE-2119: Don't throw NegativeArraySizeException if you pass
|
||||
|
|
|
@ -105,9 +105,24 @@ public final class ASCIIFoldingFilter extends TokenFilter {
|
|||
output = new char[ArrayUtil.oversize(maxSizeNeeded, RamUsageEstimator.NUM_BYTES_CHAR)];
|
||||
}
|
||||
|
||||
outputPos = 0;
|
||||
outputPos = foldToASCII(input, 0, output, 0, length);
|
||||
}
|
||||
|
||||
for (int pos = 0 ; pos < length ; ++pos) {
|
||||
/**
|
||||
* Converts characters above ASCII to their ASCII equivalents. For example,
|
||||
* accents are removed from accented characters.
|
||||
* @param input The characters to fold
|
||||
* @param inputPos Index of the first character to fold
|
||||
* @param output The result of the folding. Should be of size >= {@code length * 4}.
|
||||
* @param outputPos Index of output where to put the result of the folding
|
||||
* @param length The number of characters to fold
|
||||
* @return length of output
|
||||
* @lucene.internal
|
||||
*/
|
||||
public static final int foldToASCII(char input[], int inputPos, char output[], int outputPos, int length)
|
||||
{
|
||||
final int end = inputPos + length;
|
||||
for (int pos = inputPos; pos < end ; ++pos) {
|
||||
final char c = input[pos];
|
||||
|
||||
// Quick test: if it's not in range then just keep current character
|
||||
|
@ -2028,5 +2043,6 @@ public final class ASCIIFoldingFilter extends TokenFilter {
|
|||
}
|
||||
}
|
||||
}
|
||||
return outputPos;
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue