mirror of https://github.com/apache/lucene.git
LUCENE-8124: Fixed HyphenationCompoundWordTokenFilter to correctly handle hyphenation patterns with an indicator >= 7.
This commit is contained in:
parent
fc6f3a45f8
commit
f5e2267097
|
@ -146,6 +146,9 @@ Bug Fixes
|
|||
|
||||
* LUCENE-8130: Fix NullPointerException from TermStates.toString() (Mike McCandless)
|
||||
|
||||
* LUCENE-8124: Fixed HyphenationCompoundWordTokenFilter to correctly handle
|
||||
hyphenation patterns with indicator >= 7. (Holger Bruch via Adrien Grand)
|
||||
|
||||
Other
|
||||
|
||||
* LUCENE-8111: IndexOrDocValuesQuery Javadoc references outdated method name.
|
||||
|
|
|
@ -89,7 +89,7 @@ public class HyphenationTree extends TernaryTree implements PatternConsumer {
|
|||
StringBuilder buf = new StringBuilder();
|
||||
byte v = vspace.get(k++);
|
||||
while (v != 0) {
|
||||
char c = (char) ((v >>> 4) - 1 + '0');
|
||||
char c = (char) (((v & 0xf0 )>>> 4) - 1 + '0');
|
||||
buf.append(c);
|
||||
c = (char) (v & 0x0f);
|
||||
if (c == 0) {
|
||||
|
@ -151,7 +151,7 @@ public class HyphenationTree extends TernaryTree implements PatternConsumer {
|
|||
StringBuilder buf = new StringBuilder();
|
||||
byte v = vspace.get(k++);
|
||||
while (v != 0) {
|
||||
char c = (char) ((v >>> 4) - 1);
|
||||
char c = (char) (((v & 0xf0 )>>> 4) - 1);
|
||||
buf.append(c);
|
||||
c = (char) (v & 0x0f);
|
||||
if (c == 0) {
|
||||
|
|
|
@ -262,6 +262,21 @@ public class TestCompoundWordTokenFilter extends BaseTokenStreamTestCase {
|
|||
}
|
||||
|
||||
}
|
||||
|
||||
/**
 * Regression test for LUCENE-8124.
 *
 * <p>Builds a {@code HyphenationTree} from the {@code hyphenation-LUCENE-8124.xml} test
 * resource, runs a {@code HyphenationCompoundWordTokenFilter} over the single
 * whitespace-tokenized input {@code "Rindfleisch"}, and checks the exact sequence of
 * emitted tokens.
 *
 * <p>NOTE(review): the resource file is not visible here; presumably it contains the
 * hyphenation patterns with an indicator &gt;= 7 that the issue describes - confirm.
 *
 * @throws Exception if the hyphenation grammar cannot be loaded or the token stream fails
 */
public void testLucene8124() throws Exception {
|
||||
InputSource is = new InputSource(getClass().getResource("hyphenation-LUCENE-8124.xml").toExternalForm());
|
||||
HyphenationTree hyphenator = HyphenationCompoundWordTokenFilter
|
||||
.getHyphenationTree(is);
|
||||

|
||||
HyphenationCompoundWordTokenFilter tf = new HyphenationCompoundWordTokenFilter(
|
||||
whitespaceMockTokenizer(
|
||||
"Rindfleisch"),
|
||||
hyphenator);
|
||||

|
||||
// TODO: "Rindfleisch" being returned twice is a separate issue in HyphenationCompoundWordTokenFilter;
|
||||
// the expected tokens below deliberately pin the current output, duplicate included.
|
||||
assertTokenStreamContents(tf, new String[] { "Rindfleisch", "Rind", "Rindfleisch", "fleisch"});
|
||||
}
|
||||
|
||||
|
||||
public static interface MockRetainAttribute extends Attribute {
|
||||
void setRetain(boolean attr);
|
||||
|
|
Loading…
Reference in New Issue