mirror of https://github.com/apache/lucene.git
LUCENE-8124: Fixed HyphenationCompoundWordTokenFilter to correctly handle hyphenation patterns with indicator >= 7.
This commit is contained in:
parent
fc6f3a45f8
commit
f5e2267097
|
@ -146,6 +146,9 @@ Bug Fixes
|
||||||
|
|
||||||
* LUCENE-8130: Fix NullPointerException from TermStates.toString() (Mike McCandless)
|
* LUCENE-8130: Fix NullPointerException from TermStates.toString() (Mike McCandless)
|
||||||
|
|
||||||
|
* LUCENE-8124: Fixed HyphenationCompoundWordTokenFilter to correctly handle
|
||||||
|
hyphenation patterns with indicator >= 7. (Holger Bruch via Adrien Grand)
|
||||||
|
|
||||||
Other
|
Other
|
||||||
|
|
||||||
* LUCENE-8111: IndexOrDocValuesQuery Javadoc references outdated method name.
|
* LUCENE-8111: IndexOrDocValuesQuery Javadoc references outdated method name.
|
||||||
|
|
|
@ -89,7 +89,7 @@ public class HyphenationTree extends TernaryTree implements PatternConsumer {
|
||||||
StringBuilder buf = new StringBuilder();
|
StringBuilder buf = new StringBuilder();
|
||||||
byte v = vspace.get(k++);
|
byte v = vspace.get(k++);
|
||||||
while (v != 0) {
|
while (v != 0) {
|
||||||
char c = (char) ((v >>> 4) - 1 + '0');
|
char c = (char) (((v & 0xf0 )>>> 4) - 1 + '0');
|
||||||
buf.append(c);
|
buf.append(c);
|
||||||
c = (char) (v & 0x0f);
|
c = (char) (v & 0x0f);
|
||||||
if (c == 0) {
|
if (c == 0) {
|
||||||
|
@ -151,7 +151,7 @@ public class HyphenationTree extends TernaryTree implements PatternConsumer {
|
||||||
StringBuilder buf = new StringBuilder();
|
StringBuilder buf = new StringBuilder();
|
||||||
byte v = vspace.get(k++);
|
byte v = vspace.get(k++);
|
||||||
while (v != 0) {
|
while (v != 0) {
|
||||||
char c = (char) ((v >>> 4) - 1);
|
char c = (char) (((v & 0xf0 )>>> 4) - 1);
|
||||||
buf.append(c);
|
buf.append(c);
|
||||||
c = (char) (v & 0x0f);
|
c = (char) (v & 0x0f);
|
||||||
if (c == 0) {
|
if (c == 0) {
|
||||||
|
|
|
@ -263,6 +263,21 @@ public class TestCompoundWordTokenFilter extends BaseTokenStreamTestCase {
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public void testLucene8124() throws Exception {
|
||||||
|
InputSource is = new InputSource(getClass().getResource("hyphenation-LUCENE-8124.xml").toExternalForm());
|
||||||
|
HyphenationTree hyphenator = HyphenationCompoundWordTokenFilter
|
||||||
|
.getHyphenationTree(is);
|
||||||
|
|
||||||
|
HyphenationCompoundWordTokenFilter tf = new HyphenationCompoundWordTokenFilter(
|
||||||
|
whitespaceMockTokenizer(
|
||||||
|
"Rindfleisch"),
|
||||||
|
hyphenator);
|
||||||
|
|
||||||
|
// TODO: "Rindfleisch" being returned twice is a separate issue in HyphenationCompoundWordTokenFilter
|
||||||
|
assertTokenStreamContents(tf, new String[] { "Rindfleisch", "Rind", "Rindfleisch", "fleisch"});
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
public static interface MockRetainAttribute extends Attribute {
|
public static interface MockRetainAttribute extends Attribute {
|
||||||
void setRetain(boolean attr);
|
void setRetain(boolean attr);
|
||||||
boolean getRetain();
|
boolean getRetain();
|
||||||
|
|
Loading…
Reference in New Issue