LUCENE-5824: hunspell FLAG LONG implemented incorrectly

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1610705 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Robert Muir 2014-07-15 14:15:43 +00:00
parent 7087d374fe
commit 7bb3f6044e
4 changed files with 13 additions and 4 deletions

View File

@ -163,6 +163,8 @@ Bug Fixes
match affixes, words are only stripped to a zero-length string if FULLSTRIP option
is specified in the dictionary. (Robert Muir)
* LUCENE-5824: Fix hunspell 'long' flag handling. (Robert Muir)
Test Framework
* LUCENE-5786: Unflushed/truncated events file (hung testing subprocess).

View File

@ -1063,8 +1063,6 @@ public class Dictionary {
/**
* Implementation of {@link FlagParsingStrategy} that assumes each flag is encoded as two ASCII characters whose codes
* must be combined into a single character.
*
* TODO (rmuir) test
*/
private static class DoubleASCIIFlagParsingStrategy extends FlagParsingStrategy {
@ -1079,8 +1077,13 @@ public class Dictionary {
throw new IllegalArgumentException("Invalid flags (should be even number of characters): " + rawFlags);
}
for (int i = 0; i < rawFlags.length(); i+=2) {
char cookedFlag = (char) ((int) rawFlags.charAt(i) + (int) rawFlags.charAt(i + 1));
builder.append(cookedFlag);
char f1 = rawFlags.charAt(i);
char f2 = rawFlags.charAt(i+1);
if (f1 >= 256 || f2 >= 256) {
throw new IllegalArgumentException("Invalid flags (LONG flags must be double ASCII): " + rawFlags);
}
char combined = (char) (f1 << 8 | f2);
builder.append(combined);
}
char flags[] = new char[builder.length()];

View File

@ -29,5 +29,6 @@ public class TestFlagLong extends StemmerTestBase {
assertStemsTo("foo", "foo");
assertStemsTo("foos", "foo");
assertStemsTo("fooss");
assertStemsTo("foobogus");
}
}

View File

@ -2,3 +2,6 @@ SET UTF-8
FLAG long
SFX Y1 Y 1
SFX Y1 0 s .
SFX 1Y Y 1
SFX 1Y 0 bogus .