mirror of
https://github.com/apache/lucene.git
synced 2025-02-06 10:08:58 +00:00
LUCENE-2016: remap invalid U+FFFF char during indexing, to prevent silent corruption
git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@831041 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
88d3d41992
commit
de0aaadb81
@ -164,6 +164,10 @@ Bug fixes
|
||||
* LUCENE-2013: SpanRegexQuery does not work with QueryScorer.
|
||||
(Benjamin Keil via Mark Miller)
|
||||
|
||||
* LUCENE-2016: Replace illegal U+FFFF character with the replacement
|
||||
char (U+FFFD) during indexing, to prevent silent index corruption.
|
||||
(Peter Keegan, Mike McCandless)
|
||||
|
||||
New features
|
||||
|
||||
* LUCENE-1933: Provide a convenience AttributeFactory that creates a
|
||||
|
@ -377,9 +377,11 @@ final class TermsHashPerField extends InvertedDocConsumerPerField {
|
||||
ch = tokenText[downto] = UnicodeUtil.UNI_REPLACEMENT_CHAR;
|
||||
}
|
||||
}
|
||||
} else if (ch >= UnicodeUtil.UNI_SUR_HIGH_START && ch <= UnicodeUtil.UNI_SUR_HIGH_END)
|
||||
// Unpaired
|
||||
} else if (ch >= UnicodeUtil.UNI_SUR_HIGH_START && (ch <= UnicodeUtil.UNI_SUR_HIGH_END ||
|
||||
ch == 0xffff)) {
|
||||
// Unpaired or 0xffff
|
||||
ch = tokenText[downto] = UnicodeUtil.UNI_REPLACEMENT_CHAR;
|
||||
}
|
||||
|
||||
code = (code*31) + ch;
|
||||
}
|
||||
|
@ -29,7 +29,6 @@ import java.util.HashMap;
|
||||
import java.util.Iterator;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.HashSet;
|
||||
import java.util.Random;
|
||||
|
||||
import org.apache.lucene.util.LuceneTestCase;
|
||||
@ -4523,4 +4522,20 @@ public class TestIndexWriter extends LuceneTestCase {
|
||||
w.close();
|
||||
d.close();
|
||||
}
|
||||
|
||||
public void testEmbeddedFFFF() throws Throwable {
|
||||
|
||||
Directory d = new MockRAMDirectory();
|
||||
IndexWriter w = new IndexWriter(d, new WhitespaceAnalyzer(), IndexWriter.MaxFieldLength.UNLIMITED);
|
||||
Document doc = new Document();
|
||||
doc.add(new Field("field", "a a\uffffb", Field.Store.NO, Field.Index.ANALYZED));
|
||||
w.addDocument(doc);
|
||||
doc = new Document();
|
||||
doc.add(new Field("field", "a", Field.Store.NO, Field.Index.ANALYZED));
|
||||
w.addDocument(doc);
|
||||
w.close();
|
||||
|
||||
_TestUtil.checkIndex(d);
|
||||
d.close();
|
||||
}
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user