mirror of https://github.com/apache/lucene.git
LUCENE-510: fix backwards compatibility bug when bulk-merging stored fields from pre-UTF8 segments that contain non-ASCII stored fields
git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@654774 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
5a4f2572c0
commit
39651e029d
|
@ -172,6 +172,10 @@ final class FieldsReader {
|
||||||
indexStream.seek(formatSize + (docID + docStoreOffset) * 8L);
|
indexStream.seek(formatSize + (docID + docStoreOffset) * 8L);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
boolean canReadRawDocs() {
|
||||||
|
return format >= FieldsWriter.FORMAT_VERSION_UTF8_LENGTH_IN_BYTES;
|
||||||
|
}
|
||||||
|
|
||||||
final Document doc(int n, FieldSelector fieldSelector) throws CorruptIndexException, IOException {
|
final Document doc(int n, FieldSelector fieldSelector) throws CorruptIndexException, IOException {
|
||||||
seekIndex(n);
|
seekIndex(n);
|
||||||
long position = indexStream.readLong();
|
long position = indexStream.readLong();
|
||||||
|
|
|
@ -302,8 +302,14 @@ final class SegmentMerger {
|
||||||
final FieldsReader matchingFieldsReader;
|
final FieldsReader matchingFieldsReader;
|
||||||
final boolean hasMatchingReader;
|
final boolean hasMatchingReader;
|
||||||
if (matchingSegmentReader != null) {
|
if (matchingSegmentReader != null) {
|
||||||
|
final FieldsReader fieldsReader = matchingSegmentReader.getFieldsReader();
|
||||||
|
if (fieldsReader != null && !fieldsReader.canReadRawDocs()) {
|
||||||
|
matchingFieldsReader = null;
|
||||||
|
hasMatchingReader = false;
|
||||||
|
} else {
|
||||||
|
matchingFieldsReader = fieldsReader;
|
||||||
hasMatchingReader = true;
|
hasMatchingReader = true;
|
||||||
matchingFieldsReader = matchingSegmentReader.getFieldsReader();
|
}
|
||||||
} else {
|
} else {
|
||||||
hasMatchingReader = false;
|
hasMatchingReader = false;
|
||||||
matchingFieldsReader = null;
|
matchingFieldsReader = null;
|
||||||
|
|
|
@ -129,6 +129,22 @@ public class TestBackwardsCompatibility extends LuceneTestCase
|
||||||
"23.nocfs",
|
"23.nocfs",
|
||||||
};
|
};
|
||||||
|
|
||||||
|
public void testOptimizeOldIndex() throws IOException {
|
||||||
|
for(int i=0;i<oldNames.length;i++) {
|
||||||
|
String dirName = "src/test/org/apache/lucene/index/index." + oldNames[i];
|
||||||
|
unzip(dirName, oldNames[i]);
|
||||||
|
String fullPath = fullDir(oldNames[i]);
|
||||||
|
Directory dir = FSDirectory.getDirectory(fullPath);
|
||||||
|
IndexWriter w = new IndexWriter(dir, new WhitespaceAnalyzer(), IndexWriter.MaxFieldLength.LIMITED);
|
||||||
|
w.optimize();
|
||||||
|
w.close();
|
||||||
|
|
||||||
|
_TestUtil.checkIndex(dir);
|
||||||
|
dir.close();
|
||||||
|
rmDir(oldNames[i]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
public void testSearchOldIndex() throws IOException {
|
public void testSearchOldIndex() throws IOException {
|
||||||
for(int i=0;i<oldNames.length;i++) {
|
for(int i=0;i<oldNames.length;i++) {
|
||||||
String dirName = "src/test/org/apache/lucene/index/index." + oldNames[i];
|
String dirName = "src/test/org/apache/lucene/index/index." + oldNames[i];
|
||||||
|
@ -190,13 +206,16 @@ public class TestBackwardsCompatibility extends LuceneTestCase
|
||||||
Document d = reader.document(i);
|
Document d = reader.document(i);
|
||||||
List fields = d.getFields();
|
List fields = d.getFields();
|
||||||
if (oldName.startsWith("23.")) {
|
if (oldName.startsWith("23.")) {
|
||||||
assertEquals(3, fields.size());
|
assertEquals(4, fields.size());
|
||||||
Field f = (Field) d.getField("id");
|
Field f = (Field) d.getField("id");
|
||||||
assertEquals(""+i, f.stringValue());
|
assertEquals(""+i, f.stringValue());
|
||||||
|
|
||||||
f = (Field) d.getField("utf8");
|
f = (Field) d.getField("utf8");
|
||||||
assertEquals("Lu\uD834\uDD1Ece\uD834\uDD60ne \u0000 \u2620 ab\ud917\udc17cd", f.stringValue());
|
assertEquals("Lu\uD834\uDD1Ece\uD834\uDD60ne \u0000 \u2620 ab\ud917\udc17cd", f.stringValue());
|
||||||
|
|
||||||
|
f = (Field) d.getField("autf8");
|
||||||
|
assertEquals("Lu\uD834\uDD1Ece\uD834\uDD60ne \u0000 \u2620 ab\ud917\udc17cd", f.stringValue());
|
||||||
|
|
||||||
f = (Field) d.getField("content2");
|
f = (Field) d.getField("content2");
|
||||||
assertEquals("here is more content with aaa aaa aaa", f.stringValue());
|
assertEquals("here is more content with aaa aaa aaa", f.stringValue());
|
||||||
}
|
}
|
||||||
|
@ -214,7 +233,11 @@ public class TestBackwardsCompatibility extends LuceneTestCase
|
||||||
|
|
||||||
testHits(hits, 34, searcher.getIndexReader());
|
testHits(hits, 34, searcher.getIndexReader());
|
||||||
|
|
||||||
if (oldName.startsWith("23.")) {
|
if (!oldName.startsWith("19.") &&
|
||||||
|
!oldName.startsWith("20.") &&
|
||||||
|
!oldName.startsWith("21.") &&
|
||||||
|
!oldName.startsWith("22.")) {
|
||||||
|
// Test on indices >= 2.3
|
||||||
hits = searcher.search(new TermQuery(new Term("utf8", "\u0000")));
|
hits = searcher.search(new TermQuery(new Term("utf8", "\u0000")));
|
||||||
assertEquals(34, hits.length());
|
assertEquals(34, hits.length());
|
||||||
hits = searcher.search(new TermQuery(new Term("utf8", "Lu\uD834\uDD1Ece\uD834\uDD60ne")));
|
hits = searcher.search(new TermQuery(new Term("utf8", "Lu\uD834\uDD1Ece\uD834\uDD60ne")));
|
||||||
|
@ -455,6 +478,7 @@ public class TestBackwardsCompatibility extends LuceneTestCase
|
||||||
Document doc = new Document();
|
Document doc = new Document();
|
||||||
doc.add(new Field("content", "aaa", Field.Store.NO, Field.Index.TOKENIZED));
|
doc.add(new Field("content", "aaa", Field.Store.NO, Field.Index.TOKENIZED));
|
||||||
doc.add(new Field("id", Integer.toString(id), Field.Store.YES, Field.Index.UN_TOKENIZED));
|
doc.add(new Field("id", Integer.toString(id), Field.Store.YES, Field.Index.UN_TOKENIZED));
|
||||||
|
doc.add(new Field("autf8", "Lu\uD834\uDD1Ece\uD834\uDD60ne \u0000 \u2620 ab\ud917\udc17cd", Field.Store.YES, Field.Index.TOKENIZED, Field.TermVector.WITH_POSITIONS_OFFSETS));
|
||||||
doc.add(new Field("utf8", "Lu\uD834\uDD1Ece\uD834\uDD60ne \u0000 \u2620 ab\ud917\udc17cd", Field.Store.YES, Field.Index.TOKENIZED, Field.TermVector.WITH_POSITIONS_OFFSETS));
|
doc.add(new Field("utf8", "Lu\uD834\uDD1Ece\uD834\uDD60ne \u0000 \u2620 ab\ud917\udc17cd", Field.Store.YES, Field.Index.TOKENIZED, Field.TermVector.WITH_POSITIONS_OFFSETS));
|
||||||
doc.add(new Field("content2", "here is more content with aaa aaa aaa", Field.Store.YES, Field.Index.TOKENIZED, Field.TermVector.WITH_POSITIONS_OFFSETS));
|
doc.add(new Field("content2", "here is more content with aaa aaa aaa", Field.Store.YES, Field.Index.TOKENIZED, Field.TermVector.WITH_POSITIONS_OFFSETS));
|
||||||
writer.addDocument(doc);
|
writer.addDocument(doc);
|
||||||
|
|
Binary file not shown.
Binary file not shown.
|
@ -54,13 +54,16 @@ public class _TestUtil {
|
||||||
((ConcurrentMergeScheduler) ms).sync();
|
((ConcurrentMergeScheduler) ms).sync();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/** This runs the CheckIndex tool on the index in the given directory. If any
|
||||||
|
* issues are hit, a RuntimeException is thrown; else,
|
||||||
|
* true is returned. */
|
||||||
public static boolean checkIndex(Directory dir) throws IOException {
|
public static boolean checkIndex(Directory dir) throws IOException {
|
||||||
ByteArrayOutputStream bos = new ByteArrayOutputStream(1024);
|
ByteArrayOutputStream bos = new ByteArrayOutputStream(1024);
|
||||||
CheckIndex.out = new PrintStream(bos);
|
CheckIndex.out = new PrintStream(bos);
|
||||||
if (!CheckIndex.check(dir, false, null)) {
|
if (!CheckIndex.check(dir, false, null)) {
|
||||||
System.out.println("CheckIndex failed");
|
System.out.println("CheckIndex failed");
|
||||||
System.out.println(bos.toString());
|
System.out.println(bos.toString());
|
||||||
return false;
|
throw new RuntimeException("CheckIndex failed");
|
||||||
} else
|
} else
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue