LUCENE-510: fix backwards-compatibility bug when bulk-merging stored fields from pre-UTF8 segments that contain non-ASCII stored fields

git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@654774 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Michael McCandless 2008-05-09 12:04:46 +00:00
parent 5a4f2572c0
commit 39651e029d
6 changed files with 42 additions and 5 deletions

View File

@ -172,6 +172,10 @@ final class FieldsReader {
indexStream.seek(formatSize + (docID + docStoreOffset) * 8L); indexStream.seek(formatSize + (docID + docStoreOffset) * 8L);
} }
/**
 * Reports whether the stored-fields format of this reader is at least
 * {@code FieldsWriter.FORMAT_VERSION_UTF8_LENGTH_IN_BYTES}.
 *
 * NOTE(review): judging by the constant's name, older formats do not
 * record string lengths in bytes, which is presumably why raw document
 * copying must be avoided for them -- confirm against FieldsWriter.
 *
 * @return true if {@code format} is at or beyond that version, else false
 */
boolean canReadRawDocs() {
  final boolean atLeastUtf8ByteLengthFormat =
      FieldsWriter.FORMAT_VERSION_UTF8_LENGTH_IN_BYTES <= format;
  return atLeastUtf8ByteLengthFormat;
}
final Document doc(int n, FieldSelector fieldSelector) throws CorruptIndexException, IOException { final Document doc(int n, FieldSelector fieldSelector) throws CorruptIndexException, IOException {
seekIndex(n); seekIndex(n);
long position = indexStream.readLong(); long position = indexStream.readLong();

View File

@ -302,8 +302,14 @@ final class SegmentMerger {
final FieldsReader matchingFieldsReader; final FieldsReader matchingFieldsReader;
final boolean hasMatchingReader; final boolean hasMatchingReader;
if (matchingSegmentReader != null) { if (matchingSegmentReader != null) {
final FieldsReader fieldsReader = matchingSegmentReader.getFieldsReader();
if (fieldsReader != null && !fieldsReader.canReadRawDocs()) {
matchingFieldsReader = null;
hasMatchingReader = false;
} else {
matchingFieldsReader = fieldsReader;
hasMatchingReader = true; hasMatchingReader = true;
matchingFieldsReader = matchingSegmentReader.getFieldsReader(); }
} else { } else {
hasMatchingReader = false; hasMatchingReader = false;
matchingFieldsReader = null; matchingFieldsReader = null;

View File

@ -129,6 +129,22 @@ public class TestBackwardsCompatibility extends LuceneTestCase
"23.nocfs", "23.nocfs",
}; };
/**
 * For every entry in {@code oldNames}: unzips the matching old-format
 * index, opens it with an IndexWriter, optimizes and closes the writer,
 * runs {@code _TestUtil.checkIndex} on the directory, then closes the
 * directory and removes the unzipped copy.
 *
 * @throws IOException if any of the index or file operations fail
 */
public void testOptimizeOldIndex() throws IOException {
  for (int idx = 0; idx < oldNames.length; idx++) {
    final String name = oldNames[idx];

    // Unpack the zipped index that ships with the test sources.
    unzip("src/test/org/apache/lucene/index/index." + name, name);

    final Directory dir = FSDirectory.getDirectory(fullDir(name));
    final IndexWriter writer =
        new IndexWriter(dir, new WhitespaceAnalyzer(), IndexWriter.MaxFieldLength.LIMITED);
    writer.optimize();
    writer.close();

    // Check the index once the optimizing writer has been closed.
    _TestUtil.checkIndex(dir);

    dir.close();
    rmDir(name);
  }
}
public void testSearchOldIndex() throws IOException { public void testSearchOldIndex() throws IOException {
for(int i=0;i<oldNames.length;i++) { for(int i=0;i<oldNames.length;i++) {
String dirName = "src/test/org/apache/lucene/index/index." + oldNames[i]; String dirName = "src/test/org/apache/lucene/index/index." + oldNames[i];
@ -190,13 +206,16 @@ public class TestBackwardsCompatibility extends LuceneTestCase
Document d = reader.document(i); Document d = reader.document(i);
List fields = d.getFields(); List fields = d.getFields();
if (oldName.startsWith("23.")) { if (oldName.startsWith("23.")) {
assertEquals(3, fields.size()); assertEquals(4, fields.size());
Field f = (Field) d.getField("id"); Field f = (Field) d.getField("id");
assertEquals(""+i, f.stringValue()); assertEquals(""+i, f.stringValue());
f = (Field) d.getField("utf8"); f = (Field) d.getField("utf8");
assertEquals("Lu\uD834\uDD1Ece\uD834\uDD60ne \u0000 \u2620 ab\ud917\udc17cd", f.stringValue()); assertEquals("Lu\uD834\uDD1Ece\uD834\uDD60ne \u0000 \u2620 ab\ud917\udc17cd", f.stringValue());
f = (Field) d.getField("autf8");
assertEquals("Lu\uD834\uDD1Ece\uD834\uDD60ne \u0000 \u2620 ab\ud917\udc17cd", f.stringValue());
f = (Field) d.getField("content2"); f = (Field) d.getField("content2");
assertEquals("here is more content with aaa aaa aaa", f.stringValue()); assertEquals("here is more content with aaa aaa aaa", f.stringValue());
} }
@ -214,7 +233,11 @@ public class TestBackwardsCompatibility extends LuceneTestCase
testHits(hits, 34, searcher.getIndexReader()); testHits(hits, 34, searcher.getIndexReader());
if (oldName.startsWith("23.")) { if (!oldName.startsWith("19.") &&
!oldName.startsWith("20.") &&
!oldName.startsWith("21.") &&
!oldName.startsWith("22.")) {
// Test on indices >= 2.3
hits = searcher.search(new TermQuery(new Term("utf8", "\u0000"))); hits = searcher.search(new TermQuery(new Term("utf8", "\u0000")));
assertEquals(34, hits.length()); assertEquals(34, hits.length());
hits = searcher.search(new TermQuery(new Term("utf8", "Lu\uD834\uDD1Ece\uD834\uDD60ne"))); hits = searcher.search(new TermQuery(new Term("utf8", "Lu\uD834\uDD1Ece\uD834\uDD60ne")));
@ -455,6 +478,7 @@ public class TestBackwardsCompatibility extends LuceneTestCase
Document doc = new Document(); Document doc = new Document();
doc.add(new Field("content", "aaa", Field.Store.NO, Field.Index.TOKENIZED)); doc.add(new Field("content", "aaa", Field.Store.NO, Field.Index.TOKENIZED));
doc.add(new Field("id", Integer.toString(id), Field.Store.YES, Field.Index.UN_TOKENIZED)); doc.add(new Field("id", Integer.toString(id), Field.Store.YES, Field.Index.UN_TOKENIZED));
doc.add(new Field("autf8", "Lu\uD834\uDD1Ece\uD834\uDD60ne \u0000 \u2620 ab\ud917\udc17cd", Field.Store.YES, Field.Index.TOKENIZED, Field.TermVector.WITH_POSITIONS_OFFSETS));
doc.add(new Field("utf8", "Lu\uD834\uDD1Ece\uD834\uDD60ne \u0000 \u2620 ab\ud917\udc17cd", Field.Store.YES, Field.Index.TOKENIZED, Field.TermVector.WITH_POSITIONS_OFFSETS)); doc.add(new Field("utf8", "Lu\uD834\uDD1Ece\uD834\uDD60ne \u0000 \u2620 ab\ud917\udc17cd", Field.Store.YES, Field.Index.TOKENIZED, Field.TermVector.WITH_POSITIONS_OFFSETS));
doc.add(new Field("content2", "here is more content with aaa aaa aaa", Field.Store.YES, Field.Index.TOKENIZED, Field.TermVector.WITH_POSITIONS_OFFSETS)); doc.add(new Field("content2", "here is more content with aaa aaa aaa", Field.Store.YES, Field.Index.TOKENIZED, Field.TermVector.WITH_POSITIONS_OFFSETS));
writer.addDocument(doc); writer.addDocument(doc);

View File

@ -54,13 +54,16 @@ public class _TestUtil {
((ConcurrentMergeScheduler) ms).sync(); ((ConcurrentMergeScheduler) ms).sync();
} }
/** This runs the CheckIndex tool on the index in dir. If any
* issues are hit, a RuntimeException is thrown; else,
* true is returned. */
public static boolean checkIndex(Directory dir) throws IOException { public static boolean checkIndex(Directory dir) throws IOException {
ByteArrayOutputStream bos = new ByteArrayOutputStream(1024); ByteArrayOutputStream bos = new ByteArrayOutputStream(1024);
CheckIndex.out = new PrintStream(bos); CheckIndex.out = new PrintStream(bos);
if (!CheckIndex.check(dir, false, null)) { if (!CheckIndex.check(dir, false, null)) {
System.out.println("CheckIndex failed"); System.out.println("CheckIndex failed");
System.out.println(bos.toString()); System.out.println(bos.toString());
return false; throw new RuntimeException("CheckIndex failed");
} else } else
return true; return true;
} }