LUCENE-3736: Some more ParallelAtomicReader cleanups

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1291889 13f79535-47bb-0310-9956-ffa450edef68

Author: Uwe Schindler
Date: 2012-02-21 16:17:13 +00:00
Parent: 9527a9e397
Commit: 208e9626d4
2 changed files with 93 additions and 168 deletions

ParallelAtomicReader.java

@@ -58,6 +58,7 @@ public final class ParallelAtomicReader extends AtomicReader {
   private final int maxDoc, numDocs;
   private final boolean hasDeletions;
   private final SortedMap<String,AtomicReader> fieldToReader = new TreeMap<String,AtomicReader>();
+  private final SortedMap<String,AtomicReader> tvFieldToReader = new TreeMap<String,AtomicReader>();
 
   /** Create a ParallelAtomicReader based on the provided
    * readers; auto-closes the given readers on {@link #close()}. */
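Both maps are SortedMaps backed by TreeMap, so field names always come back in alphabetical order regardless of which reader contributed them; that is the deterministic enumeration order (field1, field2, field3) the test file below asserts. A tiny standalone illustration (plain Java, no Lucene types):

import java.util.*;

class SortedFieldOrder {
  public static void main(String[] args) {
    // TreeMap keeps keys sorted no matter the insertion order
    SortedMap<String,String> fieldToReader = new TreeMap<String,String>();
    fieldToReader.put("field3", "reader2");
    fieldToReader.put("field1", "reader1");
    fieldToReader.put("field2", "reader1");
    System.out.println(fieldToReader.keySet()); // [field1, field2, field3]
  }
}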
@@ -98,20 +99,36 @@ public final class ParallelAtomicReader extends AtomicReader {
         throw new IllegalArgumentException("All readers must have same maxDoc: "+maxDoc+"!="+reader.maxDoc());
       }
     }
 
+    // build FieldInfos and fieldToReader map:
     for (final AtomicReader reader : this.parallelReaders) {
       final FieldInfos readerFieldInfos = reader.getFieldInfos();
-      for(FieldInfo fieldInfo : readerFieldInfos) { // update fieldToReader map
+      for (FieldInfo fieldInfo : readerFieldInfos) {
         // NOTE: first reader having a given field "wins":
         if (!fieldToReader.containsKey(fieldInfo.name)) {
           fieldInfos.add(fieldInfo);
           fieldToReader.put(fieldInfo.name, reader);
-          if (fieldInfo.isIndexed) {
-            this.fields.addField(fieldInfo.name, reader.terms(fieldInfo.name));
+          if (fieldInfo.storeTermVector) {
+            tvFieldToReader.put(fieldInfo.name, reader);
           }
         }
       }
     }
+
+    // build Fields instance
+    for (final AtomicReader reader : this.parallelReaders) {
+      final Fields readerFields = reader.fields();
+      if (readerFields != null) {
+        final FieldsEnum it = readerFields.iterator();
+        String name;
+        while ((name = it.next()) != null) {
+          // only add if the reader responsible for that field name is the current:
+          if (fieldToReader.get(name) == reader) {
+            this.fields.addField(name, it.terms());
+          }
+        }
+      }
+    }
 
     // do this finally so any Exceptions occurred before don't affect refcounts:
     if (!closeSubReaders) {
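The constructor now builds its maps in two passes: the first pass assigns each field name to the first reader that declares it ("first one wins") and separately records which of those fields store term vectors; the second pass registers terms only through the reader that owns the field, so duplicate field names in later readers are skipped. Below is a minimal self-contained sketch of that scheme; the Reader class is a hypothetical stand-in for AtomicReader/FieldInfos, not Lucene API:

import java.util.*;

class FieldMappingSketch {
  // Hypothetical stand-in for an AtomicReader: a name plus a map of
  // field name -> "does this field store term vectors?".
  static class Reader {
    final String name;
    final Map<String,Boolean> fields;
    Reader(String name, Map<String,Boolean> fields) {
      this.name = name;
      this.fields = fields;
    }
  }

  public static void main(String[] args) {
    List<Reader> parallelReaders = Arrays.asList(
        new Reader("r1", Map.of("field1", true, "field2", false)),
        new Reader("r2", Map.of("field1", false, "field3", true)));

    // pass 1: the first reader having a given field "wins"
    SortedMap<String,Reader> fieldToReader = new TreeMap<String,Reader>();
    SortedMap<String,Reader> tvFieldToReader = new TreeMap<String,Reader>();
    for (Reader reader : parallelReaders) {
      for (Map.Entry<String,Boolean> e : reader.fields.entrySet()) {
        if (!fieldToReader.containsKey(e.getKey())) {
          fieldToReader.put(e.getKey(), reader);
          if (e.getValue()) {
            tvFieldToReader.put(e.getKey(), reader); // term-vector fields only
          }
        }
      }
    }

    // pass 2: expose each field only through the reader that owns it,
    // so r2's duplicate "field1" is skipped here
    for (Reader reader : parallelReaders) {
      for (String field : reader.fields.keySet()) {
        if (fieldToReader.get(field) == reader) {
          System.out.println(field + " owned by " + reader.name);
        }
      }
    }
    // fieldToReader  : {field1=r1, field2=r1, field3=r2}
    // tvFieldToReader: {field1=r1, field3=r2}
  }
}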
@@ -233,12 +250,11 @@ public final class ParallelAtomicReader extends AtomicReader {
     }
   }
 
-  // get all vectors
   @Override
   public Fields getTermVectors(int docID) throws IOException {
     ensureOpen();
     ParallelFields fields = null;
-    for (Map.Entry<String,AtomicReader> ent : fieldToReader.entrySet()) {
+    for (Map.Entry<String,AtomicReader> ent : tvFieldToReader.entrySet()) {
       String fieldName = ent.getKey();
       Terms vector = ent.getValue().getTermVector(docID, fieldName);
       if (vector != null) {
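With the map swap above, getTermVectors(int) iterates tvFieldToReader, so per-document lookups touch only the fields known to store term vectors instead of every field in the index. A hedged sketch of that access pattern, with plain collections standing in for Lucene's Terms and ParallelFields (the Reader interface is hypothetical):

import java.util.*;

class TermVectorSketch {
  // Hypothetical stand-in for AtomicReader.getTermVector(docID, field):
  // returns the stored vector for that doc/field, or null if none exists.
  interface Reader {
    List<String> getTermVector(int docID, String field);
  }

  // Mirrors the patched method's shape: walk only the term-vector map,
  // allocate the result lazily, return null when the doc has no vectors.
  static Map<String,List<String>> getTermVectors(
      int docID, SortedMap<String,Reader> tvFieldToReader) {
    Map<String,List<String>> fields = null;
    for (Map.Entry<String,Reader> ent : tvFieldToReader.entrySet()) {
      List<String> vector = ent.getValue().getTermVector(docID, ent.getKey());
      if (vector != null) {
        if (fields == null) {
          fields = new TreeMap<String,List<String>>();
        }
        fields.put(ent.getKey(), vector);
      }
    }
    return fields;
  }

  public static void main(String[] args) {
    SortedMap<String,Reader> tv = new TreeMap<String,Reader>();
    tv.put("field1", (docID, field) -> docID == 0 ? List.of("fox", "quick") : null);
    System.out.println(getTermVectors(0, tv)); // {field1=[fox, quick]}
    System.out.println(getTermVectors(1, tv)); // null
  }
}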

TestParallelTermEnum.java

@@ -24,178 +24,87 @@ import org.apache.lucene.document.Document;
 import org.apache.lucene.document.TextField;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.util.Bits;
+import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.LuceneTestCase;
 import org.apache.lucene.util._TestUtil;
 
 public class TestParallelTermEnum extends LuceneTestCase {
   private AtomicReader ir1;
   private AtomicReader ir2;
   private Directory rd1;
   private Directory rd2;
 
   @Override
   public void setUp() throws Exception {
     super.setUp();
     Document doc;
     rd1 = newDirectory();
     IndexWriter iw1 = new IndexWriter(rd1, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random)));
     doc = new Document();
     doc.add(newField("field1", "the quick brown fox jumps", TextField.TYPE_STORED));
     doc.add(newField("field2", "the quick brown fox jumps", TextField.TYPE_STORED));
-    doc.add(newField("field4", "", TextField.TYPE_UNSTORED));
     iw1.addDocument(doc);
     iw1.close();
     rd2 = newDirectory();
     IndexWriter iw2 = new IndexWriter(rd2, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random)));
     doc = new Document();
-    doc.add(newField("field0", "", TextField.TYPE_UNSTORED));
     doc.add(newField("field1", "the fox jumps over the lazy dog", TextField.TYPE_STORED));
     doc.add(newField("field3", "the fox jumps over the lazy dog", TextField.TYPE_STORED));
     iw2.addDocument(doc);
     iw2.close();
     this.ir1 = SlowCompositeReaderWrapper.wrap(DirectoryReader.open(rd1));
     this.ir2 = SlowCompositeReaderWrapper.wrap(DirectoryReader.open(rd2));
   }
 
   @Override
   public void tearDown() throws Exception {
     ir1.close();
     ir2.close();
     rd1.close();
     rd2.close();
     super.tearDown();
   }
 
+  private void checkTerms(Terms terms, Bits liveDocs, String... termsList) throws IOException {
+    assertNotNull(terms);
+    final TermsEnum te = terms.iterator(null);
+
+    for (String t : termsList) {
+      BytesRef b = te.next();
+      assertNotNull(b);
+      assertEquals(t, b.utf8ToString());
+      DocsEnum td = _TestUtil.docs(random, te, liveDocs, null, false);
+      assertTrue(td.nextDoc() != DocsEnum.NO_MORE_DOCS);
+      assertEquals(0, td.docID());
+      assertEquals(td.nextDoc(), DocsEnum.NO_MORE_DOCS);
+    }
+    assertNull(te.next());
+  }
+
   public void test1() throws IOException {
     ParallelAtomicReader pr = new ParallelAtomicReader(ir1, ir2);
 
     Bits liveDocs = pr.getLiveDocs();
 
     FieldsEnum fe = pr.fields().iterator();
 
     String f = fe.next();
-    assertEquals("field0", f);
-    f = fe.next();
     assertEquals("field1", f);
+    checkTerms(fe.terms(), liveDocs, "brown", "fox", "jumps", "quick", "the");
 
-    Terms terms = fe.terms();
-    TermsEnum te = terms.iterator(null);
-
-    assertEquals("brown", te.next().utf8ToString());
-    DocsEnum td = _TestUtil.docs(random, te, liveDocs, null, false);
-    assertTrue(td.nextDoc() != DocsEnum.NO_MORE_DOCS);
-    assertEquals(0, td.docID());
-    assertEquals(td.nextDoc(), DocsEnum.NO_MORE_DOCS);
-    assertEquals("fox", te.next().utf8ToString());
-    td = _TestUtil.docs(random, te, liveDocs, td, false);
-    assertTrue(td.nextDoc() != DocsEnum.NO_MORE_DOCS);
-    assertEquals(0, td.docID());
-    assertEquals(td.nextDoc(), DocsEnum.NO_MORE_DOCS);
-    assertEquals("jumps", te.next().utf8ToString());
-    td = _TestUtil.docs(random, te, liveDocs, td, false);
-    assertTrue(td.nextDoc() != DocsEnum.NO_MORE_DOCS);
-    assertEquals(0, td.docID());
-    assertEquals(td.nextDoc(), DocsEnum.NO_MORE_DOCS);
-    assertEquals("quick", te.next().utf8ToString());
-    td = _TestUtil.docs(random, te, liveDocs, td, false);
-    assertTrue(td.nextDoc() != DocsEnum.NO_MORE_DOCS);
-    assertEquals(0, td.docID());
-    assertEquals(td.nextDoc(), DocsEnum.NO_MORE_DOCS);
-    assertEquals("the", te.next().utf8ToString());
-    td = _TestUtil.docs(random, te, liveDocs, td, false);
-    assertTrue(td.nextDoc() != DocsEnum.NO_MORE_DOCS);
-    assertEquals(0, td.docID());
-    assertEquals(td.nextDoc(), DocsEnum.NO_MORE_DOCS);
-    assertNull(te.next());
-
     f = fe.next();
     assertEquals("field2", f);
-    terms = fe.terms();
-    assertNotNull(terms);
-    te = terms.iterator(null);
-
-    assertEquals("brown", te.next().utf8ToString());
-    td = _TestUtil.docs(random, te, liveDocs, td, false);
-    assertTrue(td.nextDoc() != DocsEnum.NO_MORE_DOCS);
-    assertEquals(0, td.docID());
-    assertEquals(td.nextDoc(), DocsEnum.NO_MORE_DOCS);
-    assertEquals("fox", te.next().utf8ToString());
-    td = _TestUtil.docs(random, te, liveDocs, td, false);
-    assertTrue(td.nextDoc() != DocsEnum.NO_MORE_DOCS);
-    assertEquals(0, td.docID());
-    assertEquals(td.nextDoc(), DocsEnum.NO_MORE_DOCS);
-    assertEquals("jumps", te.next().utf8ToString());
-    td = _TestUtil.docs(random, te, liveDocs, td, false);
-    assertTrue(td.nextDoc() != DocsEnum.NO_MORE_DOCS);
-    assertEquals(0, td.docID());
-    assertEquals(td.nextDoc(), DocsEnum.NO_MORE_DOCS);
-    assertEquals("quick", te.next().utf8ToString());
-    td = _TestUtil.docs(random, te, liveDocs, td, false);
-    assertTrue(td.nextDoc() != DocsEnum.NO_MORE_DOCS);
-    assertEquals(0, td.docID());
-    assertEquals(td.nextDoc(), DocsEnum.NO_MORE_DOCS);
-    assertEquals("the", te.next().utf8ToString());
-    td = _TestUtil.docs(random, te, liveDocs, td, false);
-    assertTrue(td.nextDoc() != DocsEnum.NO_MORE_DOCS);
-    assertEquals(0, td.docID());
-    assertEquals(td.nextDoc(), DocsEnum.NO_MORE_DOCS);
-    assertNull(te.next());
+    checkTerms(fe.terms(), liveDocs, "brown", "fox", "jumps", "quick", "the");
 
     f = fe.next();
     assertEquals("field3", f);
-    terms = fe.terms();
-    assertNotNull(terms);
-    te = terms.iterator(null);
-
-    assertEquals("dog", te.next().utf8ToString());
-    td = _TestUtil.docs(random, te, liveDocs, td, false);
-    assertTrue(td.nextDoc() != DocsEnum.NO_MORE_DOCS);
-    assertEquals(0, td.docID());
-    assertEquals(td.nextDoc(), DocsEnum.NO_MORE_DOCS);
-    assertEquals("fox", te.next().utf8ToString());
-    td = _TestUtil.docs(random, te, liveDocs, td, false);
-    assertTrue(td.nextDoc() != DocsEnum.NO_MORE_DOCS);
-    assertEquals(0, td.docID());
-    assertEquals(td.nextDoc(), DocsEnum.NO_MORE_DOCS);
-    assertEquals("jumps", te.next().utf8ToString());
-    td = _TestUtil.docs(random, te, liveDocs, td, false);
-    assertTrue(td.nextDoc() != DocsEnum.NO_MORE_DOCS);
-    assertEquals(0, td.docID());
-    assertEquals(td.nextDoc(), DocsEnum.NO_MORE_DOCS);
-    assertEquals("lazy", te.next().utf8ToString());
-    td = _TestUtil.docs(random, te, liveDocs, td, false);
-    assertTrue(td.nextDoc() != DocsEnum.NO_MORE_DOCS);
-    assertEquals(0, td.docID());
-    assertEquals(td.nextDoc(), DocsEnum.NO_MORE_DOCS);
-    assertEquals("over", te.next().utf8ToString());
-    td = _TestUtil.docs(random, te, liveDocs, td, false);
-    assertTrue(td.nextDoc() != DocsEnum.NO_MORE_DOCS);
-    assertEquals(0, td.docID());
-    assertEquals(td.nextDoc(), DocsEnum.NO_MORE_DOCS);
-    assertEquals("the", te.next().utf8ToString());
-    td = _TestUtil.docs(random, te, liveDocs, td, false);
-    assertTrue(td.nextDoc() != DocsEnum.NO_MORE_DOCS);
-    assertEquals(0, td.docID());
-    assertEquals(td.nextDoc(), DocsEnum.NO_MORE_DOCS);
-    assertNull(te.next());
+    checkTerms(fe.terms(), liveDocs, "dog", "fox", "jumps", "lazy", "over", "the");
+
+    assertNull(fe.next());
   }
 }
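The test rewrite above folds roughly 150 lines of copy-pasted TermsEnum/DocsEnum assertions into the single varargs checkTerms helper. The same pattern in standalone form, using a plain java.util.Iterator in place of Lucene's TermsEnum (all names illustrative):

import java.util.*;

class CheckTermsSketch {
  // Assert the enumerator yields exactly the expected terms, in order,
  // then is exhausted, mirroring checkTerms() in the test above.
  static void checkTerms(Iterator<String> te, String... termsList) {
    for (String t : termsList) {
      if (!te.hasNext()) {
        throw new AssertionError("enum exhausted before expected term: " + t);
      }
      String b = te.next();
      if (!t.equals(b)) {
        throw new AssertionError("expected " + t + " but got " + b);
      }
    }
    if (te.hasNext()) {
      throw new AssertionError("unexpected extra term: " + te.next());
    }
  }

  public static void main(String[] args) {
    checkTerms(Arrays.asList("brown", "fox", "jumps", "quick", "the").iterator(),
        "brown", "fox", "jumps", "quick", "the");
    System.out.println("all term checks passed");
  }
}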