LUCENE-6062: pass correct fieldinfos to dv producer when the segment has updates

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1640471 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Robert Muir 2014-11-19 03:08:34 +00:00
parent bf6287116a
commit 76f38c91b7
4 changed files with 114 additions and 17 deletions

View File

@ -37,8 +37,7 @@ final class SegmentDocValues {
private final Map<Long,RefCount<DocValuesProducer>> genDVProducers = new HashMap<>();
private RefCount<DocValuesProducer> newDocValuesProducer(SegmentCommitInfo si, IOContext context, Directory dir,
DocValuesFormat dvFormat, final Long gen, FieldInfos infos) throws IOException {
private RefCount<DocValuesProducer> newDocValuesProducer(SegmentCommitInfo si, Directory dir, final Long gen, FieldInfos infos) throws IOException {
Directory dvDir = dir;
String segmentSuffix = "";
if (gen.longValue() != -1) {
@ -47,7 +46,8 @@ final class SegmentDocValues {
}
// set SegmentReadState to list only the fields that are relevant to that gen
SegmentReadState srs = new SegmentReadState(dvDir, si.info, infos, context, segmentSuffix);
SegmentReadState srs = new SegmentReadState(dvDir, si.info, infos, IOContext.READ, segmentSuffix);
DocValuesFormat dvFormat = si.info.getCodec().docValuesFormat();
return new RefCount<DocValuesProducer>(dvFormat.fieldsProducer(srs)) {
@SuppressWarnings("synthetic-access")
@Override
@ -61,11 +61,10 @@ final class SegmentDocValues {
}
/** Returns the {@link DocValuesProducer} for the given generation. */
synchronized DocValuesProducer getDocValuesProducer(long gen, SegmentCommitInfo si, IOContext context, Directory dir,
DocValuesFormat dvFormat, FieldInfos infos) throws IOException {
synchronized DocValuesProducer getDocValuesProducer(long gen, SegmentCommitInfo si, Directory dir, FieldInfos infos) throws IOException {
RefCount<DocValuesProducer> dvp = genDVProducers.get(gen);
if (dvp == null) {
dvp = newDocValuesProducer(si, context, dir, dvFormat, gen, infos);
dvp = newDocValuesProducer(si, dir, gen, infos);
assert dvp != null;
genDVProducers.put(gen, dvp);
} else {

View File

@ -26,10 +26,8 @@ import java.util.List;
import java.util.Map;
import java.util.Set;
import org.apache.lucene.codecs.DocValuesFormat;
import org.apache.lucene.codecs.DocValuesProducer;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.util.Accountable;
import org.apache.lucene.util.Accountables;
import org.apache.lucene.util.Bits;
@ -48,26 +46,35 @@ class SegmentDocValuesProducer extends DocValuesProducer {
final Set<DocValuesProducer> dvProducers = Collections.newSetFromMap(new IdentityHashMap<DocValuesProducer,Boolean>());
final List<Long> dvGens = new ArrayList<>();
SegmentDocValuesProducer(SegmentCommitInfo si, Directory dir, FieldInfos fieldInfos, SegmentDocValues segDocValues, DocValuesFormat dvFormat) throws IOException {
/**
* Creates a new producer that handles updated docvalues fields
* @param si commit point
* @param dir directory
* @param coreInfos fieldinfos for the segment
* @param allInfos all fieldinfos including updated ones
* @param segDocValues producer map
*/
SegmentDocValuesProducer(SegmentCommitInfo si, Directory dir, FieldInfos coreInfos, FieldInfos allInfos, SegmentDocValues segDocValues) throws IOException {
boolean success = false;
try {
DocValuesProducer baseProducer = null;
for (FieldInfo fi : fieldInfos) {
for (FieldInfo fi : allInfos) {
if (fi.getDocValuesType() == DocValuesType.NONE) {
continue;
}
long docValuesGen = fi.getDocValuesGen();
if (docValuesGen == -1) {
if (baseProducer == null) {
// the base producer gets all the fields, so the Codec can validate properly
baseProducer = segDocValues.getDocValuesProducer(docValuesGen, si, IOContext.READ, dir, dvFormat, fieldInfos);
// the base producer gets the original fieldinfos it wrote
baseProducer = segDocValues.getDocValuesProducer(docValuesGen, si, dir, coreInfos);
dvGens.add(docValuesGen);
dvProducers.add(baseProducer);
}
dvProducersByField.put(fi.name, baseProducer);
} else {
assert !dvGens.contains(docValuesGen);
final DocValuesProducer dvp = segDocValues.getDocValuesProducer(docValuesGen, si, IOContext.READ, dir, dvFormat, new FieldInfos(new FieldInfo[] { fi }));
// otherwise, producer sees only the one fieldinfo it wrote
final DocValuesProducer dvp = segDocValues.getDocValuesProducer(docValuesGen, si, dir, new FieldInfos(new FieldInfo[] { fi }));
dvGens.add(docValuesGen);
dvProducers.add(dvp);
dvProducersByField.put(fi.name, dvp);

View File

@ -25,7 +25,6 @@ import java.util.List;
import java.util.Map;
import org.apache.lucene.codecs.Codec;
import org.apache.lucene.codecs.DocValuesFormat;
import org.apache.lucene.codecs.DocValuesProducer;
import org.apache.lucene.codecs.FieldInfosFormat;
import org.apache.lucene.codecs.FieldsProducer;
@ -159,15 +158,14 @@ public final class SegmentReader extends LeafReader implements Accountable {
*/
private DocValuesProducer initDocValuesProducer() throws IOException {
final Directory dir = core.cfsReader != null ? core.cfsReader : si.info.dir;
final DocValuesFormat dvFormat = si.info.getCodec().docValuesFormat();
if (!fieldInfos.hasDocValues()) {
return null;
} else if (si.hasFieldUpdates()) {
return new SegmentDocValuesProducer(si, dir, fieldInfos, segDocValues, dvFormat);
return new SegmentDocValuesProducer(si, dir, core.coreFieldInfos, fieldInfos, segDocValues);
} else {
// simple case, no DocValues updates
return segDocValues.getDocValuesProducer(-1L, si, IOContext.READ, dir, dvFormat, fieldInfos);
return segDocValues.getDocValuesProducer(-1L, si, dir, fieldInfos);
}
}

View File

@ -19,6 +19,13 @@ import org.apache.lucene.document.NumericDocValuesField;
import org.apache.lucene.document.SortedDocValuesField;
import org.apache.lucene.document.SortedSetDocValuesField;
import org.apache.lucene.document.StringField;
import org.apache.lucene.search.FieldDoc;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.SortField;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.TopFieldDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.MockDirectoryWrapper;
import org.apache.lucene.store.NRTCachingDirectory;
@ -781,6 +788,92 @@ public class TestNumericDocValuesUpdates extends LuceneTestCase {
dir.close();
}
@Test
public void testUpdateSegmentWithNoDocValues2() throws Exception {
Directory dir = newDirectory();
IndexWriterConfig conf = newIndexWriterConfig(new MockAnalyzer(random()));
// prevent merges, otherwise by the time updates are applied
// (writer.close()), the segments might have merged and that update becomes
// legit.
conf.setMergePolicy(NoMergePolicy.INSTANCE);
IndexWriter writer = new IndexWriter(dir, conf);
// first segment with NDV
Document doc = new Document();
doc.add(new StringField("id", "doc0", Store.NO));
doc.add(new NumericDocValuesField("ndv", 3));
writer.addDocument(doc);
doc = new Document();
doc.add(new StringField("id", "doc4", Store.NO)); // document without 'ndv' field
writer.addDocument(doc);
writer.commit();
// second segment with no NDV, but another dv field "foo"
doc = new Document();
doc.add(new StringField("id", "doc1", Store.NO));
doc.add(new NumericDocValuesField("foo", 3));
writer.addDocument(doc);
doc = new Document();
doc.add(new StringField("id", "doc2", Store.NO)); // document that isn't updated
writer.addDocument(doc);
writer.commit();
// update document in the first segment - should not affect docsWithField of
// the document without NDV field
writer.updateNumericDocValue(new Term("id", "doc0"), "ndv", 5L);
// update document in the second segment - field should be added and we should
// be able to handle the other document correctly (e.g. no NPE)
writer.updateNumericDocValue(new Term("id", "doc1"), "ndv", 5L);
writer.close();
DirectoryReader reader = DirectoryReader.open(dir);
for (LeafReaderContext context : reader.leaves()) {
LeafReader r = context.reader();
NumericDocValues ndv = r.getNumericDocValues("ndv");
Bits docsWithField = r.getDocsWithField("ndv");
assertNotNull(docsWithField);
assertTrue(docsWithField.get(0));
assertEquals(5L, ndv.get(0));
assertFalse(docsWithField.get(1));
assertEquals(0L, ndv.get(1));
}
reader.close();
TestUtil.checkIndex(dir);
conf = newIndexWriterConfig(new MockAnalyzer(random()));
writer = new IndexWriter(dir, conf);
writer.forceMerge(1);
writer.close();
reader = DirectoryReader.open(dir);
LeafReader ar = getOnlySegmentReader(reader);
assertEquals(DocValuesType.NUMERIC, ar.getFieldInfos().fieldInfo("foo").getDocValuesType());
IndexSearcher searcher = new IndexSearcher(reader);
TopFieldDocs td;
// doc0
td = searcher.search(new TermQuery(new Term("id", "doc0")), 1,
new Sort(new SortField("ndv", SortField.Type.LONG)));
assertEquals(5L, ((FieldDoc)td.scoreDocs[0]).fields[0]);
// doc1
td = searcher.search(new TermQuery(new Term("id", "doc1")), 1,
new Sort(new SortField("ndv", SortField.Type.LONG), new SortField("foo", SortField.Type.LONG)));
assertEquals(5L, ((FieldDoc)td.scoreDocs[0]).fields[0]);
assertEquals(3L, ((FieldDoc)td.scoreDocs[0]).fields[1]);
// doc2
td = searcher.search(new TermQuery(new Term("id", "doc2")), 1,
new Sort(new SortField("ndv", SortField.Type.LONG)));
assertEquals(0L, ((FieldDoc)td.scoreDocs[0]).fields[0]);
// doc4
td = searcher.search(new TermQuery(new Term("id", "doc4")), 1,
new Sort(new SortField("ndv", SortField.Type.LONG)));
assertEquals(0L, ((FieldDoc)td.scoreDocs[0]).fields[0]);
reader.close();
dir.close();
}
@Test
public void testUpdateSegmentWithPostingButNoDocValues() throws Exception {
Directory dir = newDirectory();