From d226a973f715183ccd5aa6d643562b15bb37d7c1 Mon Sep 17 00:00:00 2001 From: Robert Muir Date: Fri, 9 Jan 2015 12:10:28 -0500 Subject: [PATCH] core: upgrade to lucene 5 r1650327. refactor _version docvalues migration to be more efficient. closes #9206 --- pom.xml | 4 +- .../index/memory/ExtendedMemoryIndex.java | 4 +- .../index/engine/internal/InternalEngine.java | 2 +- .../policy/ElasticsearchMergePolicy.java | 111 ++---------- .../merge/policy/FilterDocValuesProducer.java | 157 ++++++++++++++++ .../merge/policy/VersionFieldUpgrader.java | 168 ++++++++++++++++++ .../percolator/PercolatorService.java | 3 +- .../percolator/QueryCollector.java | 8 +- .../policy/VersionFieldUpgraderTest.java | 144 +++++++++++++++ .../search/child/ChildrenQueryTests.java | 5 +- .../index/search/child/ParentQueryTests.java | 5 +- .../test/ElasticsearchLuceneTestCase.java | 2 - 12 files changed, 496 insertions(+), 117 deletions(-) create mode 100644 src/main/java/org/elasticsearch/index/merge/policy/FilterDocValuesProducer.java create mode 100644 src/main/java/org/elasticsearch/index/merge/policy/VersionFieldUpgrader.java create mode 100644 src/test/java/org/elasticsearch/index/merge/policy/VersionFieldUpgraderTest.java diff --git a/pom.xml b/pom.xml index 19438b9732f..1406107f057 100644 --- a/pom.xml +++ b/pom.xml @@ -32,7 +32,7 @@ 5.0.0 - 5.0.0-snapshot-1649544 + 5.0.0-snapshot-1650327 auto true onerror @@ -54,7 +54,7 @@ Lucene snapshots - https://download.elasticsearch.org/lucenesnapshots/1649544 + https://download.elasticsearch.org/lucenesnapshots/1650327 diff --git a/src/main/java/org/apache/lucene/index/memory/ExtendedMemoryIndex.java b/src/main/java/org/apache/lucene/index/memory/ExtendedMemoryIndex.java index 5f99fcefa94..aec1bc75519 100644 --- a/src/main/java/org/apache/lucene/index/memory/ExtendedMemoryIndex.java +++ b/src/main/java/org/apache/lucene/index/memory/ExtendedMemoryIndex.java @@ -24,8 +24,8 @@ package org.apache.lucene.index.memory; */ public final class 
ExtendedMemoryIndex extends MemoryIndex { - public ExtendedMemoryIndex(boolean storeOffsets, long maxReusedBytes) { - super(storeOffsets, maxReusedBytes); + public ExtendedMemoryIndex(boolean storeOffsets, boolean storePayloads, long maxReusedBytes) { + super(storeOffsets, storePayloads, maxReusedBytes); } } diff --git a/src/main/java/org/elasticsearch/index/engine/internal/InternalEngine.java b/src/main/java/org/elasticsearch/index/engine/internal/InternalEngine.java index e23452d2005..cffc1d06bd8 100644 --- a/src/main/java/org/elasticsearch/index/engine/internal/InternalEngine.java +++ b/src/main/java/org/elasticsearch/index/engine/internal/InternalEngine.java @@ -1107,7 +1107,7 @@ public class InternalEngine implements Engine { for (LeafReaderContext reader : searcher.reader().leaves()) { final SegmentReader segmentReader = segmentReader(reader.reader()); stats.add(1, segmentReader.ramBytesUsed()); - stats.addTermsMemoryInBytes(guardedRamBytesUsed(segmentReader.fields())); + stats.addTermsMemoryInBytes(guardedRamBytesUsed(segmentReader.getPostingsReader())); stats.addStoredFieldsMemoryInBytes(guardedRamBytesUsed(segmentReader.getFieldsReader())); stats.addTermVectorsMemoryInBytes(guardedRamBytesUsed(segmentReader.getTermVectorsReader())); stats.addNormsMemoryInBytes(guardedRamBytesUsed(segmentReader.getNormsReader())); diff --git a/src/main/java/org/elasticsearch/index/merge/policy/ElasticsearchMergePolicy.java b/src/main/java/org/elasticsearch/index/merge/policy/ElasticsearchMergePolicy.java index d9fb79a5b4b..d53a809163d 100644 --- a/src/main/java/org/elasticsearch/index/merge/policy/ElasticsearchMergePolicy.java +++ b/src/main/java/org/elasticsearch/index/merge/policy/ElasticsearchMergePolicy.java @@ -19,24 +19,15 @@ package org.elasticsearch.index.merge.policy; -import com.google.common.collect.ImmutableList; import com.google.common.collect.Lists; import org.apache.lucene.index.*; import org.apache.lucene.store.Directory; -import 
org.apache.lucene.util.Bits; -import org.apache.lucene.util.BytesRef; -import org.apache.lucene.util.packed.GrowableWriter; -import org.apache.lucene.util.packed.PackedInts; import org.elasticsearch.Version; -import org.elasticsearch.common.Numbers; import org.elasticsearch.common.logging.ESLogger; import org.elasticsearch.common.logging.Loggers; -import org.elasticsearch.index.mapper.internal.UidFieldMapper; -import org.elasticsearch.index.mapper.internal.VersionFieldMapper; import java.io.IOException; import java.util.ArrayList; -import java.util.Collections; import java.util.List; import java.util.Map; @@ -66,93 +57,12 @@ public final class ElasticsearchMergePolicy extends MergePolicy { } /** Return an "upgraded" view of the reader. */ - static LeafReader filter(LeafReader reader) throws IOException { - final FieldInfos fieldInfos = reader.getFieldInfos(); - final FieldInfo versionInfo = fieldInfos.fieldInfo(VersionFieldMapper.NAME); - if (versionInfo != null && versionInfo.getDocValuesType() != DocValuesType.NONE) { - // the reader is a recent one, it has versions and they are stored - // in a numeric doc values field - return reader; - } - // The segment is an old one, load all versions in memory and hide - // them behind a numeric doc values field - final Terms terms = reader.terms(UidFieldMapper.NAME); - if (terms == null || !terms.hasPayloads()) { - // The segment doesn't have an _uid field or doesn't have paylods - // don't try to do anything clever. 
If any other segment has versions - // all versions of this segment will be initialized to 0 - return reader; - } - final TermsEnum uids = terms.iterator(null); - final GrowableWriter versions = new GrowableWriter(2, reader.maxDoc(), PackedInts.DEFAULT); - DocsAndPositionsEnum dpe = null; - for (BytesRef uid = uids.next(); uid != null; uid = uids.next()) { - dpe = uids.docsAndPositions(reader.getLiveDocs(), dpe, DocsAndPositionsEnum.FLAG_PAYLOADS); - assert dpe != null : "field has payloads"; - for (int doc = dpe.nextDoc(); doc != DocsEnum.NO_MORE_DOCS; doc = dpe.nextDoc()) { - dpe.nextPosition(); - final BytesRef payload = dpe.getPayload(); - if (payload != null && payload.length == 8) { - final long version = Numbers.bytesToLong(payload); - versions.set(doc, version); - break; - } - } - } - // Build new field infos, doc values, and return a filter reader - final FieldInfo newVersionInfo; - if (versionInfo == null) { - // Find a free field number - int fieldNumber = 0; - for (FieldInfo fi : fieldInfos) { - fieldNumber = Math.max(fieldNumber, fi.number + 1); - } - // TODO: lots of things can wrong here... 
- newVersionInfo = new FieldInfo(VersionFieldMapper.NAME, // field name - fieldNumber, // field number - false, // store term vectors - false, // omit norms - false, // store payloads - IndexOptions.NONE, // index options - DocValuesType.NUMERIC, // docvalues - -1, // docvalues generation - Collections.emptyMap() // attributes - ); - } else { - newVersionInfo = versionInfo; - } - newVersionInfo.checkConsistency(); // fail merge immediately if above code is wrong - final ArrayList fieldInfoList = new ArrayList<>(); - for (FieldInfo info : fieldInfos) { - if (info != versionInfo) { - fieldInfoList.add(info); - } - } - fieldInfoList.add(newVersionInfo); - final FieldInfos newFieldInfos = new FieldInfos(fieldInfoList.toArray(new FieldInfo[fieldInfoList.size()])); - final NumericDocValues versionValues = new NumericDocValues() { - @Override - public long get(int index) { - return versions.get(index); - } - }; - return new FilterLeafReader(reader) { - @Override - public FieldInfos getFieldInfos() { - return newFieldInfos; - } - @Override - public NumericDocValues getNumericDocValues(String field) throws IOException { - if (VersionFieldMapper.NAME.equals(field)) { - return versionValues; - } - return super.getNumericDocValues(field); - } - @Override - public Bits getDocsWithField(String field) throws IOException { - return new Bits.MatchAllBits(in.maxDoc()); - } - }; + static CodecReader filter(CodecReader reader) throws IOException { + // convert 0.90.x _uid payloads to _version docvalues if needed + reader = VersionFieldUpgrader.wrap(reader); + // TODO: remove 0.90.x/1.x freqs/prox/payloads from _uid? + // the previous code never did this, so some indexes carry around trash. 
+ return reader; } static class IndexUpgraderOneMerge extends OneMerge { @@ -162,13 +72,12 @@ public final class ElasticsearchMergePolicy extends MergePolicy { } @Override - public List getMergeReaders() throws IOException { - final List readers = super.getMergeReaders(); - ImmutableList.Builder newReaders = ImmutableList.builder(); - for (LeafReader reader : readers) { + public List getMergeReaders() throws IOException { + final List newReaders = new ArrayList<>(); + for (CodecReader reader : super.getMergeReaders()) { newReaders.add(filter(reader)); } - return newReaders.build(); + return newReaders; } } diff --git a/src/main/java/org/elasticsearch/index/merge/policy/FilterDocValuesProducer.java b/src/main/java/org/elasticsearch/index/merge/policy/FilterDocValuesProducer.java new file mode 100644 index 00000000000..ea13e16f9df --- /dev/null +++ b/src/main/java/org/elasticsearch/index/merge/policy/FilterDocValuesProducer.java @@ -0,0 +1,157 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.elasticsearch.index.merge.policy; + +import org.apache.lucene.codecs.DocValuesProducer; +import org.apache.lucene.index.BinaryDocValues; +import org.apache.lucene.index.FieldInfo; +import org.apache.lucene.index.NumericDocValues; +import org.apache.lucene.index.SortedDocValues; +import org.apache.lucene.index.SortedNumericDocValues; +import org.apache.lucene.index.SortedSetDocValues; +import org.apache.lucene.util.Accountable; +import org.apache.lucene.util.Bits; + +import java.io.IOException; +import java.util.Collection; +import java.util.Collections; + +// TODO: move this to lucene's FilterCodecReader + +/** + * Base class for filtering DocValuesProducer implementations. + *

+ * NOTE: just like with DocValuesProducer, the default {@link #getMergeInstance()} + * is unoptimized. overriding this method when possible can improve performance. + */ +class FilterDocValuesProducer extends DocValuesProducer { + /** The underlying Producer instance. */ + protected final DocValuesProducer in; + + /** + * Creates a new FilterDocValuesProducer + * @param in the underlying producer. + */ + FilterDocValuesProducer(DocValuesProducer in) { + this.in = in; + } + + @Override + public void close() throws IOException { + in.close(); + } + + @Override + public long ramBytesUsed() { + return in.ramBytesUsed(); + } + + @Override + public Collection getChildResources() { + return in.getChildResources(); + } + + @Override + public NumericDocValues getNumeric(FieldInfo field) throws IOException { + return in.getNumeric(field); + } + + @Override + public BinaryDocValues getBinary(FieldInfo field) throws IOException { + return in.getBinary(field); + } + + @Override + public SortedDocValues getSorted(FieldInfo field) throws IOException { + return in.getSorted(field); + } + + @Override + public SortedNumericDocValues getSortedNumeric(FieldInfo field) throws IOException { + return in.getSortedNumeric(field); + } + + @Override + public SortedSetDocValues getSortedSet(FieldInfo field) throws IOException { + return in.getSortedSet(field); + } + + @Override + public Bits getDocsWithField(FieldInfo field) throws IOException { + return in.getDocsWithField(field); + } + + @Override + public void checkIntegrity() throws IOException { + in.checkIntegrity(); + } + + // TODO: move this out somewhere else (and can fix all these null producers in lucene?) + // we shouldn't need nullness for any reason. 
+ + public static final DocValuesProducer EMPTY = new DocValuesProducer() { + + @Override + public void close() throws IOException {} + + @Override + public long ramBytesUsed() { + return 0; + } + + @Override + public Collection getChildResources() { + return Collections.emptyList(); + } + + @Override + public NumericDocValues getNumeric(FieldInfo field) throws IOException { + throw new IllegalStateException(); // we don't have any docvalues + } + + @Override + public BinaryDocValues getBinary(FieldInfo field) throws IOException { + throw new IllegalStateException(); // we don't have any docvalues + } + + @Override + public SortedDocValues getSorted(FieldInfo field) throws IOException { + throw new IllegalStateException(); // we don't have any docvalues + } + + @Override + public SortedNumericDocValues getSortedNumeric(FieldInfo field) throws IOException { + throw new IllegalStateException(); // we don't have any docvalues + } + + @Override + public SortedSetDocValues getSortedSet(FieldInfo field) throws IOException { + throw new IllegalStateException(); // we don't have any docvalues + } + + @Override + public Bits getDocsWithField(FieldInfo field) throws IOException { + throw new IllegalStateException(); // we don't have any docvalues + } + + @Override + public void checkIntegrity() throws IOException {} + }; +} diff --git a/src/main/java/org/elasticsearch/index/merge/policy/VersionFieldUpgrader.java b/src/main/java/org/elasticsearch/index/merge/policy/VersionFieldUpgrader.java new file mode 100644 index 00000000000..1db3a9d9865 --- /dev/null +++ b/src/main/java/org/elasticsearch/index/merge/policy/VersionFieldUpgrader.java @@ -0,0 +1,168 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. 
Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.elasticsearch.index.merge.policy; + +import org.apache.lucene.codecs.DocValuesProducer; +import org.apache.lucene.index.CodecReader; +import org.apache.lucene.index.DocValuesType; +import org.apache.lucene.index.DocsAndPositionsEnum; +import org.apache.lucene.index.DocsEnum; +import org.apache.lucene.index.FieldInfo; +import org.apache.lucene.index.FieldInfos; +import org.apache.lucene.index.FilterCodecReader; +import org.apache.lucene.index.IndexOptions; +import org.apache.lucene.index.NumericDocValues; +import org.apache.lucene.index.Terms; +import org.apache.lucene.index.TermsEnum; +import org.apache.lucene.util.Bits; +import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.packed.GrowableWriter; +import org.apache.lucene.util.packed.PackedInts; +import org.elasticsearch.common.Numbers; +import org.elasticsearch.index.mapper.internal.UidFieldMapper; +import org.elasticsearch.index.mapper.internal.VersionFieldMapper; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Collections; + +/** + * Converts 0.90.x _uid payloads to _version docvalues + */ +class VersionFieldUpgrader extends FilterCodecReader { + final FieldInfos infos; + + VersionFieldUpgrader(CodecReader in) { + super(in); + + // Find a free field number + int fieldNumber = 0; + for (FieldInfo fi : in.getFieldInfos()) { + fieldNumber = Math.max(fieldNumber, 
fi.number + 1); + } + + // TODO: lots of things can wrong here... + FieldInfo newInfo = new FieldInfo(VersionFieldMapper.NAME, // field name + fieldNumber, // field number + false, // store term vectors + false, // omit norms + false, // store payloads + IndexOptions.NONE, // index options + DocValuesType.NUMERIC, // docvalues + -1, // docvalues generation + Collections.emptyMap() // attributes + ); + newInfo.checkConsistency(); // fail merge immediately if above code is wrong + + final ArrayList fieldInfoList = new ArrayList<>(); + for (FieldInfo info : in.getFieldInfos()) { + if (!info.name.equals(VersionFieldMapper.NAME)) { + fieldInfoList.add(info); + } + } + fieldInfoList.add(newInfo); + infos = new FieldInfos(fieldInfoList.toArray(new FieldInfo[fieldInfoList.size()])); + } + + static CodecReader wrap(CodecReader reader) throws IOException { + final FieldInfos fieldInfos = reader.getFieldInfos(); + final FieldInfo versionInfo = fieldInfos.fieldInfo(VersionFieldMapper.NAME); + if (versionInfo != null && versionInfo.getDocValuesType() != DocValuesType.NONE) { + // the reader is a recent one, it has versions and they are stored + // in a numeric doc values field + return reader; + } + // The segment is an old one, look at the _uid field + final Terms terms = reader.terms(UidFieldMapper.NAME); + if (terms == null || !terms.hasPayloads()) { + // The segment doesn't have an _uid field or doesn't have payloads + // don't try to do anything clever. 
If any other segment has versions + // all versions of this segment will be initialized to 0 + return reader; + } + // convert _uid payloads -> _version docvalues + return new VersionFieldUpgrader(reader); + } + + @Override + public FieldInfos getFieldInfos() { + return infos; + } + + @Override + public DocValuesProducer getDocValuesReader() { + DocValuesProducer producer = in.getDocValuesReader(); + // TODO: move this nullness stuff out + if (producer == null) { + producer = FilterDocValuesProducer.EMPTY; + } + return new UninvertedVersions(producer, this); + } + + static class UninvertedVersions extends FilterDocValuesProducer { + final CodecReader reader; + + UninvertedVersions(DocValuesProducer in, CodecReader reader) { + super(in); + this.reader = reader; + } + + @Override + public NumericDocValues getNumeric(FieldInfo field) throws IOException { + if (VersionFieldMapper.NAME.equals(field.name)) { + // uninvert into a packed ints and expose as docvalues + final Terms terms = reader.terms(UidFieldMapper.NAME); + final TermsEnum uids = terms.iterator(null); + final GrowableWriter versions = new GrowableWriter(2, reader.maxDoc(), PackedInts.COMPACT); + DocsAndPositionsEnum dpe = null; + for (BytesRef uid = uids.next(); uid != null; uid = uids.next()) { + dpe = uids.docsAndPositions(reader.getLiveDocs(), dpe, DocsAndPositionsEnum.FLAG_PAYLOADS); + assert dpe != null : "field has payloads"; + for (int doc = dpe.nextDoc(); doc != DocsEnum.NO_MORE_DOCS; doc = dpe.nextDoc()) { + dpe.nextPosition(); + final BytesRef payload = dpe.getPayload(); + if (payload != null && payload.length == 8) { + final long version = Numbers.bytesToLong(payload); + versions.set(doc, version); + break; + } + } + } + return versions; + } else { + return in.getNumeric(field); + } + } + + @Override + public Bits getDocsWithField(FieldInfo field) throws IOException { + if (VersionFieldMapper.NAME.equals(field.name)) { + return new Bits.MatchAllBits(reader.maxDoc()); + } else { + return 
in.getDocsWithField(field); + } + } + + @Override + public DocValuesProducer getMergeInstance() throws IOException { + return new UninvertedVersions(in.getMergeInstance(), reader); + } + } +} diff --git a/src/main/java/org/elasticsearch/percolator/PercolatorService.java b/src/main/java/org/elasticsearch/percolator/PercolatorService.java index c2d3e84e07f..3c4f8035c45 100644 --- a/src/main/java/org/elasticsearch/percolator/PercolatorService.java +++ b/src/main/java/org/elasticsearch/percolator/PercolatorService.java @@ -133,7 +133,8 @@ public class PercolatorService extends AbstractComponent { cache = new CloseableThreadLocal() { @Override protected MemoryIndex initialValue() { - return new ExtendedMemoryIndex(true, maxReuseBytes); + // TODO: should we expose payloads as an option? should offsets be turned on always? + return new ExtendedMemoryIndex(true, false, maxReuseBytes); } }; single = new SingleDocumentPercolatorIndex(cache); diff --git a/src/main/java/org/elasticsearch/percolator/QueryCollector.java b/src/main/java/org/elasticsearch/percolator/QueryCollector.java index ad24be5d9cd..f82ba28f002 100644 --- a/src/main/java/org/elasticsearch/percolator/QueryCollector.java +++ b/src/main/java/org/elasticsearch/percolator/QueryCollector.java @@ -226,6 +226,7 @@ abstract class QueryCollector extends SimpleCollector { final static class MatchAndSort extends QueryCollector { private final TopScoreDocCollector topDocsCollector; + private LeafCollector topDocsLeafCollector; MatchAndSort(ESLogger logger, PercolateContext context, boolean isNestedDoc) { super(logger, context, isNestedDoc); @@ -248,7 +249,7 @@ abstract class QueryCollector extends SimpleCollector { Lucene.exists(searcher, query, collector); } if (collector.exists()) { - topDocsCollector.collect(doc); + topDocsLeafCollector.collect(doc); postMatch(doc); } } catch (IOException e) { @@ -259,13 +260,12 @@ abstract class QueryCollector extends SimpleCollector { @Override public void 
doSetNextReader(LeafReaderContext context) throws IOException { super.doSetNextReader(context); - LeafCollector leafCollector = topDocsCollector.getLeafCollector(context); - assert leafCollector == topDocsCollector : "TopDocsCollector returns itself as leaf collector"; + topDocsLeafCollector = topDocsCollector.getLeafCollector(context); } @Override public void setScorer(Scorer scorer) throws IOException { - topDocsCollector.setScorer(scorer); + topDocsLeafCollector.setScorer(scorer); } TopDocs topDocs() { diff --git a/src/test/java/org/elasticsearch/index/merge/policy/VersionFieldUpgraderTest.java b/src/test/java/org/elasticsearch/index/merge/policy/VersionFieldUpgraderTest.java new file mode 100644 index 00000000000..ea51eb76509 --- /dev/null +++ b/src/test/java/org/elasticsearch/index/merge/policy/VersionFieldUpgraderTest.java @@ -0,0 +1,144 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.elasticsearch.index.merge.policy; + +import org.apache.lucene.analysis.CannedTokenStream; +import org.apache.lucene.analysis.Token; +import org.apache.lucene.document.Document; +import org.apache.lucene.document.NumericDocValuesField; +import org.apache.lucene.document.TextField; +import org.apache.lucene.index.CodecReader; +import org.apache.lucene.index.DirectoryReader; +import org.apache.lucene.index.DocValuesType; +import org.apache.lucene.index.FieldInfo; +import org.apache.lucene.index.IndexWriter; +import org.apache.lucene.index.IndexWriterConfig; +import org.apache.lucene.store.Directory; +import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.TestUtil; +import org.elasticsearch.common.Numbers; +import org.elasticsearch.index.mapper.internal.UidFieldMapper; +import org.elasticsearch.index.mapper.internal.VersionFieldMapper; +import org.elasticsearch.test.ElasticsearchLuceneTestCase; + +/** Tests upgrading old document versions from _uid payloads to _version docvalues */ +public class VersionFieldUpgraderTest extends ElasticsearchLuceneTestCase { + + /** Simple test: one doc in the old format, check that it looks correct */ + public void testUpgradeOneDocument() throws Exception { + Directory dir = newDirectory(); + IndexWriter iw = new IndexWriter(dir, new IndexWriterConfig(null)); + + // add a document with a _uid having a payload of 3 + Document doc = new Document(); + Token token = new Token("1", 0, 1); + token.setPayload(new BytesRef(Numbers.longToBytes(3))); + doc.add(new TextField(UidFieldMapper.NAME, new CannedTokenStream(token))); + iw.addDocument(doc); + iw.commit(); + + CodecReader reader = getOnlySegmentReader(DirectoryReader.open(iw, true)); + CodecReader upgraded = VersionFieldUpgrader.wrap(reader); + // we need to be upgraded, should be a different instance + assertNotSame(reader, upgraded); + + // make sure we can see our numericdocvalues in fieldinfos + FieldInfo versionField = 
upgraded.getFieldInfos().fieldInfo(VersionFieldMapper.NAME); + assertNotNull(versionField); + assertEquals(DocValuesType.NUMERIC, versionField.getDocValuesType()); + // should have a value of 3, and be visible in docsWithField + assertEquals(3, upgraded.getNumericDocValues(VersionFieldMapper.NAME).get(0)); + assertTrue(upgraded.getDocsWithField(VersionFieldMapper.NAME).get(0)); + + // verify filterreader with checkindex + TestUtil.checkReader(upgraded); + + reader.close(); + iw.close(); + dir.close(); + } + + /** test that we are a no-op if the segment already has the version field */ + public void testAlreadyUpgraded() throws Exception { + Directory dir = newDirectory(); + IndexWriter iw = new IndexWriter(dir, new IndexWriterConfig(null)); + + // add a document with a _uid having a payload of 3 + Document doc = new Document(); + Token token = new Token("1", 0, 1); + token.setPayload(new BytesRef(Numbers.longToBytes(3))); + doc.add(new TextField(UidFieldMapper.NAME, new CannedTokenStream(token))); + doc.add(new NumericDocValuesField(VersionFieldMapper.NAME, 3)); + iw.addDocument(doc); + iw.commit(); + + CodecReader reader = getOnlySegmentReader(DirectoryReader.open(iw, true)); + CodecReader upgraded = VersionFieldUpgrader.wrap(reader); + // we already upgraded: should be same instance + assertSame(reader, upgraded); + + reader.close(); + iw.close(); + dir.close(); + } + + /** Test upgrading two documents */ + public void testUpgradeTwoDocuments() throws Exception { + Directory dir = newDirectory(); + IndexWriter iw = new IndexWriter(dir, new IndexWriterConfig(null)); + + // add a document with a _uid having a payload of 3 + Document doc = new Document(); + Token token = new Token("1", 0, 1); + token.setPayload(new BytesRef(Numbers.longToBytes(3))); + doc.add(new TextField(UidFieldMapper.NAME, new CannedTokenStream(token))); + iw.addDocument(doc); + + doc = new Document(); + token = new Token("2", 0, 1); + token.setPayload(new BytesRef(Numbers.longToBytes(4))); + 
doc.add(new TextField(UidFieldMapper.NAME, new CannedTokenStream(token))); + iw.addDocument(doc); + + iw.commit(); + + CodecReader reader = getOnlySegmentReader(DirectoryReader.open(iw, true)); + CodecReader upgraded = VersionFieldUpgrader.wrap(reader); + // we need to be upgraded, should be a different instance + assertNotSame(reader, upgraded); + + // make sure we can see our numericdocvalues in fieldinfos + FieldInfo versionField = upgraded.getFieldInfos().fieldInfo(VersionFieldMapper.NAME); + assertNotNull(versionField); + assertEquals(DocValuesType.NUMERIC, versionField.getDocValuesType()); + // should have a values of 3 and 4, and be visible in docsWithField + assertEquals(3, upgraded.getNumericDocValues(VersionFieldMapper.NAME).get(0)); + assertEquals(4, upgraded.getNumericDocValues(VersionFieldMapper.NAME).get(1)); + assertTrue(upgraded.getDocsWithField(VersionFieldMapper.NAME).get(0)); + assertTrue(upgraded.getDocsWithField(VersionFieldMapper.NAME).get(1)); + + // verify filterreader with checkindex + TestUtil.checkReader(upgraded); + + reader.close(); + iw.close(); + dir.close(); + } +} diff --git a/src/test/java/org/elasticsearch/index/search/child/ChildrenQueryTests.java b/src/test/java/org/elasticsearch/index/search/child/ChildrenQueryTests.java index 6c49311e589..40a78c0e90e 100644 --- a/src/test/java/org/elasticsearch/index/search/child/ChildrenQueryTests.java +++ b/src/test/java/org/elasticsearch/index/search/child/ChildrenQueryTests.java @@ -223,9 +223,10 @@ public class ChildrenQueryTests extends AbstractChildTests { FixedBitSet expectedResult = new FixedBitSet(indexReader.maxDoc()); MockScorer mockScorer = new MockScorer(scoreType); TopScoreDocCollector expectedTopDocsCollector = TopScoreDocCollector.create(numHits, false); - expectedTopDocsCollector.setScorer(mockScorer); if (childValueToParentIds.containsKey(childValue)) { LeafReader slowLeafReader = SlowCompositeReaderWrapper.wrap(indexReader); + final LeafCollector leafCollector = 
expectedTopDocsCollector.getLeafCollector(slowLeafReader.getContext()); + leafCollector.setScorer(mockScorer); Terms terms = slowLeafReader.terms(UidFieldMapper.NAME); if (terms != null) { NavigableMap parentIdToChildScores = childValueToParentIds.lget(); @@ -239,7 +240,7 @@ public class ChildrenQueryTests extends AbstractChildTests { docsEnum = termsEnum.docs(slowLeafReader.getLiveDocs(), docsEnum, DocsEnum.FLAG_NONE); expectedResult.set(docsEnum.nextDoc()); mockScorer.scores = entry.getValue(); - expectedTopDocsCollector.collect(docsEnum.docID()); + leafCollector.collect(docsEnum.docID()); } else if (seekStatus == TermsEnum.SeekStatus.END) { break; } diff --git a/src/test/java/org/elasticsearch/index/search/child/ParentQueryTests.java b/src/test/java/org/elasticsearch/index/search/child/ParentQueryTests.java index 2934410cb2a..c2a752558c8 100644 --- a/src/test/java/org/elasticsearch/index/search/child/ParentQueryTests.java +++ b/src/test/java/org/elasticsearch/index/search/child/ParentQueryTests.java @@ -205,9 +205,10 @@ public class ParentQueryTests extends AbstractChildTests { MockScorer mockScorer = new MockScorer(ScoreType.MAX); // just save one score per parent... 
mockScorer.scores = new FloatArrayList(); TopScoreDocCollector expectedTopDocsCollector = TopScoreDocCollector.create(numHits, false); - expectedTopDocsCollector.setScorer(mockScorer); if (parentValueToChildIds.containsKey(parentValue)) { LeafReader slowLeafReader = SlowCompositeReaderWrapper.wrap(indexReader); + final LeafCollector leafCollector = expectedTopDocsCollector.getLeafCollector(slowLeafReader.getContext()); + leafCollector.setScorer(mockScorer); Terms terms = slowLeafReader.terms(UidFieldMapper.NAME); if (terms != null) { NavigableMap childIdsAndScore = parentValueToChildIds.lget(); @@ -219,7 +220,7 @@ public class ParentQueryTests extends AbstractChildTests { docsEnum = termsEnum.docs(slowLeafReader.getLiveDocs(), docsEnum, DocsEnum.FLAG_NONE); expectedResult.set(docsEnum.nextDoc()); mockScorer.scores.add(entry.getValue()); - expectedTopDocsCollector.collect(docsEnum.docID()); + leafCollector.collect(docsEnum.docID()); mockScorer.scores.clear(); } else if (seekStatus == TermsEnum.SeekStatus.END) { break; diff --git a/src/test/java/org/elasticsearch/test/ElasticsearchLuceneTestCase.java b/src/test/java/org/elasticsearch/test/ElasticsearchLuceneTestCase.java index 5d0255cb77f..3dcda533499 100644 --- a/src/test/java/org/elasticsearch/test/ElasticsearchLuceneTestCase.java +++ b/src/test/java/org/elasticsearch/test/ElasticsearchLuceneTestCase.java @@ -24,7 +24,6 @@ import com.carrotsearch.randomizedtesting.annotations.*; import com.carrotsearch.randomizedtesting.annotations.ThreadLeakScope.Scope; import org.apache.lucene.codecs.Codec; import org.apache.lucene.util.LuceneTestCase; -import org.apache.lucene.util.LuceneTestCase.SuppressCodecs; import org.apache.lucene.util.TimeUnits; import org.elasticsearch.test.junit.listeners.ReproduceInfoPrinter; @@ -39,7 +38,6 @@ import org.elasticsearch.test.junit.listeners.ReproduceInfoPrinter; @ThreadLeakScope(Scope.SUITE) @ThreadLeakLingering(linger = 5000) // 5 sec lingering @TimeoutSuite(millis = TimeUnits.HOUR) 
-@SuppressCodecs("Lucene3x") @LuceneTestCase.SuppressSysoutChecks(bugUrl = "we log a lot on purpose") public abstract class ElasticsearchLuceneTestCase extends LuceneTestCase {