core: upgrade to lucene 5 r1650327.
refactor _version docvalues migration to be more efficient. closes #9206
This commit is contained in:
    parent 91e00c6c8e
    commit d226a973f7

pom.xml (4 lines changed)
@@ -32,7 +32,7 @@
     <properties>
         <lucene.version>5.0.0</lucene.version>
-        <lucene.maven.version>5.0.0-snapshot-1649544</lucene.maven.version>
+        <lucene.maven.version>5.0.0-snapshot-1650327</lucene.maven.version>
         <tests.jvms>auto</tests.jvms>
         <tests.shuffle>true</tests.shuffle>
         <tests.output>onerror</tests.output>
@@ -54,7 +54,7 @@
         </repository>
         <repository>
             <id>Lucene snapshots</id>
-            <url>https://download.elasticsearch.org/lucenesnapshots/1649544</url>
+            <url>https://download.elasticsearch.org/lucenesnapshots/1650327</url>
         </repository>
     </repositories>

ExtendedMemoryIndex.java

@@ -24,8 +24,8 @@ package org.apache.lucene.index.memory;
  */
 public final class ExtendedMemoryIndex extends MemoryIndex {
 
-    public ExtendedMemoryIndex(boolean storeOffsets, long maxReusedBytes) {
-        super(storeOffsets, maxReusedBytes);
+    public ExtendedMemoryIndex(boolean storeOffsets, boolean storePayloads, long maxReusedBytes) {
+        super(storeOffsets, storePayloads, maxReusedBytes);
     }
 
 }
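ExtendedMemoryIndex apparently exists only to widen access to a MemoryIndex constructor that Lucene keeps non-public, and with Lucene 5 it now threads through the new storePayloads flag as well. A minimal sketch of a caller, with a hypothetical reuse budget:

    import org.apache.lucene.index.memory.ExtendedMemoryIndex;
    import org.apache.lucene.index.memory.MemoryIndex;

    public class MemoryIndexExample {
        public static void main(String[] args) {
            long maxReusedBytes = 1 << 20; // hypothetical 1 MB reuse budget
            // offsets on, payloads off: the combination the percolator picks below
            MemoryIndex index = new ExtendedMemoryIndex(true, false, maxReusedBytes);
            System.out.println(index.getClass().getSimpleName());
        }
    }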
InternalEngine.java

@@ -1107,7 +1107,7 @@ public class InternalEngine implements Engine {
         for (LeafReaderContext reader : searcher.reader().leaves()) {
             final SegmentReader segmentReader = segmentReader(reader.reader());
             stats.add(1, segmentReader.ramBytesUsed());
-            stats.addTermsMemoryInBytes(guardedRamBytesUsed(segmentReader.fields()));
+            stats.addTermsMemoryInBytes(guardedRamBytesUsed(segmentReader.getPostingsReader()));
             stats.addStoredFieldsMemoryInBytes(guardedRamBytesUsed(segmentReader.getFieldsReader()));
             stats.addTermVectorsMemoryInBytes(guardedRamBytesUsed(segmentReader.getTermVectorsReader()));
             stats.addNormsMemoryInBytes(guardedRamBytesUsed(segmentReader.getNormsReader()));
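In Lucene 5 each per-segment component (postings, stored fields, term vectors, norms) is exposed as its own reader implementing Accountable, which is what the accounting above sums; getPostingsReader() replaces fields() as the handle for terms/postings memory. A sketch of the idea; guardedRamBytesUsed below mirrors the assumed behavior of the helper used in the hunk (components may be null):

    import org.apache.lucene.index.SegmentReader;
    import org.apache.lucene.util.Accountable;

    class SegmentRamAccounting {
        // Assumed behavior of the guardedRamBytesUsed helper: tolerate null components.
        static long guardedRamBytesUsed(Accountable a) {
            return a == null ? 0 : a.ramBytesUsed();
        }

        static long postingsBytes(SegmentReader reader) {
            return guardedRamBytesUsed(reader.getPostingsReader());
        }
    }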
ElasticsearchMergePolicy.java

@@ -19,24 +19,15 @@
 
 package org.elasticsearch.index.merge.policy;
 
-import com.google.common.collect.ImmutableList;
 import com.google.common.collect.Lists;
 import org.apache.lucene.index.*;
 import org.apache.lucene.store.Directory;
-import org.apache.lucene.util.Bits;
-import org.apache.lucene.util.BytesRef;
-import org.apache.lucene.util.packed.GrowableWriter;
-import org.apache.lucene.util.packed.PackedInts;
 import org.elasticsearch.Version;
-import org.elasticsearch.common.Numbers;
 import org.elasticsearch.common.logging.ESLogger;
 import org.elasticsearch.common.logging.Loggers;
-import org.elasticsearch.index.mapper.internal.UidFieldMapper;
-import org.elasticsearch.index.mapper.internal.VersionFieldMapper;
 
 import java.io.IOException;
 import java.util.ArrayList;
-import java.util.Collections;
 import java.util.List;
 import java.util.Map;
 
@@ -66,93 +57,12 @@ public final class ElasticsearchMergePolicy extends MergePolicy {
     }
 
     /** Return an "upgraded" view of the reader. */
-    static LeafReader filter(LeafReader reader) throws IOException {
-        final FieldInfos fieldInfos = reader.getFieldInfos();
-        final FieldInfo versionInfo = fieldInfos.fieldInfo(VersionFieldMapper.NAME);
-        if (versionInfo != null && versionInfo.getDocValuesType() != DocValuesType.NONE) {
-            // the reader is a recent one, it has versions and they are stored
-            // in a numeric doc values field
-            return reader;
-        }
-        // The segment is an old one, load all versions in memory and hide
-        // them behind a numeric doc values field
-        final Terms terms = reader.terms(UidFieldMapper.NAME);
-        if (terms == null || !terms.hasPayloads()) {
-            // The segment doesn't have an _uid field or doesn't have payloads,
-            // don't try to do anything clever. If any other segment has versions
-            // all versions of this segment will be initialized to 0
-            return reader;
-        }
-        final TermsEnum uids = terms.iterator(null);
-        final GrowableWriter versions = new GrowableWriter(2, reader.maxDoc(), PackedInts.DEFAULT);
-        DocsAndPositionsEnum dpe = null;
-        for (BytesRef uid = uids.next(); uid != null; uid = uids.next()) {
-            dpe = uids.docsAndPositions(reader.getLiveDocs(), dpe, DocsAndPositionsEnum.FLAG_PAYLOADS);
-            assert dpe != null : "field has payloads";
-            for (int doc = dpe.nextDoc(); doc != DocsEnum.NO_MORE_DOCS; doc = dpe.nextDoc()) {
-                dpe.nextPosition();
-                final BytesRef payload = dpe.getPayload();
-                if (payload != null && payload.length == 8) {
-                    final long version = Numbers.bytesToLong(payload);
-                    versions.set(doc, version);
-                    break;
-                }
-            }
-        }
-        // Build new field infos, doc values, and return a filter reader
-        final FieldInfo newVersionInfo;
-        if (versionInfo == null) {
-            // Find a free field number
-            int fieldNumber = 0;
-            for (FieldInfo fi : fieldInfos) {
-                fieldNumber = Math.max(fieldNumber, fi.number + 1);
-            }
-            // TODO: lots of things can go wrong here...
-            newVersionInfo = new FieldInfo(VersionFieldMapper.NAME,       // field name
-                    fieldNumber,                                          // field number
-                    false,                                                // store term vectors
-                    false,                                                // omit norms
-                    false,                                                // store payloads
-                    IndexOptions.NONE,                                    // index options
-                    DocValuesType.NUMERIC,                                // docvalues
-                    -1,                                                   // docvalues generation
-                    Collections.<String, String>emptyMap()                // attributes
-            );
-        } else {
-            newVersionInfo = versionInfo;
-        }
-        newVersionInfo.checkConsistency(); // fail merge immediately if above code is wrong
-        final ArrayList<FieldInfo> fieldInfoList = new ArrayList<>();
-        for (FieldInfo info : fieldInfos) {
-            if (info != versionInfo) {
-                fieldInfoList.add(info);
-            }
-        }
-        fieldInfoList.add(newVersionInfo);
-        final FieldInfos newFieldInfos = new FieldInfos(fieldInfoList.toArray(new FieldInfo[fieldInfoList.size()]));
-        final NumericDocValues versionValues = new NumericDocValues() {
-            @Override
-            public long get(int index) {
-                return versions.get(index);
-            }
-        };
-        return new FilterLeafReader(reader) {
-            @Override
-            public FieldInfos getFieldInfos() {
-                return newFieldInfos;
-            }
-            @Override
-            public NumericDocValues getNumericDocValues(String field) throws IOException {
-                if (VersionFieldMapper.NAME.equals(field)) {
-                    return versionValues;
-                }
-                return super.getNumericDocValues(field);
-            }
-            @Override
-            public Bits getDocsWithField(String field) throws IOException {
-                return new Bits.MatchAllBits(in.maxDoc());
-            }
-        };
-    }
+    static CodecReader filter(CodecReader reader) throws IOException {
+        // convert 0.90.x _uid payloads to _version docvalues if needed
+        reader = VersionFieldUpgrader.wrap(reader);
+        // TODO: remove 0.90.x/1.x freqs/prox/payloads from _uid?
+        // the previous code never did this, so some indexes carry around trash.
+        return reader;
+    }
 
     static class IndexUpgraderOneMerge extends OneMerge {
@@ -162,13 +72,12 @@ public final class ElasticsearchMergePolicy extends MergePolicy {
         }
 
         @Override
-        public List<LeafReader> getMergeReaders() throws IOException {
-            final List<LeafReader> readers = super.getMergeReaders();
-            ImmutableList.Builder<LeafReader> newReaders = ImmutableList.builder();
-            for (LeafReader reader : readers) {
+        public List<CodecReader> getMergeReaders() throws IOException {
+            final List<CodecReader> newReaders = new ArrayList<>();
+            for (CodecReader reader : super.getMergeReaders()) {
                 newReaders.add(filter(reader));
             }
-            return newReaders.build();
+            return newReaders;
         }
 
     }
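Because the upgrade now runs inside getMergeReaders(), every merge rewrites old segments in the new form, and forcing a merge (as an index upgrade operation would) pushes every remaining old segment through filter(). A sketch using standard IndexWriter API, assuming the writer was opened with this merge policy:

    import java.io.IOException;
    import org.apache.lucene.index.IndexWriter;

    class UpgradeTrigger {
        static void upgradeAllSegments(IndexWriter writer) throws IOException {
            // Each source segment passes through ElasticsearchMergePolicy.filter()
            // on its way into the merged segment.
            writer.forceMerge(1);
        }
    }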
FilterDocValuesProducer.java (new file)

@@ -0,0 +1,157 @@
/*
 * Licensed to Elasticsearch under one or more contributor
 * license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright
 * ownership. Elasticsearch licenses this file to you under
 * the Apache License, Version 2.0 (the "License"); you may
 * not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package org.elasticsearch.index.merge.policy;

import org.apache.lucene.codecs.DocValuesProducer;
import org.apache.lucene.index.BinaryDocValues;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.NumericDocValues;
import org.apache.lucene.index.SortedDocValues;
import org.apache.lucene.index.SortedNumericDocValues;
import org.apache.lucene.index.SortedSetDocValues;
import org.apache.lucene.util.Accountable;
import org.apache.lucene.util.Bits;

import java.io.IOException;
import java.util.Collection;
import java.util.Collections;

// TODO: move this to lucene's FilterCodecReader

/**
 * Base class for filtering DocValuesProducer implementations.
 * <p>
 * NOTE: just like with DocValuesProducer, the default {@link #getMergeInstance()}
 * is unoptimized. Overriding this method when possible can improve performance.
 */
class FilterDocValuesProducer extends DocValuesProducer {
    /** The underlying producer instance. */
    protected final DocValuesProducer in;

    /**
     * Creates a new FilterDocValuesProducer.
     * @param in the underlying producer.
     */
    FilterDocValuesProducer(DocValuesProducer in) {
        this.in = in;
    }

    @Override
    public void close() throws IOException {
        in.close();
    }

    @Override
    public long ramBytesUsed() {
        return in.ramBytesUsed();
    }

    @Override
    public Collection<Accountable> getChildResources() {
        return in.getChildResources();
    }

    @Override
    public NumericDocValues getNumeric(FieldInfo field) throws IOException {
        return in.getNumeric(field);
    }

    @Override
    public BinaryDocValues getBinary(FieldInfo field) throws IOException {
        return in.getBinary(field);
    }

    @Override
    public SortedDocValues getSorted(FieldInfo field) throws IOException {
        return in.getSorted(field);
    }

    @Override
    public SortedNumericDocValues getSortedNumeric(FieldInfo field) throws IOException {
        return in.getSortedNumeric(field);
    }

    @Override
    public SortedSetDocValues getSortedSet(FieldInfo field) throws IOException {
        return in.getSortedSet(field);
    }

    @Override
    public Bits getDocsWithField(FieldInfo field) throws IOException {
        return in.getDocsWithField(field);
    }

    @Override
    public void checkIntegrity() throws IOException {
        in.checkIntegrity();
    }

    // TODO: move this out somewhere else (and can we fix all these null producers in lucene?)
    // we shouldn't need nullness for any reason.

    public static final DocValuesProducer EMPTY = new DocValuesProducer() {

        @Override
        public void close() throws IOException {}

        @Override
        public long ramBytesUsed() {
            return 0;
        }

        @Override
        public Collection<Accountable> getChildResources() {
            return Collections.emptyList();
        }

        @Override
        public NumericDocValues getNumeric(FieldInfo field) throws IOException {
            throw new IllegalStateException(); // we don't have any docvalues
        }

        @Override
        public BinaryDocValues getBinary(FieldInfo field) throws IOException {
            throw new IllegalStateException(); // we don't have any docvalues
        }

        @Override
        public SortedDocValues getSorted(FieldInfo field) throws IOException {
            throw new IllegalStateException(); // we don't have any docvalues
        }

        @Override
        public SortedNumericDocValues getSortedNumeric(FieldInfo field) throws IOException {
            throw new IllegalStateException(); // we don't have any docvalues
        }

        @Override
        public SortedSetDocValues getSortedSet(FieldInfo field) throws IOException {
            throw new IllegalStateException(); // we don't have any docvalues
        }

        @Override
        public Bits getDocsWithField(FieldInfo field) throws IOException {
            throw new IllegalStateException(); // we don't have any docvalues
        }

        @Override
        public void checkIntegrity() throws IOException {}
    };
}
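The point of the base class is that subclasses override only the methods they care about and delegate the rest; UninvertedVersions in the next file overrides exactly two. A hypothetical subclass for illustration (not part of the commit):

    import java.io.IOException;
    import java.util.concurrent.atomic.AtomicLong;
    import org.apache.lucene.codecs.DocValuesProducer;
    import org.apache.lucene.index.FieldInfo;
    import org.apache.lucene.index.NumericDocValues;

    // Hypothetical example: count numeric docvalues lookups, delegate everything else.
    class CountingDocValuesProducer extends FilterDocValuesProducer {
        final AtomicLong numericLookups = new AtomicLong();

        CountingDocValuesProducer(DocValuesProducer in) {
            super(in);
        }

        @Override
        public NumericDocValues getNumeric(FieldInfo field) throws IOException {
            numericLookups.incrementAndGet();
            return in.getNumeric(field);
        }
    }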
VersionFieldUpgrader.java (new file)

@@ -0,0 +1,168 @@
/*
 * Licensed to Elasticsearch under one or more contributor
 * license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright
 * ownership. Elasticsearch licenses this file to you under
 * the Apache License, Version 2.0 (the "License"); you may
 * not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package org.elasticsearch.index.merge.policy;

import org.apache.lucene.codecs.DocValuesProducer;
import org.apache.lucene.index.CodecReader;
import org.apache.lucene.index.DocValuesType;
import org.apache.lucene.index.DocsAndPositionsEnum;
import org.apache.lucene.index.DocsEnum;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.FieldInfos;
import org.apache.lucene.index.FilterCodecReader;
import org.apache.lucene.index.IndexOptions;
import org.apache.lucene.index.NumericDocValues;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.packed.GrowableWriter;
import org.apache.lucene.util.packed.PackedInts;
import org.elasticsearch.common.Numbers;
import org.elasticsearch.index.mapper.internal.UidFieldMapper;
import org.elasticsearch.index.mapper.internal.VersionFieldMapper;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;

/**
 * Converts 0.90.x _uid payloads to _version docvalues
 */
class VersionFieldUpgrader extends FilterCodecReader {
    final FieldInfos infos;

    VersionFieldUpgrader(CodecReader in) {
        super(in);

        // Find a free field number
        int fieldNumber = 0;
        for (FieldInfo fi : in.getFieldInfos()) {
            fieldNumber = Math.max(fieldNumber, fi.number + 1);
        }

        // TODO: lots of things can go wrong here...
        FieldInfo newInfo = new FieldInfo(VersionFieldMapper.NAME,    // field name
                fieldNumber,                                          // field number
                false,                                                // store term vectors
                false,                                                // omit norms
                false,                                                // store payloads
                IndexOptions.NONE,                                    // index options
                DocValuesType.NUMERIC,                                // docvalues
                -1,                                                   // docvalues generation
                Collections.<String, String>emptyMap()                // attributes
        );
        newInfo.checkConsistency(); // fail merge immediately if above code is wrong

        final ArrayList<FieldInfo> fieldInfoList = new ArrayList<>();
        for (FieldInfo info : in.getFieldInfos()) {
            if (!info.name.equals(VersionFieldMapper.NAME)) {
                fieldInfoList.add(info);
            }
        }
        fieldInfoList.add(newInfo);
        infos = new FieldInfos(fieldInfoList.toArray(new FieldInfo[fieldInfoList.size()]));
    }

    static CodecReader wrap(CodecReader reader) throws IOException {
        final FieldInfos fieldInfos = reader.getFieldInfos();
        final FieldInfo versionInfo = fieldInfos.fieldInfo(VersionFieldMapper.NAME);
        if (versionInfo != null && versionInfo.getDocValuesType() != DocValuesType.NONE) {
            // the reader is a recent one, it has versions and they are stored
            // in a numeric doc values field
            return reader;
        }
        // The segment is an old one, look at the _uid field
        final Terms terms = reader.terms(UidFieldMapper.NAME);
        if (terms == null || !terms.hasPayloads()) {
            // The segment doesn't have an _uid field or doesn't have payloads,
            // don't try to do anything clever. If any other segment has versions
            // all versions of this segment will be initialized to 0
            return reader;
        }
        // convert _uid payloads -> _version docvalues
        return new VersionFieldUpgrader(reader);
    }

    @Override
    public FieldInfos getFieldInfos() {
        return infos;
    }

    @Override
    public DocValuesProducer getDocValuesReader() {
        DocValuesProducer producer = in.getDocValuesReader();
        // TODO: move this nullness stuff out
        if (producer == null) {
            producer = FilterDocValuesProducer.EMPTY;
        }
        return new UninvertedVersions(producer, this);
    }

    static class UninvertedVersions extends FilterDocValuesProducer {
        final CodecReader reader;

        UninvertedVersions(DocValuesProducer in, CodecReader reader) {
            super(in);
            this.reader = reader;
        }

        @Override
        public NumericDocValues getNumeric(FieldInfo field) throws IOException {
            if (VersionFieldMapper.NAME.equals(field.name)) {
                // uninvert into packed ints and expose as docvalues
                final Terms terms = reader.terms(UidFieldMapper.NAME);
                final TermsEnum uids = terms.iterator(null);
                final GrowableWriter versions = new GrowableWriter(2, reader.maxDoc(), PackedInts.COMPACT);
                DocsAndPositionsEnum dpe = null;
                for (BytesRef uid = uids.next(); uid != null; uid = uids.next()) {
                    dpe = uids.docsAndPositions(reader.getLiveDocs(), dpe, DocsAndPositionsEnum.FLAG_PAYLOADS);
                    assert dpe != null : "field has payloads";
                    for (int doc = dpe.nextDoc(); doc != DocsEnum.NO_MORE_DOCS; doc = dpe.nextDoc()) {
                        dpe.nextPosition();
                        final BytesRef payload = dpe.getPayload();
                        if (payload != null && payload.length == 8) {
                            final long version = Numbers.bytesToLong(payload);
                            versions.set(doc, version);
                            break;
                        }
                    }
                }
                return versions;
            } else {
                return in.getNumeric(field);
            }
        }

        @Override
        public Bits getDocsWithField(FieldInfo field) throws IOException {
            if (VersionFieldMapper.NAME.equals(field.name)) {
                return new Bits.MatchAllBits(reader.maxDoc());
            } else {
                return in.getDocsWithField(field);
            }
        }

        @Override
        public DocValuesProducer getMergeInstance() throws IOException {
            return new UninvertedVersions(in.getMergeInstance(), reader);
        }
    }
}
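The uninverted versions live in a GrowableWriter: a packed-ints array that starts at 2 bits per value and widens itself only when a larger value arrives, with PackedInts.COMPACT trading lookup speed for minimal memory (the old code removed above used PackedInts.DEFAULT). A standalone sketch of the structure:

    import org.apache.lucene.util.packed.GrowableWriter;
    import org.apache.lucene.util.packed.PackedInts;

    public class PackedVersionsDemo {
        public static void main(String[] args) {
            int maxDoc = 4;
            // same parameters as the upgrader: 2 start bits, COMPACT overhead
            GrowableWriter versions = new GrowableWriter(2, maxDoc, PackedInts.COMPACT);
            versions.set(0, 3);     // fits in the initial 2 bits
            versions.set(1, 1000);  // forces a wider bit width for the whole array
            System.out.println(versions.get(1)); // prints 1000
        }
    }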
PercolatorService.java

@@ -133,7 +133,8 @@ public class PercolatorService extends AbstractComponent {
         cache = new CloseableThreadLocal<MemoryIndex>() {
             @Override
             protected MemoryIndex initialValue() {
-                return new ExtendedMemoryIndex(true, maxReuseBytes);
+                // TODO: should we expose payloads as an option? should offsets be turned on always?
+                return new ExtendedMemoryIndex(true, false, maxReuseBytes);
             }
         };
         single = new SingleDocumentPercolatorIndex(cache);
QueryCollector.java

@@ -226,6 +226,7 @@ abstract class QueryCollector extends SimpleCollector {
     final static class MatchAndSort extends QueryCollector {
 
         private final TopScoreDocCollector topDocsCollector;
+        private LeafCollector topDocsLeafCollector;
 
         MatchAndSort(ESLogger logger, PercolateContext context, boolean isNestedDoc) {
             super(logger, context, isNestedDoc);
@@ -248,7 +249,7 @@ abstract class QueryCollector extends SimpleCollector {
                     Lucene.exists(searcher, query, collector);
                 }
                 if (collector.exists()) {
-                    topDocsCollector.collect(doc);
+                    topDocsLeafCollector.collect(doc);
                     postMatch(doc);
                 }
             } catch (IOException e) {
@@ -259,13 +260,12 @@ abstract class QueryCollector extends SimpleCollector {
         @Override
         public void doSetNextReader(LeafReaderContext context) throws IOException {
             super.doSetNextReader(context);
-            LeafCollector leafCollector = topDocsCollector.getLeafCollector(context);
-            assert leafCollector == topDocsCollector : "TopDocsCollector returns itself as leaf collector";
+            topDocsLeafCollector = topDocsCollector.getLeafCollector(context);
         }
 
         @Override
         public void setScorer(Scorer scorer) throws IOException {
-            topDocsCollector.setScorer(scorer);
+            topDocsLeafCollector.setScorer(scorer);
         }
 
         TopDocs topDocs() {
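This change tracks Lucene 5's collector split: a Collector is now a factory that hands out one LeafCollector per segment, and collect()/setScorer() live on the leaf, so the old assumption that TopScoreDocCollector returns itself no longer holds. A sketch of the protocol; the two-argument TopScoreDocCollector.create matches the snapshot API used in the tests below:

    import java.io.IOException;
    import org.apache.lucene.index.IndexReader;
    import org.apache.lucene.index.LeafReaderContext;
    import org.apache.lucene.search.LeafCollector;
    import org.apache.lucene.search.TopScoreDocCollector;

    class LeafCollectorProtocol {
        static void collectAll(IndexReader reader) throws IOException {
            TopScoreDocCollector top = TopScoreDocCollector.create(10, false);
            for (LeafReaderContext leaf : reader.leaves()) {
                // One LeafCollector per segment; docs are collected through it:
                LeafCollector leafCollector = top.getLeafCollector(leaf);
                // leafCollector.setScorer(scorer) once per segment, then
                // leafCollector.collect(doc) for each matching doc
            }
        }
    }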
VersionFieldUpgraderTest.java (new file)

@@ -0,0 +1,144 @@
/*
 * Licensed to Elasticsearch under one or more contributor
 * license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright
 * ownership. Elasticsearch licenses this file to you under
 * the Apache License, Version 2.0 (the "License"); you may
 * not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package org.elasticsearch.index.merge.policy;

import org.apache.lucene.analysis.CannedTokenStream;
import org.apache.lucene.analysis.Token;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.NumericDocValuesField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.CodecReader;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.DocValuesType;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.TestUtil;
import org.elasticsearch.common.Numbers;
import org.elasticsearch.index.mapper.internal.UidFieldMapper;
import org.elasticsearch.index.mapper.internal.VersionFieldMapper;
import org.elasticsearch.test.ElasticsearchLuceneTestCase;

/** Tests upgrading old document versions from _uid payloads to _version docvalues */
public class VersionFieldUpgraderTest extends ElasticsearchLuceneTestCase {

    /** Simple test: one doc in the old format, check that it looks correct */
    public void testUpgradeOneDocument() throws Exception {
        Directory dir = newDirectory();
        IndexWriter iw = new IndexWriter(dir, new IndexWriterConfig(null));

        // add a document with a _uid having a payload of 3
        Document doc = new Document();
        Token token = new Token("1", 0, 1);
        token.setPayload(new BytesRef(Numbers.longToBytes(3)));
        doc.add(new TextField(UidFieldMapper.NAME, new CannedTokenStream(token)));
        iw.addDocument(doc);
        iw.commit();

        CodecReader reader = getOnlySegmentReader(DirectoryReader.open(iw, true));
        CodecReader upgraded = VersionFieldUpgrader.wrap(reader);
        // we need to be upgraded, should be a different instance
        assertNotSame(reader, upgraded);

        // make sure we can see our numericdocvalues in fieldinfos
        FieldInfo versionField = upgraded.getFieldInfos().fieldInfo(VersionFieldMapper.NAME);
        assertNotNull(versionField);
        assertEquals(DocValuesType.NUMERIC, versionField.getDocValuesType());
        // should have a value of 3, and be visible in docsWithField
        assertEquals(3, upgraded.getNumericDocValues(VersionFieldMapper.NAME).get(0));
        assertTrue(upgraded.getDocsWithField(VersionFieldMapper.NAME).get(0));

        // verify filterreader with checkindex
        TestUtil.checkReader(upgraded);

        reader.close();
        iw.close();
        dir.close();
    }

    /** Test that we are a no-op if the segment already has the version field */
    public void testAlreadyUpgraded() throws Exception {
        Directory dir = newDirectory();
        IndexWriter iw = new IndexWriter(dir, new IndexWriterConfig(null));

        // add a document with a _uid having a payload of 3
        Document doc = new Document();
        Token token = new Token("1", 0, 1);
        token.setPayload(new BytesRef(Numbers.longToBytes(3)));
        doc.add(new TextField(UidFieldMapper.NAME, new CannedTokenStream(token)));
        doc.add(new NumericDocValuesField(VersionFieldMapper.NAME, 3));
        iw.addDocument(doc);
        iw.commit();

        CodecReader reader = getOnlySegmentReader(DirectoryReader.open(iw, true));
        CodecReader upgraded = VersionFieldUpgrader.wrap(reader);
        // we already upgraded: should be same instance
        assertSame(reader, upgraded);

        reader.close();
        iw.close();
        dir.close();
    }

    /** Test upgrading two documents */
    public void testUpgradeTwoDocuments() throws Exception {
        Directory dir = newDirectory();
        IndexWriter iw = new IndexWriter(dir, new IndexWriterConfig(null));

        // add a document with a _uid having a payload of 3
        Document doc = new Document();
        Token token = new Token("1", 0, 1);
        token.setPayload(new BytesRef(Numbers.longToBytes(3)));
        doc.add(new TextField(UidFieldMapper.NAME, new CannedTokenStream(token)));
        iw.addDocument(doc);

        doc = new Document();
        token = new Token("2", 0, 1);
        token.setPayload(new BytesRef(Numbers.longToBytes(4)));
        doc.add(new TextField(UidFieldMapper.NAME, new CannedTokenStream(token)));
        iw.addDocument(doc);

        iw.commit();

        CodecReader reader = getOnlySegmentReader(DirectoryReader.open(iw, true));
        CodecReader upgraded = VersionFieldUpgrader.wrap(reader);
        // we need to be upgraded, should be a different instance
        assertNotSame(reader, upgraded);

        // make sure we can see our numericdocvalues in fieldinfos
        FieldInfo versionField = upgraded.getFieldInfos().fieldInfo(VersionFieldMapper.NAME);
        assertNotNull(versionField);
        assertEquals(DocValuesType.NUMERIC, versionField.getDocValuesType());
        // should have values of 3 and 4, and be visible in docsWithField
        assertEquals(3, upgraded.getNumericDocValues(VersionFieldMapper.NAME).get(0));
        assertEquals(4, upgraded.getNumericDocValues(VersionFieldMapper.NAME).get(1));
        assertTrue(upgraded.getDocsWithField(VersionFieldMapper.NAME).get(0));
        assertTrue(upgraded.getDocsWithField(VersionFieldMapper.NAME).get(1));

        // verify filterreader with checkindex
        TestUtil.checkReader(upgraded);

        reader.close();
        iw.close();
        dir.close();
    }
}
ChildrenQueryTests.java

@@ -223,9 +223,10 @@ public class ChildrenQueryTests extends AbstractChildTests {
         FixedBitSet expectedResult = new FixedBitSet(indexReader.maxDoc());
         MockScorer mockScorer = new MockScorer(scoreType);
         TopScoreDocCollector expectedTopDocsCollector = TopScoreDocCollector.create(numHits, false);
-        expectedTopDocsCollector.setScorer(mockScorer);
         if (childValueToParentIds.containsKey(childValue)) {
             LeafReader slowLeafReader = SlowCompositeReaderWrapper.wrap(indexReader);
+            final LeafCollector leafCollector = expectedTopDocsCollector.getLeafCollector(slowLeafReader.getContext());
+            leafCollector.setScorer(mockScorer);
             Terms terms = slowLeafReader.terms(UidFieldMapper.NAME);
             if (terms != null) {
                 NavigableMap<String, FloatArrayList> parentIdToChildScores = childValueToParentIds.lget();
@@ -239,7 +240,7 @@ public class ChildrenQueryTests extends AbstractChildTests {
                         docsEnum = termsEnum.docs(slowLeafReader.getLiveDocs(), docsEnum, DocsEnum.FLAG_NONE);
                         expectedResult.set(docsEnum.nextDoc());
                         mockScorer.scores = entry.getValue();
-                        expectedTopDocsCollector.collect(docsEnum.docID());
+                        leafCollector.collect(docsEnum.docID());
                     } else if (seekStatus == TermsEnum.SeekStatus.END) {
                         break;
                     }
ParentQueryTests.java

@@ -205,9 +205,10 @@ public class ParentQueryTests extends AbstractChildTests {
         MockScorer mockScorer = new MockScorer(ScoreType.MAX); // just save one score per parent...
         mockScorer.scores = new FloatArrayList();
         TopScoreDocCollector expectedTopDocsCollector = TopScoreDocCollector.create(numHits, false);
-        expectedTopDocsCollector.setScorer(mockScorer);
         if (parentValueToChildIds.containsKey(parentValue)) {
             LeafReader slowLeafReader = SlowCompositeReaderWrapper.wrap(indexReader);
+            final LeafCollector leafCollector = expectedTopDocsCollector.getLeafCollector(slowLeafReader.getContext());
+            leafCollector.setScorer(mockScorer);
             Terms terms = slowLeafReader.terms(UidFieldMapper.NAME);
             if (terms != null) {
                 NavigableMap<String, Float> childIdsAndScore = parentValueToChildIds.lget();
@@ -219,7 +220,7 @@ public class ParentQueryTests extends AbstractChildTests {
                         docsEnum = termsEnum.docs(slowLeafReader.getLiveDocs(), docsEnum, DocsEnum.FLAG_NONE);
                         expectedResult.set(docsEnum.nextDoc());
                         mockScorer.scores.add(entry.getValue());
-                        expectedTopDocsCollector.collect(docsEnum.docID());
+                        leafCollector.collect(docsEnum.docID());
                         mockScorer.scores.clear();
                     } else if (seekStatus == TermsEnum.SeekStatus.END) {
                         break;
ElasticsearchLuceneTestCase.java

@@ -24,7 +24,6 @@ import com.carrotsearch.randomizedtesting.annotations.*;
 import com.carrotsearch.randomizedtesting.annotations.ThreadLeakScope.Scope;
 import org.apache.lucene.codecs.Codec;
 import org.apache.lucene.util.LuceneTestCase;
-import org.apache.lucene.util.LuceneTestCase.SuppressCodecs;
 import org.apache.lucene.util.TimeUnits;
 import org.elasticsearch.test.junit.listeners.ReproduceInfoPrinter;
 
@@ -39,7 +38,6 @@ import org.elasticsearch.test.junit.listeners.ReproduceInfoPrinter;
 @ThreadLeakScope(Scope.SUITE)
 @ThreadLeakLingering(linger = 5000) // 5 sec lingering
 @TimeoutSuite(millis = TimeUnits.HOUR)
-@SuppressCodecs("Lucene3x")
 @LuceneTestCase.SuppressSysoutChecks(bugUrl = "we log a lot on purpose")
 public abstract class ElasticsearchLuceneTestCase extends LuceneTestCase {
 