mirror of https://github.com/apache/lucene.git

commit f979dee252 (parent 8af2ef5345)

    LUCENE-5894: refactor bulk merge logic

    git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1619392 13f79535-47bb-0310-9956-ffa450edef68
SimpleTextNormsFormat.java
@@ -21,7 +21,11 @@ import java.io.IOException;
 
 import org.apache.lucene.codecs.DocValuesConsumer;
 import org.apache.lucene.codecs.DocValuesProducer;
+import org.apache.lucene.codecs.NormsConsumer;
 import org.apache.lucene.codecs.NormsFormat;
+import org.apache.lucene.codecs.NormsProducer;
+import org.apache.lucene.index.FieldInfo;
+import org.apache.lucene.index.NumericDocValues;
 import org.apache.lucene.index.SegmentReadState;
 import org.apache.lucene.index.SegmentWriteState;
 
@@ -36,12 +40,12 @@ public class SimpleTextNormsFormat extends NormsFormat {
   private static final String NORMS_SEG_EXTENSION = "len";
 
   @Override
-  public DocValuesConsumer normsConsumer(SegmentWriteState state) throws IOException {
+  public NormsConsumer normsConsumer(SegmentWriteState state) throws IOException {
     return new SimpleTextNormsConsumer(state);
   }
 
   @Override
-  public DocValuesProducer normsProducer(SegmentReadState state) throws IOException {
+  public NormsProducer normsProducer(SegmentReadState state) throws IOException {
     return new SimpleTextNormsProducer(state);
   }
 
@@ -52,11 +56,33 @@ public class SimpleTextNormsFormat extends NormsFormat {
    *
    * @lucene.experimental
    */
-  public static class SimpleTextNormsProducer extends SimpleTextDocValuesReader {
+  public static class SimpleTextNormsProducer extends NormsProducer {
+    private final SimpleTextDocValuesReader impl;
+
     public SimpleTextNormsProducer(SegmentReadState state) throws IOException {
       // All we do is change the extension from .dat -> .len;
       // otherwise this is a normal simple doc values file:
-      super(state, NORMS_SEG_EXTENSION);
+      impl = new SimpleTextDocValuesReader(state, NORMS_SEG_EXTENSION);
+    }
+
+    @Override
+    public NumericDocValues getNorms(FieldInfo field) throws IOException {
+      return impl.getNumeric(field);
+    }
+
+    @Override
+    public void close() throws IOException {
+      impl.close();
+    }
+
+    @Override
+    public long ramBytesUsed() {
+      return impl.ramBytesUsed();
+    }
+
+    @Override
+    public void checkIntegrity() throws IOException {
+      impl.checkIntegrity();
     }
   }
 
@@ -67,11 +93,23 @@ public class SimpleTextNormsFormat extends NormsFormat {
    *
    * @lucene.experimental
    */
-  public static class SimpleTextNormsConsumer extends SimpleTextDocValuesWriter {
+  public static class SimpleTextNormsConsumer extends NormsConsumer {
+    private final SimpleTextDocValuesWriter impl;
+
     public SimpleTextNormsConsumer(SegmentWriteState state) throws IOException {
       // All we do is change the extension from .dat -> .len;
       // otherwise this is a normal simple doc values file:
-      super(state, NORMS_SEG_EXTENSION);
+      impl = new SimpleTextDocValuesWriter(state, NORMS_SEG_EXTENSION);
+    }
+
+    @Override
+    public void addNormsField(FieldInfo field, Iterable<Number> values) throws IOException {
+      impl.addNumericField(field, values);
+    }
+
+    @Override
+    public void close() throws IOException {
+      impl.close();
     }
   }
 }
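For orientation, the contract that the normsConsumer/normsProducer hunks above switch to can be reduced to a standalone NormsFormat skeleton. This is a hypothetical sketch only (MyNormsFormat, createConsumer and createProducer are made-up names, not part of this commit); the point is that the write side now returns a NormsConsumer and the read side a NormsProducer instead of the DocValues classes.

import java.io.IOException;

import org.apache.lucene.codecs.NormsConsumer;
import org.apache.lucene.codecs.NormsFormat;
import org.apache.lucene.codecs.NormsProducer;
import org.apache.lucene.index.SegmentReadState;
import org.apache.lucene.index.SegmentWriteState;

// Hypothetical skeleton of a norms format against the refactored API.
public abstract class MyNormsFormat extends NormsFormat {
  @Override
  public NormsConsumer normsConsumer(SegmentWriteState state) throws IOException {
    return createConsumer(state);   // e.g. wrap a doc values writer, as SimpleText does above
  }

  @Override
  public NormsProducer normsProducer(SegmentReadState state) throws IOException {
    return createProducer(state);   // e.g. wrap a doc values reader, as SimpleText does above
  }

  // Made-up extension points for this sketch.
  protected abstract NormsConsumer createConsumer(SegmentWriteState state) throws IOException;
  protected abstract NormsProducer createProducer(SegmentReadState state) throws IOException;
}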
DocValuesConsumer.java
@@ -19,6 +19,7 @@ package org.apache.lucene.codecs;
 
 import java.io.Closeable;
 import java.io.IOException;
+import java.util.ArrayList;
 import java.util.Iterator;
 import java.util.List;
 import java.util.NoSuchElementException;
@@ -28,7 +29,9 @@ import org.apache.lucene.index.BinaryDocValues;
 import org.apache.lucene.index.FieldInfo;
 import org.apache.lucene.index.FilteredTermsEnum;
 import org.apache.lucene.index.MergeState;
+import org.apache.lucene.index.FieldInfo.DocValuesType;
 import org.apache.lucene.index.MultiDocValues.OrdinalMap;
+import org.apache.lucene.index.DocValues;
 import org.apache.lucene.index.NumericDocValues;
 import org.apache.lucene.index.SegmentWriteState;
 import org.apache.lucene.index.SortedDocValues;
@@ -51,13 +54,14 @@ import org.apache.lucene.util.packed.PackedInts;
  * The lifecycle is:
  * <ol>
  *   <li>DocValuesConsumer is created by
- *       {@link DocValuesFormat#fieldsConsumer(SegmentWriteState)} or
  *       {@link NormsFormat#normsConsumer(SegmentWriteState)}.
  *   <li>{@link #addNumericField}, {@link #addBinaryField},
- *       or {@link #addSortedField} are called for each Numeric,
- *       Binary, or Sorted docvalues field. The API is a "pull" rather
- *       than "push", and the implementation is free to iterate over the
- *       values multiple times ({@link Iterable#iterator()}).
+ *       {@link #addSortedField}, {@link #addSortedSetField},
+ *       or {@link #addSortedNumericField} are called for each Numeric,
+ *       Binary, Sorted, SortedSet, or SortedNumeric docvalues field.
+ *       The API is a "pull" rather than "push", and the implementation
+ *       is free to iterate over the values multiple times
+ *       ({@link Iterable#iterator()}).
  *   <li>After all fields are added, the consumer is {@link #close}d.
 * </ol>
 *
@@ -118,6 +122,83 @@ public abstract class DocValuesConsumer implements Closeable {
   */
  public abstract void addSortedSetField(FieldInfo field, Iterable<BytesRef> values, Iterable<Number> docToOrdCount, Iterable<Number> ords) throws IOException;
 
+  /** Merges in the fields from the readers in
+   *  <code>mergeState</code>. The default implementation
+   *  calls {@link #mergeNumericField}, {@link #mergeBinaryField},
+   *  {@link #mergeSortedField}, {@link #mergeSortedSetField},
+   *  or {@link #mergeSortedNumericField} for each field,
+   *  depending on its type.
+   *  Implementations can override this method
+   *  for more sophisticated merging (bulk-byte copying, etc). */
+  public void merge(MergeState mergeState) throws IOException {
+    for (FieldInfo field : mergeState.fieldInfos) {
+      DocValuesType type = field.getDocValuesType();
+      if (type != null) {
+        if (type == DocValuesType.NUMERIC) {
+          List<NumericDocValues> toMerge = new ArrayList<>();
+          List<Bits> docsWithField = new ArrayList<>();
+          for (AtomicReader reader : mergeState.readers) {
+            NumericDocValues values = reader.getNumericDocValues(field.name);
+            Bits bits = reader.getDocsWithField(field.name);
+            if (values == null) {
+              values = DocValues.emptyNumeric();
+              bits = new Bits.MatchNoBits(reader.maxDoc());
+            }
+            toMerge.add(values);
+            docsWithField.add(bits);
+          }
+          mergeNumericField(field, mergeState, toMerge, docsWithField);
+        } else if (type == DocValuesType.BINARY) {
+          List<BinaryDocValues> toMerge = new ArrayList<>();
+          List<Bits> docsWithField = new ArrayList<>();
+          for (AtomicReader reader : mergeState.readers) {
+            BinaryDocValues values = reader.getBinaryDocValues(field.name);
+            Bits bits = reader.getDocsWithField(field.name);
+            if (values == null) {
+              values = DocValues.emptyBinary();
+              bits = new Bits.MatchNoBits(reader.maxDoc());
+            }
+            toMerge.add(values);
+            docsWithField.add(bits);
+          }
+          mergeBinaryField(field, mergeState, toMerge, docsWithField);
+        } else if (type == DocValuesType.SORTED) {
+          List<SortedDocValues> toMerge = new ArrayList<>();
+          for (AtomicReader reader : mergeState.readers) {
+            SortedDocValues values = reader.getSortedDocValues(field.name);
+            if (values == null) {
+              values = DocValues.emptySorted();
+            }
+            toMerge.add(values);
+          }
+          mergeSortedField(field, mergeState, toMerge);
+        } else if (type == DocValuesType.SORTED_SET) {
+          List<SortedSetDocValues> toMerge = new ArrayList<>();
+          for (AtomicReader reader : mergeState.readers) {
+            SortedSetDocValues values = reader.getSortedSetDocValues(field.name);
+            if (values == null) {
+              values = DocValues.emptySortedSet();
+            }
+            toMerge.add(values);
+          }
+          mergeSortedSetField(field, mergeState, toMerge);
+        } else if (type == DocValuesType.SORTED_NUMERIC) {
+          List<SortedNumericDocValues> toMerge = new ArrayList<>();
+          for (AtomicReader reader : mergeState.readers) {
+            SortedNumericDocValues values = reader.getSortedNumericDocValues(field.name);
+            if (values == null) {
+              values = DocValues.emptySortedNumeric(reader.maxDoc());
+            }
+            toMerge.add(values);
+          }
+          mergeSortedNumericField(field, mergeState, toMerge);
+        } else {
+          throw new AssertionError("type=" + type);
+        }
+      }
+    }
+  }
+
  /**
   * Merges the numeric docvalues from <code>toMerge</code>.
   * <p>
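The merge() default added above is the hook that lets the segment merger hand the whole MergeState to the doc values consumer; a codec only overrides it when it can merge more cheaply than field by field. A minimal hypothetical override is sketched below (the class name is illustrative, not from this commit): it keeps the default behavior and just reports elapsed time through the merge infoStream.

import java.io.IOException;

import org.apache.lucene.codecs.DocValuesConsumer;
import org.apache.lucene.index.MergeState;

// Hypothetical: keep the default per-field merge but measure it. A real codec
// would instead replace super.merge() with a format-specific bulk copy.
abstract class TimingDocValuesConsumer extends DocValuesConsumer {
  @Override
  public void merge(MergeState mergeState) throws IOException {
    long startNS = System.nanoTime();
    super.merge(mergeState);   // default: mergeNumericField / mergeBinaryField / ... per field
    if (mergeState.infoStream.isEnabled("SM")) {
      mergeState.infoStream.message("SM", "doc values merge took " + ((System.nanoTime() - startNS) / 1000000) + " ms");
    }
  }
}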
DocValuesProducer.java
@@ -29,8 +29,8 @@ import org.apache.lucene.index.SortedSetDocValues;
 import org.apache.lucene.util.Accountable;
 import org.apache.lucene.util.Bits;
 
-/** Abstract API that produces numeric, binary and
- *  sorted docvalues.
+/** Abstract API that produces numeric, binary, sorted, sortedset,
+ *  and sortednumeric docvalues.
  *
  * @lucene.experimental
  */
FieldsConsumer.java
@@ -19,10 +19,15 @@ package org.apache.lucene.codecs;
 
 import java.io.Closeable;
 import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
 
-import org.apache.lucene.index.FieldInfo; // javadocs
+import org.apache.lucene.index.AtomicReader;
 import org.apache.lucene.index.Fields;
-import org.apache.lucene.index.SegmentWriteState; // javadocs
+import org.apache.lucene.index.MappedMultiFields;
+import org.apache.lucene.index.MergeState;
+import org.apache.lucene.index.MultiFields;
+import org.apache.lucene.index.ReaderSlice;
 
 /**
  * Abstract API that consumes terms, doc, freq, prox, offset and
@@ -73,6 +78,34 @@ public abstract class FieldsConsumer implements Closeable {
   *    </ul>
   */
  public abstract void write(Fields fields) throws IOException;
 
+  /** Merges in the fields from the readers in
+   *  <code>mergeState</code>. The default implementation skips
+   *  and maps around deleted documents, and calls {@link #write(Fields)}.
+   *  Implementations can override this method for more sophisticated
+   *  merging (bulk-byte copying, etc). */
+  public void merge(MergeState mergeState) throws IOException {
+    final List<Fields> fields = new ArrayList<>();
+    final List<ReaderSlice> slices = new ArrayList<>();
+
+    int docBase = 0;
+
+    for(int readerIndex=0;readerIndex<mergeState.readers.size();readerIndex++) {
+      final AtomicReader reader = mergeState.readers.get(readerIndex);
+      final Fields f = reader.fields();
+      final int maxDoc = reader.maxDoc();
+      if (f != null) {
+        slices.add(new ReaderSlice(docBase, maxDoc, readerIndex));
+        fields.add(f);
+      }
+      docBase += maxDoc;
+    }
+
+    Fields mergedFields = new MappedMultiFields(mergeState,
+                                                new MultiFields(fields.toArray(Fields.EMPTY_ARRAY),
+                                                                slices.toArray(ReaderSlice.EMPTY_ARRAY)));
+    write(mergedFields);
+  }
+
  // NOTE: strange but necessary so javadocs linting is happy:
  @Override
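With the FieldsConsumer.merge() default above, any postings format inherits merging through its write(Fields) implementation: the base class builds a deletion-mapping MultiFields view over the incoming readers and pushes it through write(). A caller-side sketch, assuming a SegmentWriteState and MergeState are already set up (the helper class and method names here are illustrative, not Lucene code):

import java.io.IOException;

import org.apache.lucene.codecs.Codec;
import org.apache.lucene.codecs.FieldsConsumer;
import org.apache.lucene.index.MergeState;
import org.apache.lucene.index.SegmentWriteState;

class PostingsMergeSketch {
  // Hypothetical helper: ask the codec for its FieldsConsumer and delegate the whole merge to it.
  static void mergePostings(Codec codec, SegmentWriteState writeState, MergeState mergeState) throws IOException {
    try (FieldsConsumer consumer = codec.postingsFormat().fieldsConsumer(writeState)) {
      consumer.merge(mergeState);   // default: MappedMultiFields over live docs, then write(Fields)
    }
  }
}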
NormsConsumer.java (new file)
@@ -0,0 +1,163 @@
+package org.apache.lucene.codecs;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.Closeable;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Iterator;
+import java.util.List;
+import java.util.NoSuchElementException;
+
+import org.apache.lucene.index.AtomicReader;
+import org.apache.lucene.index.DocValues;
+import org.apache.lucene.index.FieldInfo;
+import org.apache.lucene.index.MergeState;
+import org.apache.lucene.index.NumericDocValues;
+import org.apache.lucene.index.SegmentWriteState;
+import org.apache.lucene.util.Bits;
+
+/**
+ * Abstract API that consumes normalization values.
+ * Concrete implementations of this
+ * actually do "something" with the norms (write it into
+ * the index in a specific format).
+ * <p>
+ * The lifecycle is:
+ * <ol>
+ *   <li>NormsConsumer is created by
+ *       {@link NormsFormat#normsConsumer(SegmentWriteState)}.
+ *   <li>{@link #addNormsField} is called for each field with
+ *       normalization values. The API is a "pull" rather
+ *       than "push", and the implementation is free to iterate over the
+ *       values multiple times ({@link Iterable#iterator()}).
+ *   <li>After all fields are added, the consumer is {@link #close}d.
+ * </ol>
+ *
+ * @lucene.experimental
+ */
+public abstract class NormsConsumer implements Closeable {
+
+  /** Sole constructor. (For invocation by subclass
+   *  constructors, typically implicit.) */
+  protected NormsConsumer() {}
+
+  /**
+   * Writes normalization values for a field.
+   * @param field field information
+   * @param values Iterable of numeric values (one for each document).
+   * @throws IOException if an I/O error occurred.
+   */
+  public abstract void addNormsField(FieldInfo field, Iterable<Number> values) throws IOException;
+
+  /** Merges in the fields from the readers in
+   *  <code>mergeState</code>. The default implementation
+   *  calls {@link #mergeNormsField} for each field,
+   *  filling segments with missing norms for the field with zeros.
+   *  Implementations can override this method
+   *  for more sophisticated merging (bulk-byte copying, etc). */
+  public void merge(MergeState mergeState) throws IOException {
+    for (FieldInfo field : mergeState.fieldInfos) {
+      if (field.hasNorms()) {
+        List<NumericDocValues> toMerge = new ArrayList<>();
+        for (AtomicReader reader : mergeState.readers) {
+          NumericDocValues norms = reader.getNormValues(field.name);
+          if (norms == null) {
+            norms = DocValues.emptyNumeric();
+          }
+          toMerge.add(norms);
+        }
+        mergeNormsField(field, mergeState, toMerge);
+      }
+    }
+  }
+
+  /**
+   * Merges the norms from <code>toMerge</code>.
+   * <p>
+   * The default implementation calls {@link #addNormsField}, passing
+   * an Iterable that merges and filters deleted documents on the fly.
+   */
+  public void mergeNormsField(final FieldInfo fieldInfo, final MergeState mergeState, final List<NumericDocValues> toMerge) throws IOException {
+
+    // TODO: try to share code with default merge of DVConsumer by passing MatchAllBits ?
+    addNormsField(fieldInfo,
+                  new Iterable<Number>() {
+                    @Override
+                    public Iterator<Number> iterator() {
+                      return new Iterator<Number>() {
+                        int readerUpto = -1;
+                        int docIDUpto;
+                        long nextValue;
+                        AtomicReader currentReader;
+                        NumericDocValues currentValues;
+                        Bits currentLiveDocs;
+                        boolean nextIsSet;
+
+                        @Override
+                        public boolean hasNext() {
+                          return nextIsSet || setNext();
+                        }
+
+                        @Override
+                        public void remove() {
+                          throw new UnsupportedOperationException();
+                        }
+
+                        @Override
+                        public Number next() {
+                          if (!hasNext()) {
+                            throw new NoSuchElementException();
+                          }
+                          assert nextIsSet;
+                          nextIsSet = false;
+                          return nextValue;
+                        }
+
+                        private boolean setNext() {
+                          while (true) {
+                            if (readerUpto == toMerge.size()) {
+                              return false;
+                            }
+
+                            if (currentReader == null || docIDUpto == currentReader.maxDoc()) {
+                              readerUpto++;
+                              if (readerUpto < toMerge.size()) {
+                                currentReader = mergeState.readers.get(readerUpto);
+                                currentValues = toMerge.get(readerUpto);
+                                currentLiveDocs = currentReader.getLiveDocs();
+                              }
+                              docIDUpto = 0;
+                              continue;
+                            }
+
+                            if (currentLiveDocs == null || currentLiveDocs.get(docIDUpto)) {
+                              nextIsSet = true;
+                              nextValue = currentValues.get(docIDUpto);
+                              docIDUpto++;
+                              return true;
+                            }
+
+                            docIDUpto++;
+                          }
+                        }
+                      };
+                    }
+                  });
+  }
+}
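NormsConsumer mirrors DocValuesConsumer but for a single numeric value per document: addNormsField is called once per field that has norms, and the default merge()/mergeNormsField above synthesize the per-document Iterable across segments, skipping deleted documents and substituting emptyNumeric() for segments that lack the field. A minimal hypothetical implementation that simply collects the pulled values in memory is sketched below (illustrative only; a real format would encode them to the directory):

import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import org.apache.lucene.codecs.NormsConsumer;
import org.apache.lucene.index.FieldInfo;

// Hypothetical in-memory consumer, only to show the pull-style API.
class InMemoryNormsConsumer extends NormsConsumer {
  final Map<String,List<Long>> norms = new HashMap<>();

  @Override
  public void addNormsField(FieldInfo field, Iterable<Number> values) throws IOException {
    List<Long> perDoc = new ArrayList<>();
    for (Number n : values) {   // the Iterable may be consumed more than once by real codecs
      perDoc.add(n.longValue());
    }
    norms.put(field.name, perDoc);
  }

  @Override
  public void close() throws IOException {
    // nothing to release here; a real implementation closes its outputs and writes footers
  }
}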
NormsFormat.java
@@ -31,12 +31,12 @@ public abstract class NormsFormat {
   protected NormsFormat() {
   }
 
-  /** Returns a {@link DocValuesConsumer} to write norms to the
+  /** Returns a {@link NormsConsumer} to write norms to the
    *  index. */
-  public abstract DocValuesConsumer normsConsumer(SegmentWriteState state) throws IOException;
+  public abstract NormsConsumer normsConsumer(SegmentWriteState state) throws IOException;
 
   /**
-   * Returns a {@link DocValuesProducer} to read norms from the index.
+   * Returns a {@link NormsProducer} to read norms from the index.
    * <p>
    * NOTE: by the time this call returns, it must hold open any files it will
    * need to use; else, those files may be deleted. Additionally, required files
@@ -45,5 +45,5 @@ public abstract class NormsFormat {
    * the implementation. IOExceptions are expected and will automatically cause
    * a retry of the segment opening logic with the newly revised segments.
    */
-  public abstract DocValuesProducer normsProducer(SegmentReadState state) throws IOException;
+  public abstract NormsProducer normsProducer(SegmentReadState state) throws IOException;
 }
NormsProducer.java (new file)
@@ -0,0 +1,50 @@
+package org.apache.lucene.codecs;
+
+import java.io.Closeable;
+import java.io.IOException;
+
+import org.apache.lucene.index.FieldInfo;
+import org.apache.lucene.index.NumericDocValues;
+import org.apache.lucene.util.Accountable;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/** Abstract API that produces field normalization values
+ *
+ * @lucene.experimental
+ */
+public abstract class NormsProducer implements Closeable, Accountable {
+
+  /** Sole constructor. (For invocation by subclass
+   *  constructors, typically implicit.) */
+  protected NormsProducer() {}
+
+  /** Returns {@link NumericDocValues} for this field.
+   *  The returned instance need not be thread-safe: it will only be
+   *  used by a single thread. */
+  public abstract NumericDocValues getNorms(FieldInfo field) throws IOException;
+
+  /**
+   * Checks consistency of this producer
+   * <p>
+   * Note that this may be costly in terms of I/O, e.g.
+   * may involve computing a checksum value against large data files.
+   * @lucene.internal
+   */
+  public abstract void checkIntegrity() throws IOException;
+}
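NormsProducer is the matching read-side abstraction: per field it returns a NumericDocValues, plus the Accountable and checkIntegrity plumbing shared with other producers. A minimal hypothetical producer that serves the same norm value for every document (illustrative only; real producers decode whatever the matching NormsConsumer wrote for the segment):

import java.io.IOException;

import org.apache.lucene.codecs.NormsProducer;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.NumericDocValues;

// Hypothetical producer returning a constant norm for every document.
class ConstantNormsProducer extends NormsProducer {
  private final long value;

  ConstantNormsProducer(long value) {
    this.value = value;
  }

  @Override
  public NumericDocValues getNorms(FieldInfo field) throws IOException {
    return new NumericDocValues() {
      @Override
      public long get(int docID) {
        return value;
      }
    };
  }

  @Override
  public void close() throws IOException {}

  @Override
  public long ramBytesUsed() {
    return 0;
  }

  @Override
  public void checkIntegrity() throws IOException {}
}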
CompressingStoredFieldsReader.java
@@ -31,8 +31,8 @@ import static org.apache.lucene.codecs.compressing.CompressingStoredFieldsWriter
 import static org.apache.lucene.codecs.compressing.CompressingStoredFieldsWriter.VERSION_CHECKSUM;
 import static org.apache.lucene.codecs.compressing.CompressingStoredFieldsWriter.VERSION_CURRENT;
 import static org.apache.lucene.codecs.compressing.CompressingStoredFieldsWriter.VERSION_START;
-import static org.apache.lucene.codecs.lucene40.Lucene40StoredFieldsWriter.FIELDS_EXTENSION;
-import static org.apache.lucene.codecs.lucene40.Lucene40StoredFieldsWriter.FIELDS_INDEX_EXTENSION;
+import static org.apache.lucene.codecs.compressing.CompressingStoredFieldsWriter.FIELDS_EXTENSION;
+import static org.apache.lucene.codecs.compressing.CompressingStoredFieldsWriter.FIELDS_INDEX_EXTENSION;
 
 import java.io.EOFException;
 import java.io.IOException;

CompressingStoredFieldsWriter.java
@@ -17,9 +17,6 @@ package org.apache.lucene.codecs.compressing;
  * limitations under the License.
  */
 
-import static org.apache.lucene.codecs.lucene40.Lucene40StoredFieldsWriter.FIELDS_EXTENSION;
-import static org.apache.lucene.codecs.lucene40.Lucene40StoredFieldsWriter.FIELDS_INDEX_EXTENSION;
-
 import java.io.IOException;
 import java.util.Arrays;
 
@@ -54,6 +51,12 @@ import org.apache.lucene.util.packed.PackedInts;
  */
 public final class CompressingStoredFieldsWriter extends StoredFieldsWriter {
 
+  /** Extension of stored fields file */
+  public static final String FIELDS_EXTENSION = "fdt";
+
+  /** Extension of stored fields index file */
+  public static final String FIELDS_INDEX_EXTENSION = "fdx";
+
   // hard limit on the maximum number of documents per chunk
   static final int MAX_DOCUMENTS_PER_CHUNK = 128;
 
@@ -331,9 +334,11 @@ public final class CompressingStoredFieldsWriter extends StoredFieldsWriter {
   public int merge(MergeState mergeState) throws IOException {
     int docCount = 0;
     int idx = 0;
 
+    MatchingReaders matching = new MatchingReaders(mergeState);
+
     for (AtomicReader reader : mergeState.readers) {
-      final SegmentReader matchingSegmentReader = mergeState.matchingSegmentReaders[idx++];
+      final SegmentReader matchingSegmentReader = matching.matchingSegmentReaders[idx++];
       CompressingStoredFieldsReader matchingFieldsReader = null;
       if (matchingSegmentReader != null) {
         final StoredFieldsReader fieldsReader = matchingSegmentReader.getFieldsReader();

CompressingTermVectorsWriter.java
@@ -730,8 +730,10 @@ public final class CompressingTermVectorsWriter extends TermVectorsWriter {
     int docCount = 0;
     int idx = 0;
 
+    MatchingReaders matching = new MatchingReaders(mergeState);
+
     for (AtomicReader reader : mergeState.readers) {
-      final SegmentReader matchingSegmentReader = mergeState.matchingSegmentReaders[idx++];
+      final SegmentReader matchingSegmentReader = matching.matchingSegmentReaders[idx++];
       CompressingTermVectorsReader matchingVectorsReader = null;
       if (matchingSegmentReader != null) {
         final TermVectorsReader vectorsReader = matchingSegmentReader.getTermVectorsReader();
MatchingReaders.java (new file)
@@ -0,0 +1,86 @@
+package org.apache.lucene.codecs.compressing;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.index.AtomicReader;
+import org.apache.lucene.index.FieldInfo;
+import org.apache.lucene.index.FieldInfos;
+import org.apache.lucene.index.MergeState;
+import org.apache.lucene.index.SegmentReader;
+
+/**
+ * Computes which segments have identical field name->number mappings,
+ * which allows stored fields and term vectors in this codec to be bulk-merged.
+ */
+class MatchingReaders {
+
+  /** {@link SegmentReader}s that have identical field
+   *  name/number mapping, so their stored fields and term
+   *  vectors may be bulk merged. */
+  final SegmentReader[] matchingSegmentReaders;
+
+  /** How many {@link #matchingSegmentReaders} are set. */
+  final int count;
+
+  MatchingReaders(MergeState mergeState) {
+    // If the i'th reader is a SegmentReader and has
+    // identical fieldName -> number mapping, then this
+    // array will be non-null at position i:
+    int numReaders = mergeState.readers.size();
+    int matchedCount = 0;
+    matchingSegmentReaders = new SegmentReader[numReaders];
+
+    // If this reader is a SegmentReader, and all of its
+    // field name -> number mappings match the "merged"
+    // FieldInfos, then we can do a bulk copy of the
+    // stored fields:
+    for (int i = 0; i < numReaders; i++) {
+      AtomicReader reader = mergeState.readers.get(i);
+      // TODO: we may be able to broaden this to
+      // non-SegmentReaders, since FieldInfos is now
+      // required?  But... this'd also require exposing
+      // bulk-copy (TVs and stored fields) API in foreign
+      // readers..
+      if (reader instanceof SegmentReader) {
+        SegmentReader segmentReader = (SegmentReader) reader;
+        boolean same = true;
+        FieldInfos segmentFieldInfos = segmentReader.getFieldInfos();
+        for (FieldInfo fi : segmentFieldInfos) {
+          FieldInfo other = mergeState.fieldInfos.fieldInfo(fi.number);
+          if (other == null || !other.name.equals(fi.name)) {
+            same = false;
+            break;
+          }
+        }
+        if (same) {
+          matchingSegmentReaders[i] = segmentReader;
+          matchedCount++;
+        }
+      }
+    }
+
+    this.count = matchedCount;
+
+    if (mergeState.infoStream.isEnabled("SM")) {
+      mergeState.infoStream.message("SM", "merge store matchedCount=" + count + " vs " + mergeState.readers.size());
+      if (count != mergeState.readers.size()) {
+        mergeState.infoStream.message("SM", "" + (mergeState.readers.size() - count) + " non-bulk merges");
+      }
+    }
+  }
+}
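MatchingReaders moves the "is the field name -> number mapping congruent?" test out of MergeState and into this package, so each writer builds it locally and decides reader by reader whether a raw bulk copy is safe. The CompressingStoredFieldsWriter and CompressingTermVectorsWriter hunks above consume it as sketched below; bulkCopy and copyDocByDoc are placeholder hooks for this illustration (not Lucene methods), and the class is assumed to live in org.apache.lucene.codecs.compressing because MatchingReaders is package-private.

package org.apache.lucene.codecs.compressing;

import java.io.IOException;

import org.apache.lucene.index.AtomicReader;
import org.apache.lucene.index.MergeState;
import org.apache.lucene.index.SegmentReader;

// Sketch of the consuming pattern for MatchingReaders.
abstract class BulkMergeSketch {
  abstract int bulkCopy(SegmentReader matchingSegmentReader) throws IOException;             // placeholder hook
  abstract int copyDocByDoc(AtomicReader reader, MergeState mergeState) throws IOException;  // placeholder hook

  public int merge(MergeState mergeState) throws IOException {
    int docCount = 0;
    int idx = 0;
    MatchingReaders matching = new MatchingReaders(mergeState);

    for (AtomicReader reader : mergeState.readers) {
      SegmentReader matchingSegmentReader = matching.matchingSegmentReaders[idx++];
      if (matchingSegmentReader != null) {
        // field numbering matches the merged FieldInfos: raw bytes can be copied
        docCount += bulkCopy(matchingSegmentReader);
      } else {
        // otherwise decode and re-encode document by document
        docCount += copyDocByDoc(reader, mergeState);
      }
    }
    return docCount;
  }
}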
Lucene40Codec.java
@@ -62,12 +62,12 @@ public class Lucene40Codec extends Codec {
   }
 
   @Override
-  public final StoredFieldsFormat storedFieldsFormat() {
+  public StoredFieldsFormat storedFieldsFormat() {
     return fieldsFormat;
   }
 
   @Override
-  public final TermVectorsFormat termVectorsFormat() {
+  public TermVectorsFormat termVectorsFormat() {
     return vectorsFormat;
   }
 
Lucene40NormsFormat.java
@@ -19,9 +19,9 @@ package org.apache.lucene.codecs.lucene40;
 
 import java.io.IOException;
 
-import org.apache.lucene.codecs.DocValuesConsumer;
-import org.apache.lucene.codecs.DocValuesProducer;
+import org.apache.lucene.codecs.NormsConsumer;
 import org.apache.lucene.codecs.NormsFormat;
+import org.apache.lucene.codecs.NormsProducer;
 import org.apache.lucene.index.IndexFileNames;
 import org.apache.lucene.index.SegmentReadState;
 import org.apache.lucene.index.SegmentWriteState;
@@ -49,15 +49,15 @@ public class Lucene40NormsFormat extends NormsFormat {
   public Lucene40NormsFormat() {}
 
   @Override
-  public DocValuesConsumer normsConsumer(SegmentWriteState state) throws IOException {
+  public NormsConsumer normsConsumer(SegmentWriteState state) throws IOException {
     throw new UnsupportedOperationException("this codec can only be used for reading");
   }
 
   @Override
-  public DocValuesProducer normsProducer(SegmentReadState state) throws IOException {
+  public NormsProducer normsProducer(SegmentReadState state) throws IOException {
     String filename = IndexFileNames.segmentFileName(state.segmentInfo.name,
                                                      "nrm",
                                                      IndexFileNames.COMPOUND_FILE_EXTENSION);
-    return new Lucene40DocValuesReader(state, filename, Lucene40FieldInfosReader.LEGACY_NORM_TYPE_KEY);
+    return new Lucene40NormsReader(state, filename);
   }
 }
Lucene40NormsReader.java (new file)
@@ -0,0 +1,59 @@
+package org.apache.lucene.codecs.lucene40;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+
+import org.apache.lucene.codecs.NormsProducer;
+import org.apache.lucene.index.FieldInfo;
+import org.apache.lucene.index.NumericDocValues;
+import org.apache.lucene.index.SegmentReadState;
+
+/**
+ * Reads 4.0/4.1 norms.
+ * Implemented the same as docvalues, but with a different filename.
+ * @deprecated Only for reading old 4.0 and 4.1 segments
+ */
+@Deprecated
+class Lucene40NormsReader extends NormsProducer {
+  private final Lucene40DocValuesReader impl;
+
+  public Lucene40NormsReader(SegmentReadState state, String filename) throws IOException {
+    impl = new Lucene40DocValuesReader(state, filename, Lucene40FieldInfosReader.LEGACY_NORM_TYPE_KEY);
+  }
+
+  @Override
+  public NumericDocValues getNorms(FieldInfo field) throws IOException {
+    return impl.getNumeric(field);
+  }
+
+  @Override
+  public void close() throws IOException {
+    impl.close();
+  }
+
+  @Override
+  public long ramBytesUsed() {
+    return impl.ramBytesUsed();
+  }
+
+  @Override
+  public void checkIntegrity() throws IOException {
+    impl.checkIntegrity();
+  }
+}
Lucene40StoredFieldsFormat.java
@@ -94,6 +94,6 @@ public class Lucene40StoredFieldsFormat extends StoredFieldsFormat {
   @Override
   public StoredFieldsWriter fieldsWriter(Directory directory, SegmentInfo si,
       IOContext context) throws IOException {
-    return new Lucene40StoredFieldsWriter(directory, si.name, context);
+    throw new UnsupportedOperationException("this codec can only be used for reading");
   }
 }
Lucene40StoredFieldsReader.java
@@ -37,8 +37,6 @@ import org.apache.lucene.util.RamUsageEstimator;
 import java.io.Closeable;
 import java.nio.charset.StandardCharsets;
 
-import static org.apache.lucene.codecs.lucene40.Lucene40StoredFieldsWriter.*;
-
 /**
  * Class responsible for access to stored document fields.
  * <p/>
@@ -49,6 +47,38 @@ import static org.apache.lucene.codecs.lucene40.Lucene40StoredFieldsWriter.*;
  */
 public final class Lucene40StoredFieldsReader extends StoredFieldsReader implements Cloneable, Closeable {
 
+  // NOTE: bit 0 is free here! You can steal it!
+  static final int FIELD_IS_BINARY = 1 << 1;
+
+  // the old bit 1 << 2 was compressed, is now left out
+
+  private static final int _NUMERIC_BIT_SHIFT = 3;
+  static final int FIELD_IS_NUMERIC_MASK = 0x07 << _NUMERIC_BIT_SHIFT;
+
+  static final int FIELD_IS_NUMERIC_INT = 1 << _NUMERIC_BIT_SHIFT;
+  static final int FIELD_IS_NUMERIC_LONG = 2 << _NUMERIC_BIT_SHIFT;
+  static final int FIELD_IS_NUMERIC_FLOAT = 3 << _NUMERIC_BIT_SHIFT;
+  static final int FIELD_IS_NUMERIC_DOUBLE = 4 << _NUMERIC_BIT_SHIFT;
+
+  // the next possible bits are: 1 << 6; 1 << 7
+  // currently unused: static final int FIELD_IS_NUMERIC_SHORT = 5 << _NUMERIC_BIT_SHIFT;
+  // currently unused: static final int FIELD_IS_NUMERIC_BYTE = 6 << _NUMERIC_BIT_SHIFT;
+
+  static final String CODEC_NAME_IDX = "Lucene40StoredFieldsIndex";
+  static final String CODEC_NAME_DAT = "Lucene40StoredFieldsData";
+  static final int VERSION_START = 0;
+  static final int VERSION_CURRENT = VERSION_START;
+  static final long HEADER_LENGTH_IDX = CodecUtil.headerLength(CODEC_NAME_IDX);
+  static final long HEADER_LENGTH_DAT = CodecUtil.headerLength(CODEC_NAME_DAT);
+
+  /** Extension of stored fields file */
+  public static final String FIELDS_EXTENSION = "fdt";
+
+  /** Extension of stored fields index file */
+  public static final String FIELDS_INDEX_EXTENSION = "fdx";
+
   private static final long RAM_BYTES_USED = RamUsageEstimator.shallowSizeOfInstance(Lucene40StoredFieldsReader.class);
 
   private final FieldInfos fieldInfos;
@@ -224,32 +254,6 @@ public final class Lucene40StoredFieldsReader extends StoredFieldsReader implements Cloneable, Closeable {
     }
   }
-
-  /** Returns the length in bytes of each raw document in a
-   *  contiguous range of length numDocs starting with
-   *  startDocID. Returns the IndexInput (the fieldStream),
-   *  already seeked to the starting point for startDocID.*/
-  public final IndexInput rawDocs(int[] lengths, int startDocID, int numDocs) throws IOException {
-    seekIndex(startDocID);
-    long startOffset = indexStream.readLong();
-    long lastOffset = startOffset;
-    int count = 0;
-    while (count < numDocs) {
-      final long offset;
-      final int docID = startDocID + count + 1;
-      assert docID <= numTotalDocs;
-      if (docID < numTotalDocs)
-        offset = indexStream.readLong();
-      else
-        offset = fieldsStream.length();
-      lengths[count++] = (int) (offset-lastOffset);
-      lastOffset = offset;
-    }
-
-    fieldsStream.seek(startOffset);
-
-    return fieldsStream;
-  }
 
   @Override
   public long ramBytesUsed() {
     return RAM_BYTES_USED;
@ -1,356 +0,0 @@
|
||||||
package org.apache.lucene.codecs.lucene40;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Copyright 2004 The Apache Software Foundation
|
|
||||||
*
|
|
||||||
* Licensed under the Apache License, Version 2.0 (the "License"); you may not
|
|
||||||
* use this file except in compliance with the License. You may obtain a copy of
|
|
||||||
* the License at
|
|
||||||
*
|
|
||||||
* http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
*
|
|
||||||
* Unless required by applicable law or agreed to in writing, software
|
|
||||||
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
|
|
||||||
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
|
||||||
* License for the specific language governing permissions and limitations under
|
|
||||||
* the License.
|
|
||||||
*/
|
|
||||||
|
|
||||||
import java.io.Closeable;
|
|
||||||
import java.io.IOException;
|
|
||||||
|
|
||||||
import org.apache.lucene.codecs.CodecUtil;
|
|
||||||
import org.apache.lucene.codecs.StoredFieldsReader;
|
|
||||||
import org.apache.lucene.codecs.StoredFieldsWriter;
|
|
||||||
import org.apache.lucene.document.Document;
|
|
||||||
import org.apache.lucene.index.AtomicReader;
|
|
||||||
import org.apache.lucene.index.FieldInfo;
|
|
||||||
import org.apache.lucene.index.FieldInfos;
|
|
||||||
import org.apache.lucene.index.IndexFileNames;
|
|
||||||
import org.apache.lucene.index.IndexableField;
|
|
||||||
import org.apache.lucene.index.MergeState;
|
|
||||||
import org.apache.lucene.index.SegmentReader;
|
|
||||||
import org.apache.lucene.index.StorableField;
|
|
||||||
import org.apache.lucene.index.StoredDocument;
|
|
||||||
import org.apache.lucene.store.Directory;
|
|
||||||
import org.apache.lucene.store.IOContext;
|
|
||||||
import org.apache.lucene.store.IndexInput;
|
|
||||||
import org.apache.lucene.store.IndexOutput;
|
|
||||||
import org.apache.lucene.store.RAMOutputStream;
|
|
||||||
import org.apache.lucene.util.Bits;
|
|
||||||
import org.apache.lucene.util.BytesRef;
|
|
||||||
import org.apache.lucene.util.IOUtils;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Class responsible for writing stored document fields.
|
|
||||||
* <p/>
|
|
||||||
* It uses <segment>.fdt and <segment>.fdx; files.
|
|
||||||
*
|
|
||||||
* @see Lucene40StoredFieldsFormat
|
|
||||||
* @lucene.experimental
|
|
||||||
*/
|
|
||||||
public final class Lucene40StoredFieldsWriter extends StoredFieldsWriter {
|
|
||||||
// NOTE: bit 0 is free here! You can steal it!
|
|
||||||
static final int FIELD_IS_BINARY = 1 << 1;
|
|
||||||
|
|
||||||
// the old bit 1 << 2 was compressed, is now left out
|
|
||||||
|
|
||||||
private static final int _NUMERIC_BIT_SHIFT = 3;
|
|
||||||
static final int FIELD_IS_NUMERIC_MASK = 0x07 << _NUMERIC_BIT_SHIFT;
|
|
||||||
|
|
||||||
static final int FIELD_IS_NUMERIC_INT = 1 << _NUMERIC_BIT_SHIFT;
|
|
||||||
static final int FIELD_IS_NUMERIC_LONG = 2 << _NUMERIC_BIT_SHIFT;
|
|
||||||
static final int FIELD_IS_NUMERIC_FLOAT = 3 << _NUMERIC_BIT_SHIFT;
|
|
||||||
static final int FIELD_IS_NUMERIC_DOUBLE = 4 << _NUMERIC_BIT_SHIFT;
|
|
||||||
|
|
||||||
// the next possible bits are: 1 << 6; 1 << 7
|
|
||||||
// currently unused: static final int FIELD_IS_NUMERIC_SHORT = 5 << _NUMERIC_BIT_SHIFT;
|
|
||||||
// currently unused: static final int FIELD_IS_NUMERIC_BYTE = 6 << _NUMERIC_BIT_SHIFT;
|
|
||||||
|
|
||||||
static final String CODEC_NAME_IDX = "Lucene40StoredFieldsIndex";
|
|
||||||
static final String CODEC_NAME_DAT = "Lucene40StoredFieldsData";
|
|
||||||
static final int VERSION_START = 0;
|
|
||||||
static final int VERSION_CURRENT = VERSION_START;
|
|
||||||
static final long HEADER_LENGTH_IDX = CodecUtil.headerLength(CODEC_NAME_IDX);
|
|
||||||
static final long HEADER_LENGTH_DAT = CodecUtil.headerLength(CODEC_NAME_DAT);
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/** Extension of stored fields file */
|
|
||||||
public static final String FIELDS_EXTENSION = "fdt";
|
|
||||||
|
|
||||||
/** Extension of stored fields index file */
|
|
||||||
public static final String FIELDS_INDEX_EXTENSION = "fdx";
|
|
||||||
|
|
||||||
private final Directory directory;
|
|
||||||
private final String segment;
|
|
||||||
private IndexOutput fieldsStream;
|
|
||||||
private IndexOutput indexStream;
|
|
||||||
private final RAMOutputStream fieldsBuffer = new RAMOutputStream();
|
|
||||||
|
|
||||||
/** Sole constructor. */
|
|
||||||
public Lucene40StoredFieldsWriter(Directory directory, String segment, IOContext context) throws IOException {
|
|
||||||
assert directory != null;
|
|
||||||
this.directory = directory;
|
|
||||||
this.segment = segment;
|
|
||||||
|
|
||||||
boolean success = false;
|
|
||||||
try {
|
|
||||||
fieldsStream = directory.createOutput(IndexFileNames.segmentFileName(segment, "", FIELDS_EXTENSION), context);
|
|
||||||
indexStream = directory.createOutput(IndexFileNames.segmentFileName(segment, "", FIELDS_INDEX_EXTENSION), context);
|
|
||||||
|
|
||||||
CodecUtil.writeHeader(fieldsStream, CODEC_NAME_DAT, VERSION_CURRENT);
|
|
||||||
CodecUtil.writeHeader(indexStream, CODEC_NAME_IDX, VERSION_CURRENT);
|
|
||||||
assert HEADER_LENGTH_DAT == fieldsStream.getFilePointer();
|
|
||||||
assert HEADER_LENGTH_IDX == indexStream.getFilePointer();
|
|
||||||
success = true;
|
|
||||||
} finally {
|
|
||||||
if (!success) {
|
|
||||||
abort();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
int numStoredFields;
|
|
||||||
|
|
||||||
// Writes the contents of buffer into the fields stream
|
|
||||||
// and adds a new entry for this document into the index
|
|
||||||
// stream. This assumes the buffer was already written
|
|
||||||
// in the correct fields format.
|
|
||||||
@Override
|
|
||||||
public void startDocument() throws IOException {
|
|
||||||
indexStream.writeLong(fieldsStream.getFilePointer());
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public void finishDocument() throws IOException {
|
|
||||||
fieldsStream.writeVInt(numStoredFields);
|
|
||||||
fieldsBuffer.writeTo(fieldsStream);
|
|
||||||
fieldsBuffer.reset();
|
|
||||||
numStoredFields = 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public void close() throws IOException {
|
|
||||||
try {
|
|
||||||
IOUtils.close(fieldsStream, indexStream);
|
|
||||||
} finally {
|
|
||||||
fieldsStream = indexStream = null;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public void abort() {
|
|
||||||
try {
|
|
||||||
close();
|
|
||||||
} catch (Throwable ignored) {}
|
|
||||||
IOUtils.deleteFilesIgnoringExceptions(directory,
|
|
||||||
IndexFileNames.segmentFileName(segment, "", FIELDS_EXTENSION),
|
|
||||||
IndexFileNames.segmentFileName(segment, "", FIELDS_INDEX_EXTENSION));
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public void writeField(FieldInfo info, StorableField field) throws IOException {
|
|
||||||
numStoredFields++;
|
|
||||||
|
|
||||||
fieldsBuffer.writeVInt(info.number);
|
|
||||||
int bits = 0;
|
|
||||||
final BytesRef bytes;
|
|
||||||
final String string;
|
|
||||||
// TODO: maybe a field should serialize itself?
|
|
||||||
// this way we don't bake into indexer all these
|
|
||||||
// specific encodings for different fields? and apps
|
|
||||||
// can customize...
|
|
||||||
|
|
||||||
Number number = field.numericValue();
|
|
||||||
if (number != null) {
|
|
||||||
if (number instanceof Byte || number instanceof Short || number instanceof Integer) {
|
|
||||||
bits |= FIELD_IS_NUMERIC_INT;
|
|
||||||
} else if (number instanceof Long) {
|
|
||||||
bits |= FIELD_IS_NUMERIC_LONG;
|
|
||||||
} else if (number instanceof Float) {
|
|
||||||
bits |= FIELD_IS_NUMERIC_FLOAT;
|
|
||||||
} else if (number instanceof Double) {
|
|
||||||
bits |= FIELD_IS_NUMERIC_DOUBLE;
|
|
||||||
} else {
|
|
||||||
throw new IllegalArgumentException("cannot store numeric type " + number.getClass());
|
|
||||||
}
|
|
||||||
string = null;
|
|
||||||
bytes = null;
|
|
||||||
} else {
|
|
||||||
bytes = field.binaryValue();
|
|
||||||
if (bytes != null) {
|
|
||||||
bits |= FIELD_IS_BINARY;
|
|
||||||
string = null;
|
|
||||||
} else {
|
|
||||||
string = field.stringValue();
|
|
||||||
if (string == null) {
|
|
||||||
throw new IllegalArgumentException("field " + field.name() + " is stored but does not have binaryValue, stringValue nor numericValue");
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
fieldsBuffer.writeByte((byte) bits);
|
|
||||||
|
|
||||||
if (bytes != null) {
|
|
||||||
fieldsBuffer.writeVInt(bytes.length);
|
|
||||||
fieldsBuffer.writeBytes(bytes.bytes, bytes.offset, bytes.length);
|
|
||||||
} else if (string != null) {
|
|
||||||
fieldsBuffer.writeString(field.stringValue());
|
|
||||||
} else {
|
|
||||||
if (number instanceof Byte || number instanceof Short || number instanceof Integer) {
|
|
||||||
fieldsBuffer.writeInt(number.intValue());
|
|
||||||
} else if (number instanceof Long) {
|
|
||||||
fieldsBuffer.writeLong(number.longValue());
|
|
||||||
} else if (number instanceof Float) {
|
|
||||||
fieldsBuffer.writeInt(Float.floatToIntBits(number.floatValue()));
|
|
||||||
-      } else if (number instanceof Double) {
-        fieldsBuffer.writeLong(Double.doubleToLongBits(number.doubleValue()));
-      } else {
-        throw new AssertionError("Cannot get here");
-      }
-    }
-  }
-
-  /** Bulk write a contiguous series of documents.  The
-   *  lengths array is the length (in bytes) of each raw
-   *  document.  The stream IndexInput is the
-   *  fieldsStream from which we should bulk-copy all
-   *  bytes. */
-  public void addRawDocuments(IndexInput stream, int[] lengths, int numDocs) throws IOException {
-    long position = fieldsStream.getFilePointer();
-    long start = position;
-    for(int i=0;i<numDocs;i++) {
-      indexStream.writeLong(position);
-      position += lengths[i];
-    }
-    fieldsStream.copyBytes(stream, position-start);
-    assert fieldsStream.getFilePointer() == position;
-  }
-
-  @Override
-  public void finish(FieldInfos fis, int numDocs) {
-    long indexFP = indexStream.getFilePointer();
-    if (HEADER_LENGTH_IDX+((long) numDocs)*8 != indexFP)
-      // This is most likely a bug in Sun JRE 1.6.0_04/_05;
-      // we detect that the bug has struck, here, and
-      // throw an exception to prevent the corruption from
-      // entering the index.  See LUCENE-1282 for
-      // details.
-      throw new RuntimeException("fdx size mismatch: docCount is " + numDocs + " but fdx file size is " + indexFP + " (wrote numDocs=" + ((indexFP-HEADER_LENGTH_IDX)/8.0) + " file=" + indexStream.toString() + "; now aborting this merge to prevent index corruption");
-  }
-
-  @Override
-  public int merge(MergeState mergeState) throws IOException {
-    int docCount = 0;
-    // Used for bulk-reading raw bytes for stored fields
-    int rawDocLengths[] = new int[MAX_RAW_MERGE_DOCS];
-    int idx = 0;
-
-    for (AtomicReader reader : mergeState.readers) {
-      final SegmentReader matchingSegmentReader = mergeState.matchingSegmentReaders[idx++];
-      Lucene40StoredFieldsReader matchingFieldsReader = null;
-      if (matchingSegmentReader != null) {
-        final StoredFieldsReader fieldsReader = matchingSegmentReader.getFieldsReader();
-        // we can only bulk-copy if the matching reader is also a Lucene40FieldsReader
-        if (fieldsReader != null && fieldsReader instanceof Lucene40StoredFieldsReader) {
-          matchingFieldsReader = (Lucene40StoredFieldsReader) fieldsReader;
-        }
-      }
-
-      if (reader.getLiveDocs() != null) {
-        docCount += copyFieldsWithDeletions(mergeState,
-                                            reader, matchingFieldsReader, rawDocLengths);
-      } else {
-        docCount += copyFieldsNoDeletions(mergeState,
-                                          reader, matchingFieldsReader, rawDocLengths);
-      }
-    }
-    finish(mergeState.fieldInfos, docCount);
-    return docCount;
-  }
-
-  /** Maximum number of contiguous documents to bulk-copy
-      when merging stored fields */
-  private final static int MAX_RAW_MERGE_DOCS = 4192;
-
-  private int copyFieldsWithDeletions(MergeState mergeState, final AtomicReader reader,
-                                      final Lucene40StoredFieldsReader matchingFieldsReader, int rawDocLengths[])
-    throws IOException {
-    int docCount = 0;
-    final int maxDoc = reader.maxDoc();
-    final Bits liveDocs = reader.getLiveDocs();
-    assert liveDocs != null;
-    if (matchingFieldsReader != null) {
-      // We can bulk-copy because the fieldInfos are "congruent"
-      for (int j = 0; j < maxDoc;) {
-        if (!liveDocs.get(j)) {
-          // skip deleted docs
-          ++j;
-          continue;
-        }
-        // We can optimize this case (doing a bulk byte copy) since the field
-        // numbers are identical
-        int start = j, numDocs = 0;
-        do {
-          j++;
-          numDocs++;
-          if (j >= maxDoc) break;
-          if (!liveDocs.get(j)) {
-            j++;
-            break;
-          }
-        } while(numDocs < MAX_RAW_MERGE_DOCS);
-
-        IndexInput stream = matchingFieldsReader.rawDocs(rawDocLengths, start, numDocs);
-        addRawDocuments(stream, rawDocLengths, numDocs);
-        docCount += numDocs;
-        mergeState.checkAbort.work(300 * numDocs);
-      }
-    } else {
-      for (int j = 0; j < maxDoc; j++) {
-        if (!liveDocs.get(j)) {
-          // skip deleted docs
-          continue;
-        }
-        // TODO: this could be more efficient using
-        // FieldVisitor instead of loading/writing entire
-        // doc; ie we just have to renumber the field number
-        // on the fly?
-        // NOTE: it's very important to first assign to doc then pass it to
-        // fieldsWriter.addDocument; see LUCENE-1282
-        StoredDocument doc = reader.document(j);
-        addDocument(doc, mergeState.fieldInfos);
-        docCount++;
-        mergeState.checkAbort.work(300);
-      }
-    }
-    return docCount;
-  }
-
-  private int copyFieldsNoDeletions(MergeState mergeState, final AtomicReader reader,
-                                    final Lucene40StoredFieldsReader matchingFieldsReader, int rawDocLengths[])
-    throws IOException {
-    final int maxDoc = reader.maxDoc();
-    int docCount = 0;
-    if (matchingFieldsReader != null) {
-      // We can bulk-copy because the fieldInfos are "congruent"
-      while (docCount < maxDoc) {
-        int len = Math.min(MAX_RAW_MERGE_DOCS, maxDoc - docCount);
-        IndexInput stream = matchingFieldsReader.rawDocs(rawDocLengths, docCount, len);
-        addRawDocuments(stream, rawDocLengths, len);
-        docCount += len;
-        mergeState.checkAbort.work(300 * len);
-      }
-    } else {
-      for (; docCount < maxDoc; docCount++) {
-        // NOTE: it's very important to first assign to doc then pass it to
-        // fieldsWriter.addDocument; see LUCENE-1282
-        StoredDocument doc = reader.document(docCount);
-        addDocument(doc, mergeState.fieldInfos);
-        mergeState.checkAbort.work(300);
-      }
-    }
-    return docCount;
-  }
-}
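The bulk path above copies raw bytes whenever the source segment's field numbering matches the merged segment. For contrast, below is a minimal sketch of the per-document fallback that the bulk copy avoids; "writer" stands in for a StoredFieldsWriter opened on the merged segment, and the snippet is an illustration rather than code from this commit:

    int docCount = 0;
    for (AtomicReader reader : mergeState.readers) {
      final int maxDoc = reader.maxDoc();
      final Bits liveDocs = reader.getLiveDocs();
      for (int i = 0; i < maxDoc; i++) {
        if (liveDocs != null && !liveDocs.get(i)) {
          continue; // skip deleted docs
        }
        // re-encode one document at a time instead of copying raw bytes
        StoredDocument doc = reader.document(i);
        writer.addDocument(doc, mergeState.fieldInfos);
        docCount++;
      }
    }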
@@ -126,6 +126,6 @@ public class Lucene40TermVectorsFormat extends TermVectorsFormat {
 
   @Override
   public TermVectorsWriter vectorsWriter(Directory directory, SegmentInfo segmentInfo, IOContext context) throws IOException {
-    return new Lucene40TermVectorsWriter(directory, segmentInfo.name, context);
+    throw new UnsupportedOperationException("this codec can only be used for reading");
   }
 }
@@ -19,7 +19,6 @@ package org.apache.lucene.codecs.lucene40;
 
 import java.io.Closeable;
 import java.io.IOException;
-import java.util.Arrays;
 import java.util.HashMap;
 import java.util.Iterator;
 import java.util.Map;
@@ -146,66 +145,11 @@ public class Lucene40TermVectorsReader extends TermVectorsReader implements Closeable {
     }
   }
 
-  // Used for bulk copy when merging
-  IndexInput getTvdStream() {
-    return tvd;
-  }
-
-  // Used for bulk copy when merging
-  IndexInput getTvfStream() {
-    return tvf;
-  }
-
   // Not private to avoid synthetic access$NNN methods
   void seekTvx(final int docNum) throws IOException {
     tvx.seek(docNum * 16L + HEADER_LENGTH_INDEX);
   }
 
-  /** Retrieve the length (in bytes) of the tvd and tvf
-   *  entries for the next numDocs starting with
-   *  startDocID.  This is used for bulk copying when
-   *  merging segments, if the field numbers are
-   *  congruent.  Once this returns, the tvf & tvd streams
-   *  are seeked to the startDocID. */
-  final void rawDocs(int[] tvdLengths, int[] tvfLengths, int startDocID, int numDocs) throws IOException {
-
-    if (tvx == null) {
-      Arrays.fill(tvdLengths, 0);
-      Arrays.fill(tvfLengths, 0);
-      return;
-    }
-
-    seekTvx(startDocID);
-
-    long tvdPosition = tvx.readLong();
-    tvd.seek(tvdPosition);
-
-    long tvfPosition = tvx.readLong();
-    tvf.seek(tvfPosition);
-
-    long lastTvdPosition = tvdPosition;
-    long lastTvfPosition = tvfPosition;
-
-    int count = 0;
-    while (count < numDocs) {
-      final int docID = startDocID + count + 1;
-      assert docID <= numTotalDocs;
-      if (docID < numTotalDocs) {
-        tvdPosition = tvx.readLong();
-        tvfPosition = tvx.readLong();
-      } else {
-        tvdPosition = tvd.length();
-        tvfPosition = tvf.length();
-        assert count == numDocs-1;
-      }
-      tvdLengths[count] = (int) (tvdPosition-lastTvdPosition);
-      tvfLengths[count] = (int) (tvfPosition-lastTvfPosition);
-      count++;
-      lastTvdPosition = tvdPosition;
-      lastTvfPosition = tvfPosition;
-    }
-  }
-
   @Override
   public void close() throws IOException {
     IOUtils.close(tvx, tvd, tvf);
@@ -86,7 +86,7 @@ public class Lucene41Codec extends Codec {
   }
 
   @Override
-  public final TermVectorsFormat termVectorsFormat() {
+  public TermVectorsFormat termVectorsFormat() {
     return vectorsFormat;
   }
 
@@ -20,11 +20,11 @@ package org.apache.lucene.codecs.lucene42;
 import java.io.IOException;
 
 import org.apache.lucene.codecs.Codec;
-import org.apache.lucene.codecs.DocValuesConsumer;
 import org.apache.lucene.codecs.DocValuesFormat;
 import org.apache.lucene.codecs.FieldInfosFormat;
 import org.apache.lucene.codecs.FilterCodec;
 import org.apache.lucene.codecs.LiveDocsFormat;
+import org.apache.lucene.codecs.NormsConsumer;
 import org.apache.lucene.codecs.NormsFormat;
 import org.apache.lucene.codecs.PostingsFormat;
 import org.apache.lucene.codecs.SegmentInfoFormat;
@@ -137,7 +137,7 @@ public class Lucene42Codec extends Codec {
 
   private final NormsFormat normsFormat = new Lucene42NormsFormat() {
     @Override
-    public DocValuesConsumer normsConsumer(SegmentWriteState state) throws IOException {
+    public NormsConsumer normsConsumer(SegmentWriteState state) throws IOException {
      throw new UnsupportedOperationException("this codec can only be used for reading");
     }
   };
@@ -21,7 +21,9 @@ import java.io.IOException;
 
 import org.apache.lucene.codecs.DocValuesConsumer;
 import org.apache.lucene.codecs.DocValuesProducer;
+import org.apache.lucene.codecs.NormsConsumer;
 import org.apache.lucene.codecs.NormsFormat;
+import org.apache.lucene.codecs.NormsProducer;
 import org.apache.lucene.index.SegmentReadState;
 import org.apache.lucene.index.SegmentWriteState;
 import org.apache.lucene.util.packed.PackedInts;
@@ -66,13 +68,13 @@ public class Lucene42NormsFormat extends NormsFormat {
   }
 
   @Override
-  public DocValuesConsumer normsConsumer(SegmentWriteState state) throws IOException {
+  public NormsConsumer normsConsumer(SegmentWriteState state) throws IOException {
     throw new UnsupportedOperationException("this codec can only be used for reading");
   }
 
   @Override
-  public DocValuesProducer normsProducer(SegmentReadState state) throws IOException {
-    return new Lucene42DocValuesProducer(state, DATA_CODEC, DATA_EXTENSION, METADATA_CODEC, METADATA_EXTENSION);
+  public NormsProducer normsProducer(SegmentReadState state) throws IOException {
+    return new Lucene42NormsProducer(state, DATA_CODEC, DATA_EXTENSION, METADATA_CODEC, METADATA_EXTENSION);
   }
 
   static final String DATA_CODEC = "Lucene41NormsData";
@@ -0,0 +1,59 @@
+package org.apache.lucene.codecs.lucene42;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+
+import org.apache.lucene.codecs.NormsProducer;
+import org.apache.lucene.index.FieldInfo;
+import org.apache.lucene.index.NumericDocValues;
+import org.apache.lucene.index.SegmentReadState;
+
+/**
+ * Reads 4.2-4.8 norms.
+ * Implemented the same as docvalues, but with a different filename.
+ * @deprecated Only for reading old segments
+ */
+@Deprecated
+class Lucene42NormsProducer extends NormsProducer {
+  private final Lucene42DocValuesProducer impl;
+
+  Lucene42NormsProducer(SegmentReadState state, String dataCodec, String dataExtension, String metaCodec, String metaExtension) throws IOException {
+    impl = new Lucene42DocValuesProducer(state, dataCodec, dataExtension, metaCodec, metaExtension);
+  }
+
+  @Override
+  public NumericDocValues getNorms(FieldInfo field) throws IOException {
+    return impl.getNumeric(field);
+  }
+
+  @Override
+  public void checkIntegrity() throws IOException {
+    impl.checkIntegrity();
+  }
+
+  @Override
+  public long ramBytesUsed() {
+    return impl.ramBytesUsed();
+  }
+
+  @Override
+  public void close() throws IOException {
+    impl.close();
+  }
+}
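A hedged usage sketch of the new producer (the field name "body" and the surrounding read state are assumptions, not taken from this commit): norms are now opened through NormsFormat.normsProducer and read per field via getNorms:

    NormsProducer producer = codec.normsFormat().normsProducer(readState);
    try {
      FieldInfo fi = readState.fieldInfos.fieldInfo("body");  // hypothetical field
      NumericDocValues norms = producer.getNorms(fi);         // delegates to the doc values reader
      long firstNorm = norms.get(0);                          // norms are random access by docID in 4.x
    } finally {
      producer.close();
    }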
@@ -20,11 +20,11 @@ package org.apache.lucene.codecs.lucene45;
 import java.io.IOException;
 
 import org.apache.lucene.codecs.Codec;
-import org.apache.lucene.codecs.DocValuesConsumer;
 import org.apache.lucene.codecs.DocValuesFormat;
 import org.apache.lucene.codecs.FieldInfosFormat;
 import org.apache.lucene.codecs.FilterCodec;
 import org.apache.lucene.codecs.LiveDocsFormat;
+import org.apache.lucene.codecs.NormsConsumer;
 import org.apache.lucene.codecs.NormsFormat;
 import org.apache.lucene.codecs.PostingsFormat;
 import org.apache.lucene.codecs.SegmentInfoFormat;
@@ -140,7 +140,7 @@ public class Lucene45Codec extends Codec {
 
   private final NormsFormat normsFormat = new Lucene42NormsFormat() {
     @Override
-    public DocValuesConsumer normsConsumer(SegmentWriteState state) throws IOException {
+    public NormsConsumer normsConsumer(SegmentWriteState state) throws IOException {
      throw new UnsupportedOperationException("this codec can only be used for reading");
     }
   };
@@ -20,11 +20,11 @@ package org.apache.lucene.codecs.lucene46;
 import java.io.IOException;
 
 import org.apache.lucene.codecs.Codec;
-import org.apache.lucene.codecs.DocValuesConsumer;
 import org.apache.lucene.codecs.DocValuesFormat;
 import org.apache.lucene.codecs.FieldInfosFormat;
 import org.apache.lucene.codecs.FilterCodec;
 import org.apache.lucene.codecs.LiveDocsFormat;
+import org.apache.lucene.codecs.NormsConsumer;
 import org.apache.lucene.codecs.NormsFormat;
 import org.apache.lucene.codecs.PostingsFormat;
 import org.apache.lucene.codecs.SegmentInfoFormat;
@@ -137,7 +137,7 @@ public class Lucene46Codec extends Codec {
 
   private final NormsFormat normsFormat = new Lucene42NormsFormat() {
     @Override
-    public DocValuesConsumer normsConsumer(SegmentWriteState state) throws IOException {
+    public NormsConsumer normsConsumer(SegmentWriteState state) throws IOException {
      throw new UnsupportedOperationException("this codec can only be used for reading");
     }
   };
@@ -20,11 +20,11 @@ package org.apache.lucene.codecs.lucene49;
 import java.io.IOException;
 
 import org.apache.lucene.codecs.Codec;
-import org.apache.lucene.codecs.DocValuesConsumer;
 import org.apache.lucene.codecs.DocValuesFormat;
 import org.apache.lucene.codecs.FieldInfosFormat;
 import org.apache.lucene.codecs.FilterCodec;
 import org.apache.lucene.codecs.LiveDocsFormat;
+import org.apache.lucene.codecs.NormsConsumer;
 import org.apache.lucene.codecs.NormsFormat;
 import org.apache.lucene.codecs.PostingsFormat;
 import org.apache.lucene.codecs.SegmentInfoFormat;
@@ -32,7 +32,6 @@ import org.apache.lucene.codecs.StoredFieldsFormat;
 import org.apache.lucene.codecs.TermVectorsFormat;
 import org.apache.lucene.codecs.lucene40.Lucene40LiveDocsFormat;
 import org.apache.lucene.codecs.lucene41.Lucene41StoredFieldsFormat;
-import org.apache.lucene.codecs.lucene42.Lucene42NormsFormat;
 import org.apache.lucene.codecs.lucene42.Lucene42TermVectorsFormat;
 import org.apache.lucene.codecs.lucene46.Lucene46FieldInfosFormat;
 import org.apache.lucene.codecs.lucene46.Lucene46SegmentInfoFormat;
@@ -137,7 +136,7 @@ public class Lucene49Codec extends Codec {
 
   private final NormsFormat normsFormat = new Lucene49NormsFormat() {
     @Override
-    public DocValuesConsumer normsConsumer(SegmentWriteState state) throws IOException {
+    public NormsConsumer normsConsumer(SegmentWriteState state) throws IOException {
      throw new UnsupportedOperationException("this codec can only be used for reading");
     }
   };
@@ -23,12 +23,11 @@ import java.util.HashMap;
 import java.util.Map;
 
 import org.apache.lucene.codecs.CodecUtil;
-import org.apache.lucene.codecs.DocValuesConsumer;
+import org.apache.lucene.codecs.NormsConsumer;
 import org.apache.lucene.index.FieldInfo;
 import org.apache.lucene.index.IndexFileNames;
 import org.apache.lucene.index.SegmentWriteState;
 import org.apache.lucene.store.IndexOutput;
-import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.IOUtils;
 import org.apache.lucene.util.packed.BlockPackedWriter;
 import org.apache.lucene.util.packed.PackedInts;
@@ -38,7 +37,7 @@ import static org.apache.lucene.codecs.lucene49.Lucene49NormsFormat.VERSION_CURRENT;
 /**
  * Writer for {@link Lucene49NormsFormat}
  */
-class Lucene49NormsConsumer extends DocValuesConsumer {
+class Lucene49NormsConsumer extends NormsConsumer {
   static final byte DELTA_COMPRESSED = 0;
   static final byte TABLE_COMPRESSED = 1;
   static final byte CONST_COMPRESSED = 2;
@@ -74,7 +73,7 @@ class Lucene49NormsConsumer extends DocValuesConsumer {
   }
 
   @Override
-  public void addNumericField(FieldInfo field, Iterable<Number> values) throws IOException {
+  public void addNormsField(FieldInfo field, Iterable<Number> values) throws IOException {
     meta.writeVInt(field.number);
     long minValue = Long.MAX_VALUE;
     long maxValue = Long.MIN_VALUE;
@@ -186,26 +185,6 @@ class Lucene49NormsConsumer extends DocValuesConsumer {
       meta = data = null;
     }
   }
-
-  @Override
-  public void addBinaryField(FieldInfo field, final Iterable<BytesRef> values) throws IOException {
-    throw new UnsupportedOperationException();
-  }
-
-  @Override
-  public void addSortedField(FieldInfo field, Iterable<BytesRef> values, Iterable<Number> docToOrd) throws IOException {
-    throw new UnsupportedOperationException();
-  }
-
-  @Override
-  public void addSortedSetField(FieldInfo field, Iterable<BytesRef> values, final Iterable<Number> docToOrdCount, final Iterable<Number> ords) throws IOException {
-    throw new UnsupportedOperationException();
-  }
-
-  @Override
-  public void addSortedNumericField(FieldInfo field, Iterable<Number> docToValueCount, Iterable<Number> values) throws IOException {
-    throw new UnsupportedOperationException();
-  }
 
   // specialized deduplication of long->ord for norms: 99.99999% of the time this will be a single-byte range.
   static class NormMap {
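The NormMap mentioned above backs TABLE_COMPRESSED. As an illustration of the idea only (not the actual NormMap code), a field with at most 256 distinct norm values can be stored as a small table of longs plus one ordinal byte per document:

    // Assumed inputs: norms[] holds one long per document for a single field.
    static byte[] tableCompress(long[] norms, java.util.List<Long> tableOut) {
      java.util.Map<Long,Integer> ords = new java.util.LinkedHashMap<>();
      byte[] perDoc = new byte[norms.length];
      for (int doc = 0; doc < norms.length; doc++) {
        Integer ord = ords.get(norms[doc]);
        if (ord == null) {
          ord = ords.size();            // next free ordinal for a new distinct value
          ords.put(norms[doc], ord);
        }
        perDoc[doc] = (byte) (int) ord; // caller falls back to delta compression once ords.size() > 256
      }
      tableOut.addAll(ords.keySet());   // decode table, in first-seen order
      return perDoc;
    }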
@@ -20,9 +20,9 @@ package org.apache.lucene.codecs.lucene49;
 import java.io.IOException;
 
 import org.apache.lucene.codecs.CodecUtil;
-import org.apache.lucene.codecs.DocValuesConsumer;
-import org.apache.lucene.codecs.DocValuesProducer;
+import org.apache.lucene.codecs.NormsConsumer;
 import org.apache.lucene.codecs.NormsFormat;
+import org.apache.lucene.codecs.NormsProducer;
 import org.apache.lucene.index.SegmentReadState;
 import org.apache.lucene.index.SegmentWriteState;
 import org.apache.lucene.store.DataOutput;
@@ -103,12 +103,12 @@ public class Lucene49NormsFormat extends NormsFormat {
   public Lucene49NormsFormat() {}
 
   @Override
-  public DocValuesConsumer normsConsumer(SegmentWriteState state) throws IOException {
+  public NormsConsumer normsConsumer(SegmentWriteState state) throws IOException {
     return new Lucene49NormsConsumer(state, DATA_CODEC, DATA_EXTENSION, METADATA_CODEC, METADATA_EXTENSION);
   }
 
   @Override
-  public DocValuesProducer normsProducer(SegmentReadState state) throws IOException {
+  public NormsProducer normsProducer(SegmentReadState state) throws IOException {
     return new Lucene49NormsProducer(state, DATA_CODEC, DATA_EXTENSION, METADATA_CODEC, METADATA_EXTENSION);
   }
 
@@ -23,20 +23,15 @@ import java.util.Map;
 import java.util.concurrent.atomic.AtomicLong;
 
 import org.apache.lucene.codecs.CodecUtil;
-import org.apache.lucene.codecs.DocValuesProducer;
+import org.apache.lucene.codecs.NormsProducer;
-import org.apache.lucene.index.BinaryDocValues;
 import org.apache.lucene.index.CorruptIndexException;
 import org.apache.lucene.index.FieldInfo;
 import org.apache.lucene.index.FieldInfos;
 import org.apache.lucene.index.IndexFileNames;
 import org.apache.lucene.index.NumericDocValues;
 import org.apache.lucene.index.SegmentReadState;
-import org.apache.lucene.index.SortedDocValues;
-import org.apache.lucene.index.SortedNumericDocValues;
-import org.apache.lucene.index.SortedSetDocValues;
 import org.apache.lucene.store.ChecksumIndexInput;
 import org.apache.lucene.store.IndexInput;
-import org.apache.lucene.util.Bits;
 import org.apache.lucene.util.IOUtils;
 import org.apache.lucene.util.RamUsageEstimator;
 import org.apache.lucene.util.packed.BlockPackedReader;
@@ -52,7 +47,7 @@ import static org.apache.lucene.codecs.lucene49.Lucene49NormsConsumer.UNCOMPRESSED;
 /**
  * Reader for {@link Lucene49NormsFormat}
  */
-class Lucene49NormsProducer extends DocValuesProducer {
+class Lucene49NormsProducer extends NormsProducer {
   // metadata maps (just file pointers and minimal stuff)
   private final Map<Integer,NormsEntry> norms = new HashMap<>();
   private final IndexInput data;
@@ -134,7 +129,7 @@ class Lucene49NormsProducer extends DocValuesProducer {
   }
 
   @Override
-  public synchronized NumericDocValues getNumeric(FieldInfo field) throws IOException {
+  public synchronized NumericDocValues getNorms(FieldInfo field) throws IOException {
     NumericDocValues instance = instances.get(field.number);
     if (instance == null) {
       instance = loadNorms(field);
@@ -208,31 +203,6 @@ class Lucene49NormsProducer extends DocValuesProducer {
     }
   }
 
-  @Override
-  public BinaryDocValues getBinary(FieldInfo field) throws IOException {
-    throw new IllegalStateException();
-  }
-
-  @Override
-  public SortedDocValues getSorted(FieldInfo field) throws IOException {
-    throw new IllegalStateException();
-  }
-
-  @Override
-  public SortedSetDocValues getSortedSet(FieldInfo field) throws IOException {
-    throw new IllegalStateException();
-  }
-
-  @Override
-  public SortedNumericDocValues getSortedNumeric(FieldInfo field) throws IOException {
-    throw new IllegalStateException();
-  }
-
-  @Override
-  public Bits getDocsWithField(FieldInfo field) throws IOException {
-    throw new IllegalStateException();
-  }
-
   @Override
   public void close() throws IOException {
     data.close();
@@ -26,6 +26,7 @@ import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.codecs.DocValuesConsumer;
 import org.apache.lucene.codecs.DocValuesFormat;
 import org.apache.lucene.codecs.FieldInfosWriter;
+import org.apache.lucene.codecs.NormsConsumer;
 import org.apache.lucene.codecs.NormsFormat;
 import org.apache.lucene.codecs.StoredFieldsWriter;
 import org.apache.lucene.document.FieldType;
@@ -171,7 +172,7 @@ final class DefaultIndexingChain extends DocConsumer {
 
   private void writeNorms(SegmentWriteState state) throws IOException {
     boolean success = false;
-    DocValuesConsumer normsConsumer = null;
+    NormsConsumer normsConsumer = null;
     try {
       if (state.fieldInfos.hasNorms()) {
         NormsFormat normsFormat = state.segmentInfo.getCodec().normsFormat();
@@ -405,7 +406,7 @@ final class DefaultIndexingChain extends DocConsumer {
 
         case NUMERIC:
           if (fp.docValuesWriter == null) {
-            fp.docValuesWriter = new NumericDocValuesWriter(fp.fieldInfo, bytesUsed, true);
+            fp.docValuesWriter = new NumericDocValuesWriter(fp.fieldInfo, bytesUsed);
           }
           ((NumericDocValuesWriter) fp.docValuesWriter).addValue(docID, field.numericValue().longValue());
           break;
@@ -520,7 +521,7 @@ final class DefaultIndexingChain extends DocConsumer {
     PerField next;
 
     // Lazy init'd:
-    NumericDocValuesWriter norms;
+    NormValuesWriter norms;
 
     // reused
     TokenStream tokenStream;
@@ -547,7 +548,7 @@ final class DefaultIndexingChain extends DocConsumer {
       if (fieldInfo.omitsNorms() == false) {
        if (norms == null) {
          fieldInfo.setNormValueType(FieldInfo.DocValuesType.NUMERIC);
-          norms = new NumericDocValuesWriter(fieldInfo, docState.docWriter.bytesUsed, false);
+          norms = new NormValuesWriter(fieldInfo, docState.docWriter.bytesUsed);
        }
        norms.addValue(docState.docID, similarity.computeNorm(invertState));
      }
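Putting the writeNorms fragments above together, the overall flow with the new consumer type looks roughly like this (an abridged sketch; the per-field flush is indicated as a comment because that loop body is not shown in this hunk):

    NormsConsumer normsConsumer = null;
    boolean success = false;
    try {
      if (state.fieldInfos.hasNorms()) {
        NormsFormat normsFormat = state.segmentInfo.getCodec().normsFormat();
        normsConsumer = normsFormat.normsConsumer(state);
        // each indexed field with norms flushes its buffered NormValuesWriter here,
        // e.g. perField.norms.flush(state, normsConsumer);
      }
      success = true;
    } finally {
      if (success) {
        IOUtils.close(normsConsumer);
      } else {
        IOUtils.closeWhileHandlingException(normsConsumer);
      }
    }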
@@ -27,11 +27,14 @@ import static org.apache.lucene.index.FilterAtomicReader.FilterTermsEnum;
 
 /** A {@link Fields} implementation that merges multiple
  *  Fields into one, and maps around deleted documents.
- *  This is used for merging. */
-class MappedMultiFields extends FilterFields {
+ *  This is used for merging.
+ *  @lucene.internal
+ */
+public class MappedMultiFields extends FilterFields {
   final MergeState mergeState;
 
+  /** Create a new MappedMultiFields for merging, based on the supplied
+   *  mergestate and merged view of terms. */
   public MappedMultiFields(MergeState mergeState, MultiFields multiFields) {
     super(multiFields);
     this.mergeState = mergeState;
@@ -156,17 +156,6 @@ public class MergeState {
    *  @lucene.internal */
   public int checkAbortCount;
 
-  // TODO: get rid of this? it tells you which segments are 'aligned' (e.g. for bulk merging)
-  // but is this really so expensive to compute again in different components, versus once in SM?
-
-  /** {@link SegmentReader}s that have identical field
-   *  name/number mapping, so their stored fields and term
-   *  vectors may be bulk merged. */
-  public SegmentReader[] matchingSegmentReaders;
-
-  /** How many {@link #matchingSegmentReaders} are set. */
-  public int matchedCount;
-
   /** Sole constructor. */
   MergeState(List<AtomicReader> readers, SegmentInfo segmentInfo, InfoStream infoStream, CheckAbort checkAbort) {
     this.readers = readers;
@@ -0,0 +1,120 @@
+package org.apache.lucene.index;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.util.Iterator;
+import java.util.NoSuchElementException;
+
+import org.apache.lucene.codecs.NormsConsumer;
+import org.apache.lucene.util.Counter;
+import org.apache.lucene.util.packed.PackedInts;
+import org.apache.lucene.util.packed.PackedLongValues;
+
+/** Buffers up pending long per doc, then flushes when
+ *  segment flushes. */
+class NormValuesWriter {
+
+  private final static long MISSING = 0L;
+
+  private PackedLongValues.Builder pending;
+  private final Counter iwBytesUsed;
+  private long bytesUsed;
+  private final FieldInfo fieldInfo;
+
+  public NormValuesWriter(FieldInfo fieldInfo, Counter iwBytesUsed) {
+    pending = PackedLongValues.deltaPackedBuilder(PackedInts.COMPACT);
+    bytesUsed = pending.ramBytesUsed();
+    this.fieldInfo = fieldInfo;
+    this.iwBytesUsed = iwBytesUsed;
+    iwBytesUsed.addAndGet(bytesUsed);
+  }
+
+  public void addValue(int docID, long value) {
+    // Fill in any holes:
+    for (int i = (int)pending.size(); i < docID; ++i) {
+      pending.add(MISSING);
+    }
+
+    pending.add(value);
+    updateBytesUsed();
+  }
+
+  private void updateBytesUsed() {
+    final long newBytesUsed = pending.ramBytesUsed();
+    iwBytesUsed.addAndGet(newBytesUsed - bytesUsed);
+    bytesUsed = newBytesUsed;
+  }
+
+  public void finish(int maxDoc) {
+  }
+
+  public void flush(SegmentWriteState state, NormsConsumer normsConsumer) throws IOException {
+
+    final int maxDoc = state.segmentInfo.getDocCount();
+    final PackedLongValues values = pending.build();
+
+    normsConsumer.addNormsField(fieldInfo,
+                                new Iterable<Number>() {
+                                  @Override
+                                  public Iterator<Number> iterator() {
+                                    return new NumericIterator(maxDoc, values);
+                                  }
+                                });
+  }
+
+  // iterates over the values we have in ram
+  private static class NumericIterator implements Iterator<Number> {
+    final PackedLongValues.Iterator iter;
+    final int size;
+    final int maxDoc;
+    int upto;
+
+    NumericIterator(int maxDoc, PackedLongValues values) {
+      this.maxDoc = maxDoc;
+      this.iter = values.iterator();
+      this.size = (int) values.size();
+    }
+
+    @Override
+    public boolean hasNext() {
+      return upto < maxDoc;
+    }
+
+    @Override
+    public Number next() {
+      if (!hasNext()) {
+        throw new NoSuchElementException();
+      }
+      Long value;
+      if (upto < size) {
+        value = iter.next();
+      } else {
+        value = MISSING;
+      }
+      upto++;
+      return value;
+    }
+
+    @Override
+    public void remove() {
+      throw new UnsupportedOperationException();
+    }
+  }
+}
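A hedged usage sketch of the class above, mirroring how DefaultIndexingChain wires it up elsewhere in this commit (fieldInfo, bytesUsed, similarity, invertState and normsConsumer are assumed to come from the surrounding indexing chain):

    NormValuesWriter norms = new NormValuesWriter(fieldInfo, bytesUsed);
    norms.addValue(docID, similarity.computeNorm(invertState)); // once per document that has the field
    // ... at segment flush time:
    norms.finish(maxDoc);
    norms.flush(segmentWriteState, normsConsumer);              // hands an Iterable<Number> to addNormsField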
@@ -40,9 +40,9 @@ class NumericDocValuesWriter extends DocValuesWriter {
   private FixedBitSet docsWithField;
   private final FieldInfo fieldInfo;
 
-  public NumericDocValuesWriter(FieldInfo fieldInfo, Counter iwBytesUsed, boolean trackDocsWithField) {
+  public NumericDocValuesWriter(FieldInfo fieldInfo, Counter iwBytesUsed) {
     pending = PackedLongValues.deltaPackedBuilder(PackedInts.COMPACT);
-    docsWithField = trackDocsWithField ? new FixedBitSet(64) : null;
+    docsWithField = new FixedBitSet(64);
     bytesUsed = pending.ramBytesUsed() + docsWithFieldBytesUsed();
     this.fieldInfo = fieldInfo;
     this.iwBytesUsed = iwBytesUsed;
@@ -60,17 +60,15 @@ class NumericDocValuesWriter extends DocValuesWriter {
     }
 
     pending.add(value);
-    if (docsWithField != null) {
-      docsWithField = FixedBitSet.ensureCapacity(docsWithField, docID);
-      docsWithField.set(docID);
-    }
+    docsWithField = FixedBitSet.ensureCapacity(docsWithField, docID);
+    docsWithField.set(docID);
 
     updateBytesUsed();
   }
 
   private long docsWithFieldBytesUsed() {
     // size of the long[] + some overhead
-    return docsWithField == null ? 0 : RamUsageEstimator.sizeOf(docsWithField.getBits()) + 64;
+    return RamUsageEstimator.sizeOf(docsWithField.getBits()) + 64;
   }
 
   private void updateBytesUsed() {
@@ -126,13 +124,13 @@ class NumericDocValuesWriter extends DocValuesWriter {
       Long value;
       if (upto < size) {
         long v = iter.next();
-        if (docsWithField == null || docsWithField.get(upto)) {
+        if (docsWithField.get(upto)) {
           value = v;
         } else {
           value = null;
         }
       } else {
-        value = docsWithField != null ? null : MISSING;
+        value = null;
       }
       upto++;
       return value;
@@ -28,6 +28,7 @@ import java.util.concurrent.atomic.AtomicInteger;
 import org.apache.lucene.codecs.Codec;
 import org.apache.lucene.codecs.DocValuesProducer;
 import org.apache.lucene.codecs.FieldsProducer;
+import org.apache.lucene.codecs.NormsProducer;
 import org.apache.lucene.codecs.PostingsFormat;
 import org.apache.lucene.codecs.StoredFieldsReader;
 import org.apache.lucene.codecs.TermVectorsReader;
@@ -56,7 +57,7 @@ final class SegmentCoreReaders implements Accountable {
   private final AtomicInteger ref = new AtomicInteger(1);
 
   final FieldsProducer fields;
-  final DocValuesProducer normsProducer;
+  final NormsProducer normsProducer;
 
   final StoredFieldsReader fieldsReaderOrig;
   final TermVectorsReader termVectorsReaderOrig;
@@ -166,7 +167,7 @@ final class SegmentCoreReaders implements Accountable {
       return null;
     }
     assert normsProducer != null;
-    norms = normsProducer.getNumeric(fi);
+    norms = normsProducer.getNorms(fi);
     normFields.put(field, norms);
     return norms;
   }
@@ -0,0 +1,173 @@
+package org.apache.lucene.index;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.IdentityHashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+
+import org.apache.lucene.codecs.DocValuesFormat;
+import org.apache.lucene.codecs.DocValuesProducer;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.IOContext;
+import org.apache.lucene.util.Bits;
+import org.apache.lucene.util.RamUsageEstimator;
+import org.apache.lucene.util.Version;
+
+/** Encapsulates multiple producers when there are docvalues updates as one producer */
+// TODO: try to clean up close? no-op?
+// TODO: add shared base class (also used by per-field-pf?) to allow "punching thru" to low level producer?
+class SegmentDocValuesProducer extends DocValuesProducer {
+
+  private static final long LONG_RAM_BYTES_USED = RamUsageEstimator.shallowSizeOfInstance(Long.class);
+  private static final long BASE_RAM_BYTES_USED =
+      RamUsageEstimator.shallowSizeOfInstance(SegmentDocValuesProducer.class);
+
+  final Map<String,DocValuesProducer> dvProducersByField = new HashMap<>();
+  final Set<DocValuesProducer> dvProducers = Collections.newSetFromMap(new IdentityHashMap<DocValuesProducer,Boolean>());
+  final List<Long> dvGens = new ArrayList<>();
+
+  SegmentDocValuesProducer(SegmentCommitInfo si, Directory dir, FieldInfos fieldInfos, SegmentDocValues segDocValues, DocValuesFormat dvFormat) throws IOException {
+    Version ver = si.info.getVersion();
+    if (ver != null && ver.onOrAfter(Version.LUCENE_4_9_0)) {
+      DocValuesProducer baseProducer = null;
+      for (FieldInfo fi : fieldInfos) {
+        if (!fi.hasDocValues()) continue;
+        long docValuesGen = fi.getDocValuesGen();
+        if (docValuesGen == -1) {
+          if (baseProducer == null) {
+            // the base producer gets all the fields, so the Codec can validate properly
+            baseProducer = segDocValues.getDocValuesProducer(docValuesGen, si, IOContext.READ, dir, dvFormat, fieldInfos);
+            dvGens.add(docValuesGen);
+            dvProducers.add(baseProducer);
+          }
+          dvProducersByField.put(fi.name, baseProducer);
+        } else {
+          assert !dvGens.contains(docValuesGen);
+          final DocValuesProducer dvp = segDocValues.getDocValuesProducer(docValuesGen, si, IOContext.READ, dir, dvFormat, new FieldInfos(new FieldInfo[] { fi }));
+          dvGens.add(docValuesGen);
+          dvProducers.add(dvp);
+          dvProducersByField.put(fi.name, dvp);
+        }
+      }
+    } else {
+      // For pre-4.9 indexes, especially with doc-values updates, multiple
+      // FieldInfos could belong to the same dvGen. Therefore need to make sure
+      // we initialize each DocValuesProducer once per gen.
+      Map<Long,List<FieldInfo>> genInfos = new HashMap<>();
+      for (FieldInfo fi : fieldInfos) {
+        if (!fi.hasDocValues()) continue;
+        List<FieldInfo> genFieldInfos = genInfos.get(fi.getDocValuesGen());
+        if (genFieldInfos == null) {
+          genFieldInfos = new ArrayList<>();
+          genInfos.put(fi.getDocValuesGen(), genFieldInfos);
+        }
+        genFieldInfos.add(fi);
+      }
+
+      for (Map.Entry<Long,List<FieldInfo>> e : genInfos.entrySet()) {
+        long docValuesGen = e.getKey();
+        List<FieldInfo> infos = e.getValue();
+        final DocValuesProducer dvp;
+        if (docValuesGen == -1) {
+          // we need to send all FieldInfos to gen=-1, but later we need to
+          // record the DVP only for the "true" gen=-1 fields (not updated)
+          dvp = segDocValues.getDocValuesProducer(docValuesGen, si, IOContext.READ, dir, dvFormat, fieldInfos);
+        } else {
+          dvp = segDocValues.getDocValuesProducer(docValuesGen, si, IOContext.READ, dir, dvFormat, new FieldInfos(infos.toArray(new FieldInfo[infos.size()])));
+        }
+        dvGens.add(docValuesGen);
+        dvProducers.add(dvp);
+        for (FieldInfo fi : infos) {
+          dvProducersByField.put(fi.name, dvp);
+        }
+      }
+    }
+  }
+
+  @Override
+  public NumericDocValues getNumeric(FieldInfo field) throws IOException {
+    DocValuesProducer dvProducer = dvProducersByField.get(field.name);
+    assert dvProducer != null;
+    return dvProducer.getNumeric(field);
+  }
+
+  @Override
+  public BinaryDocValues getBinary(FieldInfo field) throws IOException {
+    DocValuesProducer dvProducer = dvProducersByField.get(field.name);
+    assert dvProducer != null;
+    return dvProducer.getBinary(field);
+  }
+
+  @Override
+  public SortedDocValues getSorted(FieldInfo field) throws IOException {
+    DocValuesProducer dvProducer = dvProducersByField.get(field.name);
+    assert dvProducer != null;
+    return dvProducer.getSorted(field);
+  }
+
+  @Override
+  public SortedNumericDocValues getSortedNumeric(FieldInfo field) throws IOException {
+    DocValuesProducer dvProducer = dvProducersByField.get(field.name);
+    assert dvProducer != null;
+    return dvProducer.getSortedNumeric(field);
+  }
+
+  @Override
+  public SortedSetDocValues getSortedSet(FieldInfo field) throws IOException {
+    DocValuesProducer dvProducer = dvProducersByField.get(field.name);
+    assert dvProducer != null;
+    return dvProducer.getSortedSet(field);
+  }
+
+  @Override
+  public Bits getDocsWithField(FieldInfo field) throws IOException {
+    DocValuesProducer dvProducer = dvProducersByField.get(field.name);
+    assert dvProducer != null;
+    return dvProducer.getDocsWithField(field);
+  }
+
+  @Override
+  public void checkIntegrity() throws IOException {
+    for (DocValuesProducer producer : dvProducers) {
+      producer.checkIntegrity();
+    }
+  }
+
+  @Override
+  public void close() throws IOException {
+    throw new UnsupportedOperationException(); // there is separate ref tracking
+  }
+
+  @Override
+  public long ramBytesUsed() {
+    long ramBytesUsed = BASE_RAM_BYTES_USED;
+    ramBytesUsed += dvGens.size() * LONG_RAM_BYTES_USED;
+    ramBytesUsed += dvProducers.size() * RamUsageEstimator.NUM_BYTES_OBJECT_REF;
+    ramBytesUsed += dvProducersByField.size() * 2 * RamUsageEstimator.NUM_BYTES_OBJECT_REF;
+    for (DocValuesProducer producer : dvProducers) {
+      ramBytesUsed += producer.ramBytesUsed();
+    }
+    return ramBytesUsed;
+  }
+}
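A hedged usage sketch of the new producer (the field name "price" and the reader-side wiring are assumptions): every doc-values lookup on a segment with updates routes through the per-field map built in the constructor above:

    SegmentDocValuesProducer combined =
        new SegmentDocValuesProducer(si, dir, fieldInfos, segDocValues, dvFormat);
    FieldInfo fi = fieldInfos.fieldInfo("price");        // hypothetical updated field
    NumericDocValues values = combined.getNumeric(fi);   // delegates to the producer for fi's dvGen
    long ram = combined.ramBytesUsed();                  // sums the delegates plus per-entry overhead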
@ -18,19 +18,17 @@ package org.apache.lucene.index;
|
||||||
*/
|
*/
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.util.ArrayList;
|
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
|
||||||
import org.apache.lucene.codecs.Codec;
|
import org.apache.lucene.codecs.Codec;
|
||||||
import org.apache.lucene.codecs.DocValuesConsumer;
|
import org.apache.lucene.codecs.DocValuesConsumer;
|
||||||
import org.apache.lucene.codecs.FieldInfosWriter;
|
import org.apache.lucene.codecs.FieldInfosWriter;
|
||||||
import org.apache.lucene.codecs.FieldsConsumer;
|
import org.apache.lucene.codecs.FieldsConsumer;
|
||||||
|
import org.apache.lucene.codecs.NormsConsumer;
|
||||||
import org.apache.lucene.codecs.StoredFieldsWriter;
|
import org.apache.lucene.codecs.StoredFieldsWriter;
|
||||||
import org.apache.lucene.codecs.TermVectorsWriter;
|
import org.apache.lucene.codecs.TermVectorsWriter;
|
||||||
import org.apache.lucene.index.FieldInfo.DocValuesType;
|
|
||||||
import org.apache.lucene.store.Directory;
|
import org.apache.lucene.store.Directory;
|
||||||
import org.apache.lucene.store.IOContext;
|
import org.apache.lucene.store.IOContext;
|
||||||
import org.apache.lucene.util.Bits;
|
|
||||||
import org.apache.lucene.util.IOUtils;
|
import org.apache.lucene.util.IOUtils;
|
||||||
import org.apache.lucene.util.InfoStream;
|
import org.apache.lucene.util.InfoStream;
|
||||||
|
|
||||||
|
@ -90,7 +88,6 @@ final class SegmentMerger {
|
||||||
// IndexWriter.close(false) takes to actually stop the
|
// IndexWriter.close(false) takes to actually stop the
|
||||||
// threads.
|
// threads.
|
||||||
mergeFieldInfos();
|
mergeFieldInfos();
|
||||||
setMatchingSegmentReaders();
|
|
||||||
long t0 = 0;
|
long t0 = 0;
|
||||||
if (mergeState.infoStream.isEnabled("SM")) {
|
if (mergeState.infoStream.isEnabled("SM")) {
|
||||||
t0 = System.nanoTime();
|
t0 = System.nanoTime();
|
||||||
|
@@ -158,72 +155,7 @@ final class SegmentMerger {
     DocValuesConsumer consumer = codec.docValuesFormat().fieldsConsumer(segmentWriteState);
     boolean success = false;
     try {
-      for (FieldInfo field : mergeState.fieldInfos) {
+      consumer.merge(mergeState);
-        DocValuesType type = field.getDocValuesType();
-        if (type != null) {
-          if (type == DocValuesType.NUMERIC) {
-            List<NumericDocValues> toMerge = new ArrayList<>();
-            List<Bits> docsWithField = new ArrayList<>();
-            for (AtomicReader reader : mergeState.readers) {
-              NumericDocValues values = reader.getNumericDocValues(field.name);
-              Bits bits = reader.getDocsWithField(field.name);
-              if (values == null) {
-                values = DocValues.emptyNumeric();
-                bits = new Bits.MatchNoBits(reader.maxDoc());
-              }
-              toMerge.add(values);
-              docsWithField.add(bits);
-            }
-            consumer.mergeNumericField(field, mergeState, toMerge, docsWithField);
-          } else if (type == DocValuesType.BINARY) {
-            List<BinaryDocValues> toMerge = new ArrayList<>();
-            List<Bits> docsWithField = new ArrayList<>();
-            for (AtomicReader reader : mergeState.readers) {
-              BinaryDocValues values = reader.getBinaryDocValues(field.name);
-              Bits bits = reader.getDocsWithField(field.name);
-              if (values == null) {
-                values = DocValues.emptyBinary();
-                bits = new Bits.MatchNoBits(reader.maxDoc());
-              }
-              toMerge.add(values);
-              docsWithField.add(bits);
-            }
-            consumer.mergeBinaryField(field, mergeState, toMerge, docsWithField);
-          } else if (type == DocValuesType.SORTED) {
-            List<SortedDocValues> toMerge = new ArrayList<>();
-            for (AtomicReader reader : mergeState.readers) {
-              SortedDocValues values = reader.getSortedDocValues(field.name);
-              if (values == null) {
-                values = DocValues.emptySorted();
-              }
-              toMerge.add(values);
-            }
-            consumer.mergeSortedField(field, mergeState, toMerge);
-          } else if (type == DocValuesType.SORTED_SET) {
-            List<SortedSetDocValues> toMerge = new ArrayList<>();
-            for (AtomicReader reader : mergeState.readers) {
-              SortedSetDocValues values = reader.getSortedSetDocValues(field.name);
-              if (values == null) {
-                values = DocValues.emptySortedSet();
-              }
-              toMerge.add(values);
-            }
-            consumer.mergeSortedSetField(field, mergeState, toMerge);
-          } else if (type == DocValuesType.SORTED_NUMERIC) {
-            List<SortedNumericDocValues> toMerge = new ArrayList<>();
-            for (AtomicReader reader : mergeState.readers) {
-              SortedNumericDocValues values = reader.getSortedNumericDocValues(field.name);
-              if (values == null) {
-                values = DocValues.emptySortedNumeric(reader.maxDoc());
-              }
-              toMerge.add(values);
-            }
-            consumer.mergeSortedNumericField(field, mergeState, toMerge);
-          } else {
-            throw new AssertionError("type=" + type);
-          }
-        }
-      }
       success = true;
     } finally {
       if (success) {
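For reference, the per-field dispatch that the single consumer.merge(mergeState) call replaces is exactly the shape of the code removed above. A minimal sketch of the numeric case, using only names that appear in this patch (where this loop now lives inside the consumer, and the exact default merge implementation, are assumptions this diff does not show):

    // Sketch: collect one NumericDocValues plus a docs-with-field Bits per reader,
    // substituting empty instances when a reader lacks the field, then hand both
    // lists to the consumer in a single call.
    List<NumericDocValues> toMerge = new ArrayList<>();
    List<Bits> docsWithField = new ArrayList<>();
    for (AtomicReader reader : mergeState.readers) {
      NumericDocValues values = reader.getNumericDocValues(field.name);
      Bits bits = reader.getDocsWithField(field.name);
      if (values == null) {
        values = DocValues.emptyNumeric();
        bits = new Bits.MatchNoBits(reader.maxDoc());
      }
      toMerge.add(values);
      docsWithField.add(bits);
    }
    consumer.mergeNumericField(field, mergeState, toMerge, docsWithField);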
@@ -235,24 +167,10 @@ final class SegmentMerger {
   }
 
   private void mergeNorms(SegmentWriteState segmentWriteState) throws IOException {
-    DocValuesConsumer consumer = codec.normsFormat().normsConsumer(segmentWriteState);
+    NormsConsumer consumer = codec.normsFormat().normsConsumer(segmentWriteState);
     boolean success = false;
     try {
-      for (FieldInfo field : mergeState.fieldInfos) {
-        if (field.hasNorms()) {
-          List<NumericDocValues> toMerge = new ArrayList<>();
-          List<Bits> docsWithField = new ArrayList<>();
-          for (AtomicReader reader : mergeState.readers) {
-            NumericDocValues norms = reader.getNormValues(field.name);
-            if (norms == null) {
-              norms = DocValues.emptyNumeric();
-            }
-            toMerge.add(norms);
-            docsWithField.add(new Bits.MatchAllBits(reader.maxDoc()));
-          }
-          consumer.mergeNumericField(field, mergeState, toMerge, docsWithField);
-        }
-      }
+      consumer.merge(mergeState);
       success = true;
     } finally {
       if (success) {
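mergeNorms now delegates to NormsConsumer.merge(mergeState) the same way. The NormsConsumer surface a codec implements is small; the asserting and cranky consumers later in this patch override only addNormsField and close. A toy consumer in the same shape (the class itself is hypothetical, purely for illustration):

    // Counts the norm values it is asked to write; norms are dense, one value per document.
    class CountingNormsConsumer extends NormsConsumer {
      long valuesSeen;

      @Override
      public void addNormsField(FieldInfo field, Iterable<Number> values) throws IOException {
        for (Number v : values) {
          assert v != null;
          valuesSeen++;
        }
      }

      @Override
      public void close() throws IOException {
      }
    }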
@@ -262,50 +180,6 @@ final class SegmentMerger {
       }
     }
   }
 
-  private void setMatchingSegmentReaders() {
-    // If the i'th reader is a SegmentReader and has
-    // identical fieldName -> number mapping, then this
-    // array will be non-null at position i:
-    int numReaders = mergeState.readers.size();
-    mergeState.matchingSegmentReaders = new SegmentReader[numReaders];
-
-    // If this reader is a SegmentReader, and all of its
-    // field name -> number mappings match the "merged"
-    // FieldInfos, then we can do a bulk copy of the
-    // stored fields:
-    for (int i = 0; i < numReaders; i++) {
-      AtomicReader reader = mergeState.readers.get(i);
-      // TODO: we may be able to broaden this to
-      // non-SegmentReaders, since FieldInfos is now
-      // required?  But... this'd also require exposing
-      // bulk-copy (TVs and stored fields) API in foreign
-      // readers..
-      if (reader instanceof SegmentReader) {
-        SegmentReader segmentReader = (SegmentReader) reader;
-        boolean same = true;
-        FieldInfos segmentFieldInfos = segmentReader.getFieldInfos();
-        for (FieldInfo fi : segmentFieldInfos) {
-          FieldInfo other = mergeState.fieldInfos.fieldInfo(fi.number);
-          if (other == null || !other.name.equals(fi.name)) {
-            same = false;
-            break;
-          }
-        }
-        if (same) {
-          mergeState.matchingSegmentReaders[i] = segmentReader;
-          mergeState.matchedCount++;
-        }
-      }
-    }
-
-    if (mergeState.infoStream.isEnabled("SM")) {
-      mergeState.infoStream.message("SM", "merge store matchedCount=" + mergeState.matchedCount + " vs " + mergeState.readers.size());
-      if (mergeState.matchedCount != mergeState.readers.size()) {
-        mergeState.infoStream.message("SM", "" + (mergeState.readers.size() - mergeState.matchedCount) + " non-bulk merges");
-      }
-    }
-  }
-
   public void mergeFieldInfos() throws IOException {
     for (AtomicReader reader : mergeState.readers) {
@@ -318,7 +192,7 @@ final class SegmentMerger {
   }
 
   /**
-   *
+   * Merge stored fields from each of the segments into the new one.
    * @return The number of documents in all of the readers
    * @throws CorruptIndexException if the index is corrupt
    * @throws IOException if there is a low-level IO error
@@ -326,11 +200,19 @@ final class SegmentMerger {
   private int mergeFields() throws IOException {
     final StoredFieldsWriter fieldsWriter = codec.storedFieldsFormat().fieldsWriter(directory, mergeState.segmentInfo, context);
 
+    boolean success = false;
+    int numDocs;
     try {
-      return fieldsWriter.merge(mergeState);
+      numDocs = fieldsWriter.merge(mergeState);
+      success = true;
     } finally {
-      fieldsWriter.close();
+      if (success) {
+        IOUtils.close(fieldsWriter);
+      } else {
+        IOUtils.closeWhileHandlingException(fieldsWriter);
+      }
     }
+    return numDocs;
   }
 
   /**
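The mergeFields rewrite above (and mergeVectors below) replaces a bare close() in the finally block with the close-on-success idiom: close normally when the merge succeeded, close while swallowing secondary failures when the code is already unwinding from an exception, so the original failure is not masked. The same idiom in plain Java, independent of Lucene's IOUtils helpers (class and method names are illustrative):

    import java.io.Closeable;
    import java.util.concurrent.Callable;

    final class CloseOnSuccess {
      static int runAndClose(Callable<Integer> merge, Closeable writer) throws Exception {
        boolean success = false;
        int numDocs;
        try {
          numDocs = merge.call();
          success = true;
        } finally {
          if (success) {
            writer.close();                                   // let a close failure propagate
          } else {
            try { writer.close(); } catch (Throwable t) {}    // keep the primary exception
          }
        }
        return numDocs;
      }
    }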
@@ -340,11 +222,19 @@ final class SegmentMerger {
   private int mergeVectors() throws IOException {
     final TermVectorsWriter termVectorsWriter = codec.termVectorsFormat().vectorsWriter(directory, mergeState.segmentInfo, context);
 
+    boolean success = false;
+    int numDocs;
     try {
-      return termVectorsWriter.merge(mergeState);
+      numDocs = termVectorsWriter.merge(mergeState);
+      success = true;
     } finally {
-      termVectorsWriter.close();
+      if (success) {
+        IOUtils.close(termVectorsWriter);
+      } else {
+        IOUtils.closeWhileHandlingException(termVectorsWriter);
+      }
     }
+    return numDocs;
   }
 
   // NOTE: removes any "all deleted" readers from mergeState.readers
@@ -374,31 +264,10 @@ final class SegmentMerger {
   }
 
   private void mergeTerms(SegmentWriteState segmentWriteState) throws IOException {
-
-    final List<Fields> fields = new ArrayList<>();
-    final List<ReaderSlice> slices = new ArrayList<>();
-
-    int docBase = 0;
-
-    for(int readerIndex=0;readerIndex<mergeState.readers.size();readerIndex++) {
-      final AtomicReader reader = mergeState.readers.get(readerIndex);
-      final Fields f = reader.fields();
-      final int maxDoc = reader.maxDoc();
-      if (f != null) {
-        slices.add(new ReaderSlice(docBase, maxDoc, readerIndex));
-        fields.add(f);
-      }
-      docBase += maxDoc;
-    }
-
-    Fields mergedFields = new MappedMultiFields(mergeState,
-        new MultiFields(fields.toArray(Fields.EMPTY_ARRAY),
-                        slices.toArray(ReaderSlice.EMPTY_ARRAY)));
-
     FieldsConsumer consumer = codec.postingsFormat().fieldsConsumer(segmentWriteState);
     boolean success = false;
     try {
-      consumer.write(mergedFields);
+      consumer.merge(mergeState);
       success = true;
     } finally {
       if (success) {
@@ -18,18 +18,15 @@ package org.apache.lucene.index;
  */
 
 import java.io.IOException;
-import java.util.ArrayList;
 import java.util.Collections;
 import java.util.HashMap;
-import java.util.IdentityHashMap;
-import java.util.List;
 import java.util.Map;
-import java.util.Set;
 
 import org.apache.lucene.codecs.Codec;
 import org.apache.lucene.codecs.DocValuesFormat;
 import org.apache.lucene.codecs.DocValuesProducer;
 import org.apache.lucene.codecs.FieldInfosFormat;
+import org.apache.lucene.codecs.NormsProducer;
 import org.apache.lucene.codecs.StoredFieldsReader;
 import org.apache.lucene.codecs.TermVectorsReader;
 import org.apache.lucene.index.FieldInfo.DocValuesType;
@@ -41,8 +38,6 @@ import org.apache.lucene.util.Bits;
 import org.apache.lucene.util.CloseableThreadLocal;
 import org.apache.lucene.util.IOUtils;
 import org.apache.lucene.util.RamUsageEstimator;
-import org.apache.lucene.util.StringHelper;
-import org.apache.lucene.util.Version;
 
 /**
  * IndexReader implementation over a single segment.
@@ -56,7 +51,6 @@ public final class SegmentReader extends AtomicReader implements Accountable {
   private static final long BASE_RAM_BYTES_USED =
         RamUsageEstimator.shallowSizeOfInstance(SegmentReader.class)
       + RamUsageEstimator.shallowSizeOfInstance(SegmentDocValues.class);
-  private static final long LONG_RAM_BYTES_USED = RamUsageEstimator.shallowSizeOfInstance(Long.class);
 
   private final SegmentCommitInfo si;
   private final Bits liveDocs;
@@ -83,12 +77,8 @@ public final class SegmentReader extends AtomicReader implements Accountable {
     }
   };
 
-  final Map<String,DocValuesProducer> dvProducersByField = new HashMap<>();
-  final Set<DocValuesProducer> dvProducers = Collections.newSetFromMap(new IdentityHashMap<DocValuesProducer,Boolean>());
+  final DocValuesProducer docValuesProducer;
 
   final FieldInfos fieldInfos;
 
-  private final List<Long> dvGens = new ArrayList<>();
-
   /**
    * Constructs a new SegmentReader with a new core.
|
||||||
numDocs = si.info.getDocCount() - si.getDelCount();
|
numDocs = si.info.getDocCount() - si.getDelCount();
|
||||||
|
|
||||||
if (fieldInfos.hasDocValues()) {
|
if (fieldInfos.hasDocValues()) {
|
||||||
initDocValuesProducers(codec);
|
docValuesProducer = initDocValuesProducer(codec);
|
||||||
|
} else {
|
||||||
|
docValuesProducer = null;
|
||||||
}
|
}
|
||||||
|
|
||||||
success = true;
|
success = true;
|
||||||
|
@@ -171,7 +163,9 @@ public final class SegmentReader extends AtomicReader implements Accountable {
     }
 
     if (fieldInfos.hasDocValues()) {
-      initDocValuesProducers(codec);
+      docValuesProducer = initDocValuesProducer(codec);
+    } else {
+      docValuesProducer = null;
     }
     success = true;
     } finally {
@@ -182,81 +176,15 @@ public final class SegmentReader extends AtomicReader implements Accountable {
   }
 
   // initialize the per-field DocValuesProducer
-  @SuppressWarnings("deprecation")
-  private void initDocValuesProducers(Codec codec) throws IOException {
+  private DocValuesProducer initDocValuesProducer(Codec codec) throws IOException {
     final Directory dir = core.cfsReader != null ? core.cfsReader : si.info.dir;
     final DocValuesFormat dvFormat = codec.docValuesFormat();
 
     if (!si.hasFieldUpdates()) {
       // simple case, no DocValues updates
-      final DocValuesProducer dvp = segDocValues.getDocValuesProducer(-1L, si, IOContext.READ, dir, dvFormat, fieldInfos);
+      return segDocValues.getDocValuesProducer(-1L, si, IOContext.READ, dir, dvFormat, fieldInfos);
-      dvGens.add(-1L);
-      dvProducers.add(dvp);
-      for (FieldInfo fi : fieldInfos) {
-        if (!fi.hasDocValues()) continue;
-        assert fi.getDocValuesGen() == -1;
-        dvProducersByField.put(fi.name, dvp);
-      }
-      return;
-    }
-
-    Version ver = si.info.getVersion();
-    if (ver != null && ver.onOrAfter(Version.LUCENE_4_9_0)) {
-      DocValuesProducer baseProducer = null;
-      for (FieldInfo fi : fieldInfos) {
-        if (!fi.hasDocValues()) continue;
-        long docValuesGen = fi.getDocValuesGen();
-        if (docValuesGen == -1) {
-          if (baseProducer == null) {
-            // System.out.println("[" + Thread.currentThread().getName() + "] SR.initDocValuesProducers: segInfo=" + si + "; gen=" + docValuesGen + "; field=" + fi.name);
-            // the base producer gets all the fields, so the Codec can validate properly
-            baseProducer = segDocValues.getDocValuesProducer(docValuesGen, si, IOContext.READ, dir, dvFormat, fieldInfos);
-            dvGens.add(docValuesGen);
-            dvProducers.add(baseProducer);
-          }
-          // System.out.println("[" + Thread.currentThread().getName() + "] SR.initDocValuesProducers: segInfo=" + si + "; gen=" + docValuesGen + "; field=" + fi.name);
-          dvProducersByField.put(fi.name, baseProducer);
-        } else {
-          assert !dvGens.contains(docValuesGen);
-          // System.out.println("[" + Thread.currentThread().getName() + "] SR.initDocValuesProducers: segInfo=" + si + "; gen=" + docValuesGen + "; field=" + fi.name);
-          final DocValuesProducer dvp = segDocValues.getDocValuesProducer(docValuesGen, si, IOContext.READ, dir, dvFormat, new FieldInfos(new FieldInfo[] { fi }));
-          dvGens.add(docValuesGen);
-          dvProducers.add(dvp);
-          dvProducersByField.put(fi.name, dvp);
-        }
-      }
     } else {
-      // For pre-4.9 indexes, especially with doc-values updates, multiple
-      // FieldInfos could belong to the same dvGen. Therefore need to make sure
-      // we initialize each DocValuesProducer once per gen.
-      Map<Long,List<FieldInfo>> genInfos = new HashMap<>();
-      for (FieldInfo fi : fieldInfos) {
-        if (!fi.hasDocValues()) continue;
-        List<FieldInfo> genFieldInfos = genInfos.get(fi.getDocValuesGen());
-        if (genFieldInfos == null) {
-          genFieldInfos = new ArrayList<>();
-          genInfos.put(fi.getDocValuesGen(), genFieldInfos);
-        }
-        genFieldInfos.add(fi);
-      }
-
-      for (Map.Entry<Long,List<FieldInfo>> e : genInfos.entrySet()) {
-        long docValuesGen = e.getKey();
-        List<FieldInfo> infos = e.getValue();
-        final DocValuesProducer dvp;
-        if (docValuesGen == -1) {
-          // we need to send all FieldInfos to gen=-1, but later we need to
-          // record the DVP only for the "true" gen=-1 fields (not updated)
-          dvp = segDocValues.getDocValuesProducer(docValuesGen, si, IOContext.READ, dir, dvFormat, fieldInfos);
-        } else {
-          dvp = segDocValues.getDocValuesProducer(docValuesGen, si, IOContext.READ, dir, dvFormat, new FieldInfos(infos.toArray(new FieldInfo[infos.size()])));
-        }
-        dvGens.add(docValuesGen);
-        dvProducers.add(dvp);
-        for (FieldInfo fi : infos) {
-          dvProducersByField.put(fi.name, dvp);
-        }
-      }
+      return new SegmentDocValuesProducer(si, dir, fieldInfos, segDocValues, dvFormat);
     }
   }
@@ -305,11 +233,14 @@ public final class SegmentReader extends AtomicReader implements Accountable {
     try {
       core.decRef();
     } finally {
-      dvProducersByField.clear();
       try {
         IOUtils.close(docValuesLocal, docsWithFieldLocal);
       } finally {
-        segDocValues.decRef(dvGens);
+        if (docValuesProducer instanceof SegmentDocValuesProducer) {
+          segDocValues.decRef(((SegmentDocValuesProducer)docValuesProducer).dvGens);
+        } else if (docValuesProducer != null) {
+          segDocValues.decRef(Collections.singletonList(-1L));
+        }
       }
     }
   }
@@ -319,14 +250,6 @@ public final class SegmentReader extends AtomicReader implements Accountable {
     ensureOpen();
     return fieldInfos;
   }
 
-  /** Expert: retrieve thread-private {@link
-   *  StoredFieldsReader}
-   *  @lucene.internal */
-  public StoredFieldsReader getFieldsReader() {
-    ensureOpen();
-    return core.fieldsReaderLocal.get();
-  }
-
   @Override
   public void document(int docID, StoredFieldVisitor visitor) throws IOException {
@@ -360,6 +283,28 @@ public final class SegmentReader extends AtomicReader implements Accountable {
     return core.termVectorsLocal.get();
   }
 
+  /** Expert: retrieve thread-private {@link
+   *  StoredFieldsReader}
+   *  @lucene.internal */
+  public StoredFieldsReader getFieldsReader() {
+    ensureOpen();
+    return core.fieldsReaderLocal.get();
+  }
+
+  /** Expert: retrieve underlying NormsProducer
+   *  @lucene.internal */
+  public NormsProducer getNormsReader() {
+    ensureOpen();
+    return core.normsProducer;
+  }
+
+  /** Expert: retrieve underlying DocValuesProducer
+   *  @lucene.internal */
+  public DocValuesProducer getDocValuesReader() {
+    ensureOpen();
+    return docValuesProducer;
+  }
+
   @Override
   public Fields getTermVectors(int docID) throws IOException {
     TermVectorsReader termVectorsReader = getTermVectorsReader();
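The three expert accessors added above give tests direct access to the per-segment producers. A hypothetical inspection snippet (the index path, the printing, and the null handling are illustrative; only SegmentReader leaves expose these readers):

    import java.io.File;
    import org.apache.lucene.codecs.DocValuesProducer;
    import org.apache.lucene.codecs.NormsProducer;
    import org.apache.lucene.index.AtomicReaderContext;
    import org.apache.lucene.index.DirectoryReader;
    import org.apache.lucene.index.SegmentReader;
    import org.apache.lucene.store.Directory;
    import org.apache.lucene.store.FSDirectory;

    Directory dir = FSDirectory.open(new File("/tmp/idx"));
    DirectoryReader reader = DirectoryReader.open(dir);
    try {
      for (AtomicReaderContext ctx : reader.leaves()) {
        if (ctx.reader() instanceof SegmentReader) {
          SegmentReader sr = (SegmentReader) ctx.reader();
          NormsProducer norms = sr.getNormsReader();        // may be null when the segment has no norms
          DocValuesProducer dvs = sr.getDocValuesReader();  // null when the segment has no doc values
          System.out.println(sr + " norms=" + (norms == null ? 0 : norms.ramBytesUsed())
                                + " dv=" + (dvs == null ? 0 : dvs.ramBytesUsed()));
        }
      }
    } finally {
      reader.close();
      dir.close();
    }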
@@ -455,9 +400,7 @@ public final class SegmentReader extends AtomicReader implements Accountable {
       if (fi == null) {
         return null;
       }
-      DocValuesProducer dvProducer = dvProducersByField.get(field);
-      assert dvProducer != null;
-      NumericDocValues dv = dvProducer.getNumeric(fi);
+      NumericDocValues dv = docValuesProducer.getNumeric(fi);
       dvFields.put(field, dv);
       return dv;
     }
@@ -481,9 +424,7 @@ public final class SegmentReader extends AtomicReader implements Accountable {
       // Field was not indexed with doc values
       return null;
     }
-    DocValuesProducer dvProducer = dvProducersByField.get(field);
-    assert dvProducer != null;
-    Bits dv = dvProducer.getDocsWithField(fi);
+    Bits dv = docValuesProducer.getDocsWithField(fi);
     dvFields.put(field, dv);
     return dv;
   }
@@ -501,9 +442,7 @@ public final class SegmentReader extends AtomicReader implements Accountable {
 
     BinaryDocValues dvs = (BinaryDocValues) dvFields.get(field);
     if (dvs == null) {
-      DocValuesProducer dvProducer = dvProducersByField.get(field);
-      assert dvProducer != null;
-      dvs = dvProducer.getBinary(fi);
+      dvs = docValuesProducer.getBinary(fi);
       dvFields.put(field, dvs);
     }
 
@@ -523,9 +462,7 @@ public final class SegmentReader extends AtomicReader implements Accountable {
       if (fi == null) {
         return null;
       }
-      DocValuesProducer dvProducer = dvProducersByField.get(field);
-      assert dvProducer != null;
-      SortedDocValues dv = dvProducer.getSorted(fi);
+      SortedDocValues dv = docValuesProducer.getSorted(fi);
       dvFields.put(field, dv);
       return dv;
     }
@@ -544,9 +481,7 @@ public final class SegmentReader extends AtomicReader implements Accountable {
       if (fi == null) {
         return null;
       }
-      DocValuesProducer dvProducer = dvProducersByField.get(field);
-      assert dvProducer != null;
-      SortedNumericDocValues dv = dvProducer.getSortedNumeric(fi);
+      SortedNumericDocValues dv = docValuesProducer.getSortedNumeric(fi);
       dvFields.put(field, dv);
       return dv;
     }
@@ -565,9 +500,7 @@ public final class SegmentReader extends AtomicReader implements Accountable {
       if (fi == null) {
         return null;
       }
-      DocValuesProducer dvProducer = dvProducersByField.get(field);
-      assert dvProducer != null;
-      SortedSetDocValues dv = dvProducer.getSortedSet(fi);
+      SortedSetDocValues dv = docValuesProducer.getSortedSet(fi);
       dvFields.put(field, dv);
       return dv;
     }
@@ -595,13 +528,8 @@ public final class SegmentReader extends AtomicReader implements Accountable {
   public long ramBytesUsed() {
     ensureOpen();
     long ramBytesUsed = BASE_RAM_BYTES_USED;
-    ramBytesUsed += dvGens.size() * LONG_RAM_BYTES_USED;
-    ramBytesUsed += dvProducers.size() * RamUsageEstimator.NUM_BYTES_OBJECT_REF;
-    ramBytesUsed += dvProducersByField.size() * 2 * RamUsageEstimator.NUM_BYTES_OBJECT_REF;
-    if (dvProducers != null) {
-      for (DocValuesProducer producer : dvProducers) {
-        ramBytesUsed += producer.ramBytesUsed();
-      }
+    if (docValuesProducer != null) {
+      ramBytesUsed += docValuesProducer.ramBytesUsed();
     }
     if (core != null) {
       ramBytesUsed += core.ramBytesUsed();
@@ -633,10 +561,8 @@ public final class SegmentReader extends AtomicReader implements Accountable {
     }
 
     // docvalues
-    if (dvProducers != null) {
-      for (DocValuesProducer producer : dvProducers) {
-        producer.checkIntegrity();
-      }
+    if (docValuesProducer != null) {
+      docValuesProducer.checkIntegrity();
     }
   }
 }
@@ -25,7 +25,7 @@ import java.util.Set;
 
 import org.apache.lucene.analysis.MockAnalyzer;
 import org.apache.lucene.codecs.Codec;
-import org.apache.lucene.codecs.lucene40.Lucene40StoredFieldsWriter;
+import org.apache.lucene.codecs.compressing.CompressingStoredFieldsWriter;
 import org.apache.lucene.index.DirectoryReader;
 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.IndexWriter;
@@ -40,8 +40,8 @@ public class TestFileSwitchDirectory extends BaseDirectoryTestCase {
    */
   public void testBasic() throws IOException {
     Set<String> fileExtensions = new HashSet<>();
-    fileExtensions.add(Lucene40StoredFieldsWriter.FIELDS_EXTENSION);
-    fileExtensions.add(Lucene40StoredFieldsWriter.FIELDS_INDEX_EXTENSION);
+    fileExtensions.add(CompressingStoredFieldsWriter.FIELDS_EXTENSION);
+    fileExtensions.add(CompressingStoredFieldsWriter.FIELDS_INDEX_EXTENSION);
 
     MockDirectoryWrapper primaryDir = new MockDirectoryWrapper(random(), new RAMDirectory());
     primaryDir.setCheckIndexOnClose(false); // only part of an index
@@ -207,54 +207,7 @@ public class AssertingDocValuesFormat extends DocValuesFormat {
     }
   }
 
-  static class AssertingNormsConsumer extends DocValuesConsumer {
-    private final DocValuesConsumer in;
-    private final int maxDoc;
-
-    AssertingNormsConsumer(DocValuesConsumer in, int maxDoc) {
-      this.in = in;
-      this.maxDoc = maxDoc;
-    }
-
-    @Override
-    public void addNumericField(FieldInfo field, Iterable<Number> values) throws IOException {
-      int count = 0;
-      for (Number v : values) {
-        assert v != null;
-        count++;
-      }
-      assert count == maxDoc;
-      checkIterator(values.iterator(), maxDoc, false);
-      in.addNumericField(field, values);
-    }
-
-    @Override
-    public void close() throws IOException {
-      in.close();
-    }
-
-    @Override
-    public void addBinaryField(FieldInfo field, Iterable<BytesRef> values) throws IOException {
-      throw new IllegalStateException();
-    }
-
-    @Override
-    public void addSortedField(FieldInfo field, Iterable<BytesRef> values, Iterable<Number> docToOrd) throws IOException {
-      throw new IllegalStateException();
-    }
-
-    @Override
-    public void addSortedNumericField(FieldInfo field, Iterable<Number> docToValueCount, Iterable<Number> values) throws IOException {
-      throw new IllegalStateException();
-    }
-
-    @Override
-    public void addSortedSetField(FieldInfo field, Iterable<BytesRef> values, Iterable<Number> docToOrdCount, Iterable<Number> ords) throws IOException {
-      throw new IllegalStateException();
-    }
-  }
-
-  private static <T> void checkIterator(Iterator<T> iterator, long expectedSize, boolean allowNull) {
+  static <T> void checkIterator(Iterator<T> iterator, long expectedSize, boolean allowNull) {
     for (long i = 0; i < expectedSize; i++) {
       boolean hasNext = iterator.hasNext();
       assert hasNext;
@@ -287,8 +240,7 @@ public class AssertingDocValuesFormat extends DocValuesFormat {
 
     @Override
     public NumericDocValues getNumeric(FieldInfo field) throws IOException {
-      assert field.getDocValuesType() == FieldInfo.DocValuesType.NUMERIC ||
-             field.getNormType() == FieldInfo.DocValuesType.NUMERIC;
+      assert field.getDocValuesType() == FieldInfo.DocValuesType.NUMERIC;
       NumericDocValues values = in.getNumeric(field);
       assert values != null;
       return new AssertingAtomicReader.AssertingNumericDocValues(values, maxDoc);
@@ -19,12 +19,13 @@ package org.apache.lucene.codecs.asserting;
 
 import java.io.IOException;
 
-import org.apache.lucene.codecs.DocValuesConsumer;
-import org.apache.lucene.codecs.DocValuesProducer;
+import org.apache.lucene.codecs.NormsConsumer;
 import org.apache.lucene.codecs.NormsFormat;
-import org.apache.lucene.codecs.asserting.AssertingDocValuesFormat.AssertingNormsConsumer;
-import org.apache.lucene.codecs.asserting.AssertingDocValuesFormat.AssertingDocValuesProducer;
+import org.apache.lucene.codecs.NormsProducer;
 import org.apache.lucene.codecs.lucene49.Lucene49NormsFormat;
+import org.apache.lucene.index.AssertingAtomicReader;
+import org.apache.lucene.index.FieldInfo;
+import org.apache.lucene.index.NumericDocValues;
 import org.apache.lucene.index.SegmentReadState;
 import org.apache.lucene.index.SegmentWriteState;
@@ -35,17 +36,77 @@ public class AssertingNormsFormat extends NormsFormat {
   private final NormsFormat in = new Lucene49NormsFormat();
 
   @Override
-  public DocValuesConsumer normsConsumer(SegmentWriteState state) throws IOException {
-    DocValuesConsumer consumer = in.normsConsumer(state);
+  public NormsConsumer normsConsumer(SegmentWriteState state) throws IOException {
+    NormsConsumer consumer = in.normsConsumer(state);
     assert consumer != null;
     return new AssertingNormsConsumer(consumer, state.segmentInfo.getDocCount());
   }
 
   @Override
-  public DocValuesProducer normsProducer(SegmentReadState state) throws IOException {
+  public NormsProducer normsProducer(SegmentReadState state) throws IOException {
     assert state.fieldInfos.hasNorms();
-    DocValuesProducer producer = in.normsProducer(state);
+    NormsProducer producer = in.normsProducer(state);
     assert producer != null;
-    return new AssertingDocValuesProducer(producer, state.segmentInfo.getDocCount());
+    return new AssertingNormsProducer(producer, state.segmentInfo.getDocCount());
   }
+
+  static class AssertingNormsConsumer extends NormsConsumer {
+    private final NormsConsumer in;
+    private final int maxDoc;
+
+    AssertingNormsConsumer(NormsConsumer in, int maxDoc) {
+      this.in = in;
+      this.maxDoc = maxDoc;
+    }
+
+    @Override
+    public void addNormsField(FieldInfo field, Iterable<Number> values) throws IOException {
+      int count = 0;
+      for (Number v : values) {
+        assert v != null;
+        count++;
+      }
+      assert count == maxDoc;
+      AssertingDocValuesFormat.checkIterator(values.iterator(), maxDoc, false);
+      in.addNormsField(field, values);
+    }
+
+    @Override
+    public void close() throws IOException {
+      in.close();
+    }
+  }
+
+  static class AssertingNormsProducer extends NormsProducer {
+    private final NormsProducer in;
+    private final int maxDoc;
+
+    AssertingNormsProducer(NormsProducer in, int maxDoc) {
+      this.in = in;
+      this.maxDoc = maxDoc;
+    }
+
+    @Override
+    public NumericDocValues getNorms(FieldInfo field) throws IOException {
+      assert field.getNormType() == FieldInfo.DocValuesType.NUMERIC;
+      NumericDocValues values = in.getNorms(field);
+      assert values != null;
+      return new AssertingAtomicReader.AssertingNumericDocValues(values, maxDoc);
+    }
+
+    @Override
+    public void close() throws IOException {
+      in.close();
+    }
+
+    @Override
+    public long ramBytesUsed() {
+      return in.ramBytesUsed();
+    }
+
+    @Override
+    public void checkIntegrity() throws IOException {
+      in.checkIntegrity();
+    }
+  }
 }
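The asserting norms classes above follow the same wrap-check-delegate pattern as the rest of the asserting codec: hold the real implementation, assert the contract on every call, pass the work through. The pattern reduced to plain Java (the class is illustrative, not part of the patch):

    import java.util.Iterator;

    // Wraps an iterator and enforces a non-null contract while delegating iteration.
    final class AssertingIterator<T> implements Iterator<T> {
      private final Iterator<T> in;
      private final boolean allowNull;

      AssertingIterator(Iterator<T> in, boolean allowNull) {
        this.in = in;
        this.allowNull = allowNull;
      }

      @Override
      public boolean hasNext() { return in.hasNext(); }

      @Override
      public T next() {
        T value = in.next();
        assert allowNull || value != null;
        return value;
      }

      @Override
      public void remove() { throw new UnsupportedOperationException(); }
    }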
@@ -22,7 +22,7 @@ import java.io.IOException;
 import org.apache.lucene.codecs.TermVectorsFormat;
 import org.apache.lucene.codecs.TermVectorsReader;
 import org.apache.lucene.codecs.TermVectorsWriter;
-import org.apache.lucene.codecs.lucene40.Lucene40TermVectorsFormat;
+import org.apache.lucene.codecs.lucene42.Lucene42TermVectorsFormat;
 import org.apache.lucene.index.AssertingAtomicReader;
 import org.apache.lucene.index.FieldInfo;
 import org.apache.lucene.index.FieldInfos;
@@ -33,10 +33,10 @@ import org.apache.lucene.store.IOContext;
 import org.apache.lucene.util.BytesRef;
 
 /**
- * Just like {@link Lucene40TermVectorsFormat} but with additional asserts.
+ * Just like {@link Lucene42TermVectorsFormat} but with additional asserts.
  */
 public class AssertingTermVectorsFormat extends TermVectorsFormat {
-  private final TermVectorsFormat in = new Lucene40TermVectorsFormat();
+  private final TermVectorsFormat in = new Lucene42TermVectorsFormat();
 
   @Override
   public TermVectorsReader vectorsReader(Directory directory, SegmentInfo segmentInfo, FieldInfos fieldInfos, IOContext context) throws IOException {
@@ -23,11 +23,11 @@ import org.apache.lucene.codecs.NormsFormat;
 import org.apache.lucene.codecs.PostingsFormat;
 import org.apache.lucene.codecs.StoredFieldsFormat;
 import org.apache.lucene.codecs.TermVectorsFormat;
-import org.apache.lucene.codecs.lucene40.Lucene40StoredFieldsFormat;
-import org.apache.lucene.codecs.lucene40.Lucene40TermVectorsFormat;
 import org.apache.lucene.codecs.lucene41.Lucene41PostingsFormat;
+import org.apache.lucene.codecs.lucene41.Lucene41StoredFieldsFormat;
 import org.apache.lucene.codecs.lucene410.Lucene410Codec;
 import org.apache.lucene.codecs.lucene410.Lucene410DocValuesFormat;
+import org.apache.lucene.codecs.lucene42.Lucene42TermVectorsFormat;
 import org.apache.lucene.codecs.lucene49.Lucene49NormsFormat;
 
 /** Codec that tries to use as little ram as possible because he spent all his money on beer */
@@ -37,9 +37,8 @@ public class CheapBastardCodec extends FilterCodec {
 
   // TODO: would be better to have no terms index at all and bsearch a terms dict
   private final PostingsFormat postings = new Lucene41PostingsFormat(100, 200);
-  // uncompressing versions, waste lots of disk but no ram
-  private final StoredFieldsFormat storedFields = new Lucene40StoredFieldsFormat();
-  private final TermVectorsFormat termVectors = new Lucene40TermVectorsFormat();
+  private final StoredFieldsFormat storedFields = new Lucene41StoredFieldsFormat();
+  private final TermVectorsFormat termVectors = new Lucene42TermVectorsFormat();
   private final DocValuesFormat docValues = new Lucene410DocValuesFormat();
   private final NormsFormat norms = new Lucene49NormsFormat();
@@ -20,9 +20,10 @@ package org.apache.lucene.codecs.cranky;
 import java.io.IOException;
 import java.util.Random;
 
-import org.apache.lucene.codecs.DocValuesConsumer;
-import org.apache.lucene.codecs.DocValuesProducer;
+import org.apache.lucene.codecs.NormsConsumer;
 import org.apache.lucene.codecs.NormsFormat;
+import org.apache.lucene.codecs.NormsProducer;
+import org.apache.lucene.index.FieldInfo;
 import org.apache.lucene.index.SegmentReadState;
 import org.apache.lucene.index.SegmentWriteState;
@@ -36,15 +37,41 @@ class CrankyNormsFormat {
   }
 
   @Override
-  public DocValuesConsumer normsConsumer(SegmentWriteState state) throws IOException {
+  public NormsConsumer normsConsumer(SegmentWriteState state) throws IOException {
     if (random.nextInt(100) == 0) {
-      throw new IOException("Fake IOException from NormsFormat.fieldsConsumer()");
+      throw new IOException("Fake IOException from NormsFormat.normsConsumer()");
     }
-    return new CrankyDocValuesFormat.CrankyDocValuesConsumer(delegate.normsConsumer(state), random);
+    return new CrankyNormsConsumer(delegate.normsConsumer(state), random);
   }
 
   @Override
-  public DocValuesProducer normsProducer(SegmentReadState state) throws IOException {
+  public NormsProducer normsProducer(SegmentReadState state) throws IOException {
     return delegate.normsProducer(state);
   }
+
+  static class CrankyNormsConsumer extends NormsConsumer {
+    final NormsConsumer delegate;
+    final Random random;
+
+    CrankyNormsConsumer(NormsConsumer delegate, Random random) {
+      this.delegate = delegate;
+      this.random = random;
+    }
+
+    @Override
+    public void close() throws IOException {
+      delegate.close();
+      if (random.nextInt(100) == 0) {
+        throw new IOException("Fake IOException from NormsConsumer.close()");
+      }
+    }
+
+    @Override
+    public void addNormsField(FieldInfo field, Iterable<Number> values) throws IOException {
+      if (random.nextInt(100) == 0) {
+        throw new IOException("Fake IOException from NormsConsumer.addNormsField()");
+      }
+      delegate.addNormsField(field, values);
+    }
+  }
 }
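CrankyNormsConsumer above injects a fake IOException on roughly one call in a hundred, which is how the cranky codec forces abort and rollback paths to run. The injection pattern on its own, in plain Java (the wrapper class is illustrative):

    import java.io.IOException;
    import java.util.Random;

    // Delegates the real work but fails ~1% of the time, mirroring random.nextInt(100) == 0 above.
    final class CrankyStep {
      private final Runnable delegate;
      private final Random random;

      CrankyStep(Runnable delegate, Random random) {
        this.delegate = delegate;
        this.random = random;
      }

      void run() throws IOException {
        if (random.nextInt(100) == 0) {
          throw new IOException("Fake IOException");
        }
        delegate.run();
      }
    }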
@@ -6,7 +6,8 @@ import org.apache.lucene.codecs.DocValuesFormat;
 import org.apache.lucene.codecs.FieldInfosFormat;
 import org.apache.lucene.codecs.FieldInfosWriter;
 import org.apache.lucene.codecs.NormsFormat;
-import org.apache.lucene.codecs.SegmentInfoFormat;
+import org.apache.lucene.codecs.StoredFieldsFormat;
+import org.apache.lucene.codecs.TermVectorsFormat;
 import org.apache.lucene.util.LuceneTestCase;
 
 /*
|
||||||
|
|
||||||
private final DocValuesFormat docValues = new Lucene40RWDocValuesFormat();
|
private final DocValuesFormat docValues = new Lucene40RWDocValuesFormat();
|
||||||
private final NormsFormat norms = new Lucene40RWNormsFormat();
|
private final NormsFormat norms = new Lucene40RWNormsFormat();
|
||||||
|
private final StoredFieldsFormat stored = new Lucene40RWStoredFieldsFormat();
|
||||||
|
private final TermVectorsFormat vectors = new Lucene40RWTermVectorsFormat();
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public FieldInfosFormat fieldInfosFormat() {
|
public FieldInfosFormat fieldInfosFormat() {
|
||||||
|
@@ -58,5 +61,14 @@ public final class Lucene40RWCodec extends Lucene40Codec {
   public NormsFormat normsFormat() {
     return norms;
   }
+
+  @Override
+  public StoredFieldsFormat storedFieldsFormat() {
+    return stored;
+  }
+
+  @Override
+  public TermVectorsFormat termVectorsFormat() {
+    return vectors;
+  }
 }
@@ -19,7 +19,8 @@ package org.apache.lucene.codecs.lucene40;
 
 import java.io.IOException;
 
-import org.apache.lucene.codecs.DocValuesConsumer;
+import org.apache.lucene.codecs.NormsConsumer;
+import org.apache.lucene.index.FieldInfo;
 import org.apache.lucene.index.IndexFileNames;
 import org.apache.lucene.index.SegmentWriteState;
 import org.apache.lucene.util.LuceneTestCase;
@@ -29,14 +30,25 @@ import org.apache.lucene.util.LuceneTestCase;
 public class Lucene40RWNormsFormat extends Lucene40NormsFormat {
 
   @Override
-  public DocValuesConsumer normsConsumer(SegmentWriteState state) throws IOException {
+  public NormsConsumer normsConsumer(SegmentWriteState state) throws IOException {
     if (!LuceneTestCase.OLD_FORMAT_IMPERSONATION_IS_ACTIVE) {
       return super.normsConsumer(state);
     } else {
       String filename = IndexFileNames.segmentFileName(state.segmentInfo.name,
                                                        "nrm",
                                                        IndexFileNames.COMPOUND_FILE_EXTENSION);
-      return new Lucene40DocValuesWriter(state, filename, Lucene40FieldInfosReader.LEGACY_NORM_TYPE_KEY);
+      final Lucene40DocValuesWriter impl = new Lucene40DocValuesWriter(state, filename, Lucene40FieldInfosReader.LEGACY_NORM_TYPE_KEY);
+      return new NormsConsumer() {
+        @Override
+        public void addNormsField(FieldInfo field, Iterable<Number> values) throws IOException {
+          impl.addNumericField(field, values);
+        }
+
+        @Override
+        public void close() throws IOException {
+          impl.close();
+        }
+      };
     }
   }
 }
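The read-write back-compat format above keeps the old Lucene40DocValuesWriter and adapts it to the new NormsConsumer API with an anonymous subclass; the only real change is the method being forwarded (addNormsField to addNumericField). The same adapter idea in miniature (both interfaces are illustrative, not Lucene types):

    interface NumericWriter { void addNumeric(String field, long value); }
    interface NormsWriter   { void addNorm(String field, long value); }

    // Adapt an existing numeric writer to the narrower norms-only interface.
    static NormsWriter adapt(final NumericWriter impl) {
      return new NormsWriter() {
        @Override
        public void addNorm(String field, long value) {
          impl.addNumeric(field, value);
        }
      };
    }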
@ -0,0 +1,41 @@
|
||||||
|
package org.apache.lucene.codecs.lucene40;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
|
||||||
|
import org.apache.lucene.codecs.StoredFieldsWriter;
|
||||||
|
import org.apache.lucene.index.SegmentInfo;
|
||||||
|
import org.apache.lucene.store.Directory;
|
||||||
|
import org.apache.lucene.store.IOContext;
|
||||||
|
import org.apache.lucene.util.LuceneTestCase;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Simulates writing Lucene 4.0 Stored Fields Format.
|
||||||
|
*/
|
||||||
|
public class Lucene40RWStoredFieldsFormat extends Lucene40StoredFieldsFormat {
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public StoredFieldsWriter fieldsWriter(Directory directory, SegmentInfo si, IOContext context) throws IOException {
|
||||||
|
if (!LuceneTestCase.OLD_FORMAT_IMPERSONATION_IS_ACTIVE) {
|
||||||
|
throw new UnsupportedOperationException("this codec can only be used for reading");
|
||||||
|
} else {
|
||||||
|
return new Lucene40StoredFieldsWriter(directory, si.name, context);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,41 @@
|
||||||
|
package org.apache.lucene.codecs.lucene40;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
|
||||||
|
import org.apache.lucene.codecs.TermVectorsWriter;
|
||||||
|
import org.apache.lucene.index.SegmentInfo;
|
||||||
|
import org.apache.lucene.store.Directory;
|
||||||
|
import org.apache.lucene.store.IOContext;
|
||||||
|
import org.apache.lucene.util.LuceneTestCase;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Simulates writing Lucene 4.0 Stored Fields Format.
|
||||||
|
*/
|
||||||
|
public class Lucene40RWTermVectorsFormat extends Lucene40TermVectorsFormat {
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public TermVectorsWriter vectorsWriter(Directory directory, SegmentInfo segmentInfo, IOContext context) throws IOException {
|
||||||
|
if (!LuceneTestCase.OLD_FORMAT_IMPERSONATION_IS_ACTIVE) {
|
||||||
|
throw new UnsupportedOperationException("this codec can only be used for reading");
|
||||||
|
} else {
|
||||||
|
return new Lucene40TermVectorsWriter(directory, segmentInfo.name, context);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,188 @@
|
||||||
|
package org.apache.lucene.codecs.lucene40;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Copyright 2004 The Apache Software Foundation
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License"); you may not
|
||||||
|
* use this file except in compliance with the License. You may obtain a copy of
|
||||||
|
* the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
|
||||||
|
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
||||||
|
* License for the specific language governing permissions and limitations under
|
||||||
|
* the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
|
||||||
|
import org.apache.lucene.codecs.CodecUtil;
|
||||||
|
import org.apache.lucene.codecs.StoredFieldsWriter;
|
||||||
|
import org.apache.lucene.index.FieldInfo;
|
||||||
|
import org.apache.lucene.index.FieldInfos;
|
||||||
|
import org.apache.lucene.index.IndexFileNames;
|
||||||
|
import org.apache.lucene.index.StorableField;
|
||||||
|
import org.apache.lucene.store.Directory;
|
||||||
|
import org.apache.lucene.store.IOContext;
|
||||||
|
import org.apache.lucene.store.IndexOutput;
|
||||||
|
import org.apache.lucene.store.RAMOutputStream;
|
||||||
|
import org.apache.lucene.util.BytesRef;
|
||||||
|
import org.apache.lucene.util.IOUtils;
|
||||||
|
|
||||||
|
import static org.apache.lucene.codecs.lucene40.Lucene40StoredFieldsReader.*;
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Class responsible for writing stored document fields.
|
||||||
|
* <p/>
|
||||||
|
* It uses <segment>.fdt and <segment>.fdx; files.
|
||||||
|
*
|
||||||
|
* @see Lucene40StoredFieldsFormat
|
||||||
|
* @lucene.experimental
|
||||||
|
*/
|
||||||
|
public final class Lucene40StoredFieldsWriter extends StoredFieldsWriter {
|
||||||
|
|
||||||
|
private final Directory directory;
|
||||||
|
private final String segment;
|
||||||
|
private IndexOutput fieldsStream;
|
||||||
|
private IndexOutput indexStream;
|
||||||
|
private final RAMOutputStream fieldsBuffer = new RAMOutputStream();
|
||||||
|
|
||||||
|
/** Sole constructor. */
|
||||||
|
public Lucene40StoredFieldsWriter(Directory directory, String segment, IOContext context) throws IOException {
|
||||||
|
assert directory != null;
|
||||||
|
this.directory = directory;
|
||||||
|
this.segment = segment;
|
||||||
|
|
||||||
|
boolean success = false;
|
||||||
|
try {
|
||||||
|
fieldsStream = directory.createOutput(IndexFileNames.segmentFileName(segment, "", FIELDS_EXTENSION), context);
|
||||||
|
indexStream = directory.createOutput(IndexFileNames.segmentFileName(segment, "", FIELDS_INDEX_EXTENSION), context);
|
||||||
|
|
||||||
|
CodecUtil.writeHeader(fieldsStream, CODEC_NAME_DAT, VERSION_CURRENT);
|
||||||
|
CodecUtil.writeHeader(indexStream, CODEC_NAME_IDX, VERSION_CURRENT);
|
||||||
|
assert HEADER_LENGTH_DAT == fieldsStream.getFilePointer();
|
||||||
|
assert HEADER_LENGTH_IDX == indexStream.getFilePointer();
|
||||||
|
      success = true;
    } finally {
      if (!success) {
        abort();
      }
    }
  }

  int numStoredFields;

  // Writes the contents of buffer into the fields stream
  // and adds a new entry for this document into the index
  // stream. This assumes the buffer was already written
  // in the correct fields format.
  @Override
  public void startDocument() throws IOException {
    indexStream.writeLong(fieldsStream.getFilePointer());
  }

  @Override
  public void finishDocument() throws IOException {
    fieldsStream.writeVInt(numStoredFields);
    fieldsBuffer.writeTo(fieldsStream);
    fieldsBuffer.reset();
    numStoredFields = 0;
  }

  @Override
  public void close() throws IOException {
    try {
      IOUtils.close(fieldsStream, indexStream);
    } finally {
      fieldsStream = indexStream = null;
    }
  }

  @Override
  public void abort() {
    try {
      close();
    } catch (Throwable ignored) {}
    IOUtils.deleteFilesIgnoringExceptions(directory,
        IndexFileNames.segmentFileName(segment, "", FIELDS_EXTENSION),
        IndexFileNames.segmentFileName(segment, "", FIELDS_INDEX_EXTENSION));
  }

  @Override
  public void writeField(FieldInfo info, StorableField field) throws IOException {
    numStoredFields++;

    fieldsBuffer.writeVInt(info.number);
    int bits = 0;
    final BytesRef bytes;
    final String string;
    // TODO: maybe a field should serialize itself?
    // this way we don't bake into indexer all these
    // specific encodings for different fields? and apps
    // can customize...

    Number number = field.numericValue();
    if (number != null) {
      if (number instanceof Byte || number instanceof Short || number instanceof Integer) {
        bits |= FIELD_IS_NUMERIC_INT;
      } else if (number instanceof Long) {
        bits |= FIELD_IS_NUMERIC_LONG;
      } else if (number instanceof Float) {
        bits |= FIELD_IS_NUMERIC_FLOAT;
      } else if (number instanceof Double) {
        bits |= FIELD_IS_NUMERIC_DOUBLE;
      } else {
        throw new IllegalArgumentException("cannot store numeric type " + number.getClass());
      }
      string = null;
      bytes = null;
    } else {
      bytes = field.binaryValue();
      if (bytes != null) {
        bits |= FIELD_IS_BINARY;
        string = null;
      } else {
        string = field.stringValue();
        if (string == null) {
          throw new IllegalArgumentException("field " + field.name() + " is stored but does not have binaryValue, stringValue nor numericValue");
        }
      }
    }

    fieldsBuffer.writeByte((byte) bits);

    if (bytes != null) {
      fieldsBuffer.writeVInt(bytes.length);
      fieldsBuffer.writeBytes(bytes.bytes, bytes.offset, bytes.length);
    } else if (string != null) {
      fieldsBuffer.writeString(field.stringValue());
    } else {
      if (number instanceof Byte || number instanceof Short || number instanceof Integer) {
        fieldsBuffer.writeInt(number.intValue());
      } else if (number instanceof Long) {
        fieldsBuffer.writeLong(number.longValue());
      } else if (number instanceof Float) {
        fieldsBuffer.writeInt(Float.floatToIntBits(number.floatValue()));
      } else if (number instanceof Double) {
        fieldsBuffer.writeLong(Double.doubleToLongBits(number.doubleValue()));
      } else {
        throw new AssertionError("Cannot get here");
      }
    }
  }

  @Override
  public void finish(FieldInfos fis, int numDocs) {
    long indexFP = indexStream.getFilePointer();
    if (HEADER_LENGTH_IDX+((long) numDocs)*8 != indexFP)
      // This is most likely a bug in Sun JRE 1.6.0_04/_05;
      // we detect that the bug has struck, here, and
      // throw an exception to prevent the corruption from
      // entering the index. See LUCENE-1282 for
      // details.
      throw new RuntimeException("fdx size mismatch: docCount is " + numDocs + " but fdx file size is " + indexFP + " (wrote numDocs=" + ((indexFP-HEADER_LENGTH_IDX)/8.0) + " file=" + indexStream.toString() + "; now aborting this merge to prevent index corruption");
  }
}
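The finish() check above encodes a simple size invariant of the .fdx index file: after the header, every document contributes exactly one 8-byte pointer into the fields data file, so when the writer finishes the file must be exactly headerLength + numDocs * 8 bytes long. A minimal sketch of that arithmetic follows; the helper class and the header length are hypothetical and only illustrate the invariant, they are not part of this patch.

// Hypothetical illustration of the .fdx size invariant checked in finish():
// the index file is a fixed-length header followed by one 8-byte file
// pointer per document, so any other length means pointers were dropped
// or duplicated and the segment would be corrupt.
class FdxSizeCheck {
  static long expectedFdxLength(long headerLength, int numDocs) {
    return headerLength + ((long) numDocs) * 8L;
  }

  public static void main(String[] args) {
    long header = 12;  // assumed header length, for illustration only
    System.out.println(expectedFdxLength(header, 1000));  // prints 8012
  }
}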
@ -20,21 +20,15 @@ package org.apache.lucene.codecs.lucene40;
 import java.io.IOException;
 
 import org.apache.lucene.codecs.CodecUtil;
-import org.apache.lucene.codecs.TermVectorsReader;
 import org.apache.lucene.codecs.TermVectorsWriter;
-import org.apache.lucene.index.AtomicReader;
 import org.apache.lucene.index.FieldInfo;
 import org.apache.lucene.index.FieldInfos;
-import org.apache.lucene.index.Fields;
 import org.apache.lucene.index.IndexFileNames;
-import org.apache.lucene.index.MergeState;
-import org.apache.lucene.index.SegmentReader;
 import org.apache.lucene.store.DataInput;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.store.IOContext;
 import org.apache.lucene.store.IndexOutput;
 import org.apache.lucene.util.ArrayUtil;
-import org.apache.lucene.util.Bits;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.BytesRefBuilder;
 import org.apache.lucene.util.IOUtils;
@ -285,144 +279,6 @@ public final class Lucene40TermVectorsWriter extends TermVectorsWriter {
         IndexFileNames.segmentFileName(segment, "", Lucene40TermVectorsReader.VECTORS_DOCUMENTS_EXTENSION),
         IndexFileNames.segmentFileName(segment, "", Lucene40TermVectorsReader.VECTORS_FIELDS_EXTENSION));
   }
 
-  /**
-   * Do a bulk copy of numDocs documents from reader to our
-   * streams. This is used to expedite merging, if the
-   * field numbers are congruent.
-   */
-  private void addRawDocuments(Lucene40TermVectorsReader reader, int[] tvdLengths, int[] tvfLengths, int numDocs) throws IOException {
-    long tvdPosition = tvd.getFilePointer();
-    long tvfPosition = tvf.getFilePointer();
-    long tvdStart = tvdPosition;
-    long tvfStart = tvfPosition;
-    for(int i=0;i<numDocs;i++) {
-      tvx.writeLong(tvdPosition);
-      tvdPosition += tvdLengths[i];
-      tvx.writeLong(tvfPosition);
-      tvfPosition += tvfLengths[i];
-    }
-    tvd.copyBytes(reader.getTvdStream(), tvdPosition-tvdStart);
-    tvf.copyBytes(reader.getTvfStream(), tvfPosition-tvfStart);
-    assert tvd.getFilePointer() == tvdPosition;
-    assert tvf.getFilePointer() == tvfPosition;
-  }
-
-  @Override
-  public final int merge(MergeState mergeState) throws IOException {
-    // Used for bulk-reading raw bytes for term vectors
-    int rawDocLengths[] = new int[MAX_RAW_MERGE_DOCS];
-    int rawDocLengths2[] = new int[MAX_RAW_MERGE_DOCS];
-
-    int idx = 0;
-    int numDocs = 0;
-    for (int i = 0; i < mergeState.readers.size(); i++) {
-      final AtomicReader reader = mergeState.readers.get(i);
-
-      final SegmentReader matchingSegmentReader = mergeState.matchingSegmentReaders[idx++];
-      Lucene40TermVectorsReader matchingVectorsReader = null;
-      if (matchingSegmentReader != null) {
-        TermVectorsReader vectorsReader = matchingSegmentReader.getTermVectorsReader();
-
-        if (vectorsReader != null && vectorsReader instanceof Lucene40TermVectorsReader) {
-          matchingVectorsReader = (Lucene40TermVectorsReader) vectorsReader;
-        }
-      }
-      if (reader.getLiveDocs() != null) {
-        numDocs += copyVectorsWithDeletions(mergeState, matchingVectorsReader, reader, rawDocLengths, rawDocLengths2);
-      } else {
-        numDocs += copyVectorsNoDeletions(mergeState, matchingVectorsReader, reader, rawDocLengths, rawDocLengths2);
-      }
-    }
-    finish(mergeState.fieldInfos, numDocs);
-    return numDocs;
-  }
-
-  /** Maximum number of contiguous documents to bulk-copy
-      when merging term vectors */
-  private final static int MAX_RAW_MERGE_DOCS = 4192;
-
-  private int copyVectorsWithDeletions(MergeState mergeState,
-                                       final Lucene40TermVectorsReader matchingVectorsReader,
-                                       final AtomicReader reader,
-                                       int rawDocLengths[],
-                                       int rawDocLengths2[])
-          throws IOException {
-    final int maxDoc = reader.maxDoc();
-    final Bits liveDocs = reader.getLiveDocs();
-    int totalNumDocs = 0;
-    if (matchingVectorsReader != null) {
-      // We can bulk-copy because the fieldInfos are "congruent"
-      for (int docNum = 0; docNum < maxDoc;) {
-        if (!liveDocs.get(docNum)) {
-          // skip deleted docs
-          ++docNum;
-          continue;
-        }
-        // We can optimize this case (doing a bulk byte copy) since the field
-        // numbers are identical
-        int start = docNum, numDocs = 0;
-        do {
-          docNum++;
-          numDocs++;
-          if (docNum >= maxDoc) break;
-          if (!liveDocs.get(docNum)) {
-            docNum++;
-            break;
-          }
-        } while(numDocs < MAX_RAW_MERGE_DOCS);
-
-        matchingVectorsReader.rawDocs(rawDocLengths, rawDocLengths2, start, numDocs);
-        addRawDocuments(matchingVectorsReader, rawDocLengths, rawDocLengths2, numDocs);
-        totalNumDocs += numDocs;
-        mergeState.checkAbort.work(300 * numDocs);
-      }
-    } else {
-      for (int docNum = 0; docNum < maxDoc; docNum++) {
-        if (!liveDocs.get(docNum)) {
-          // skip deleted docs
-          continue;
-        }
-
-        // NOTE: it's very important to first assign to vectors then pass it to
-        // termVectorsWriter.addAllDocVectors; see LUCENE-1282
-        Fields vectors = reader.getTermVectors(docNum);
-        addAllDocVectors(vectors, mergeState);
-        totalNumDocs++;
-        mergeState.checkAbort.work(300);
-      }
-    }
-    return totalNumDocs;
-  }
-
-  private int copyVectorsNoDeletions(MergeState mergeState,
-                                     final Lucene40TermVectorsReader matchingVectorsReader,
-                                     final AtomicReader reader,
-                                     int rawDocLengths[],
-                                     int rawDocLengths2[])
-          throws IOException {
-    final int maxDoc = reader.maxDoc();
-    if (matchingVectorsReader != null) {
-      // We can bulk-copy because the fieldInfos are "congruent"
-      int docCount = 0;
-      while (docCount < maxDoc) {
-        int len = Math.min(MAX_RAW_MERGE_DOCS, maxDoc - docCount);
-        matchingVectorsReader.rawDocs(rawDocLengths, rawDocLengths2, docCount, len);
-        addRawDocuments(matchingVectorsReader, rawDocLengths, rawDocLengths2, len);
-        docCount += len;
-        mergeState.checkAbort.work(300 * len);
-      }
-    } else {
-      for (int docNum = 0; docNum < maxDoc; docNum++) {
-        // NOTE: it's very important to first assign to vectors then pass it to
-        // termVectorsWriter.addAllDocVectors; see LUCENE-1282
-        Fields vectors = reader.getTermVectors(docNum);
-        addAllDocVectors(vectors, mergeState);
-        mergeState.checkAbort.work(300);
-      }
-    }
-    return maxDoc;
-  }
-
   @Override
   public void finish(FieldInfos fis, int numDocs) {
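The removed copyVectorsWithDeletions above is the core of the old bulk-merge path: when the source segment's field numbering is "congruent", it scans the liveDocs bitset for runs of up to MAX_RAW_MERGE_DOCS consecutive live documents and copies each run as one contiguous byte range instead of re-serializing every vector. A standalone sketch of that run detection follows; the helper class is hypothetical and the loop is a simplified reformulation for illustration only, not part of this patch.

import java.util.ArrayList;
import java.util.List;

// Hypothetical, simplified reformulation of the run detection used by the
// removed bulk-copy path: group consecutive live docIDs into [start, length]
// runs, capped at maxRun, skipping deleted documents entirely.
class LiveDocRuns {
  static List<int[]> liveRuns(boolean[] liveDocs, int maxRun) {
    List<int[]> runs = new ArrayList<>();
    int doc = 0;
    while (doc < liveDocs.length) {
      if (!liveDocs[doc]) {  // skip deleted docs
        doc++;
        continue;
      }
      int start = doc;
      int len = 0;
      while (doc < liveDocs.length && liveDocs[doc] && len < maxRun) {
        doc++;
        len++;
      }
      runs.add(new int[] { start, len });
    }
    return runs;
  }

  public static void main(String[] args) {
    boolean[] live = { true, true, false, true, true, true };
    // prints [0,2] and [3,3]: two contiguous ranges that could be copied raw
    for (int[] run : liveRuns(live, 4192)) {
      System.out.println("[" + run[0] + "," + run[1] + "]");
    }
  }
}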
@ -7,10 +7,12 @@ import org.apache.lucene.codecs.FieldInfosFormat;
 import org.apache.lucene.codecs.FieldInfosWriter;
 import org.apache.lucene.codecs.NormsFormat;
 import org.apache.lucene.codecs.StoredFieldsFormat;
+import org.apache.lucene.codecs.TermVectorsFormat;
 import org.apache.lucene.codecs.lucene40.Lucene40FieldInfosFormat;
 import org.apache.lucene.codecs.lucene40.Lucene40FieldInfosWriter;
 import org.apache.lucene.codecs.lucene40.Lucene40RWDocValuesFormat;
 import org.apache.lucene.codecs.lucene40.Lucene40RWNormsFormat;
+import org.apache.lucene.codecs.lucene40.Lucene40RWTermVectorsFormat;
 import org.apache.lucene.util.LuceneTestCase;
 
 /*
@ -49,6 +51,7 @@ public class Lucene41RWCodec extends Lucene41Codec {
 
   private final DocValuesFormat docValues = new Lucene40RWDocValuesFormat();
   private final NormsFormat norms = new Lucene40RWNormsFormat();
+  private final TermVectorsFormat vectors = new Lucene40RWTermVectorsFormat();
 
   @Override
   public FieldInfosFormat fieldInfosFormat() {
@ -70,4 +73,8 @@ public class Lucene41RWCodec extends Lucene41Codec {
     return norms;
   }
 
+  @Override
+  public TermVectorsFormat termVectorsFormat() {
+    return vectors;
+  }
 }
@ -22,12 +22,11 @@ import java.util.HashMap;
 import java.util.HashSet;
 
 import org.apache.lucene.codecs.CodecUtil;
-import org.apache.lucene.codecs.DocValuesConsumer;
+import org.apache.lucene.codecs.NormsConsumer;
 import org.apache.lucene.index.FieldInfo;
 import org.apache.lucene.index.IndexFileNames;
 import org.apache.lucene.index.SegmentWriteState;
 import org.apache.lucene.store.IndexOutput;
-import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.IOUtils;
 import org.apache.lucene.util.MathUtil;
 import org.apache.lucene.util.packed.BlockPackedWriter;
@ -39,7 +38,7 @@ import static org.apache.lucene.codecs.lucene42.Lucene42DocValuesProducer.VERSIO
 /**
  * Writer for {@link Lucene42NormsFormat}
  */
-class Lucene42NormsConsumer extends DocValuesConsumer {
+class Lucene42NormsConsumer extends NormsConsumer {
   static final byte NUMBER = 0;
 
   static final int BLOCK_SIZE = 4096;
@ -73,7 +72,7 @@ class Lucene42NormsConsumer extends DocValuesConsumer {
   }
 
   @Override
-  public void addNumericField(FieldInfo field, Iterable<Number> values) throws IOException {
+  public void addNormsField(FieldInfo field, Iterable<Number> values) throws IOException {
     meta.writeVInt(field.number);
     meta.writeByte(NUMBER);
     meta.writeLong(data.getFilePointer());
@ -194,24 +193,4 @@ class Lucene42NormsConsumer extends DocValuesConsumer {
       meta = data = null;
     }
   }
-
-  @Override
-  public void addBinaryField(FieldInfo field, final Iterable<BytesRef> values) throws IOException {
-    throw new UnsupportedOperationException();
-  }
-
-  @Override
-  public void addSortedField(FieldInfo field, Iterable<BytesRef> values, Iterable<Number> docToOrd) throws IOException {
-    throw new UnsupportedOperationException();
-  }
-
-  @Override
-  public void addSortedNumericField(FieldInfo field, Iterable<Number> docToValueCount, Iterable<Number> values) throws IOException {
-    throw new UnsupportedOperationException();
-  }
-
-  @Override
-  public void addSortedSetField(FieldInfo field, Iterable<BytesRef> values, final Iterable<Number> docToOrdCount, final Iterable<Number> ords) throws IOException {
-    throw new UnsupportedOperationException();
-  }
 }
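With the split above, norms writing goes through the dedicated NormsConsumer API and its single addNormsField hook, so the binary and sorted DocValues entry points that only threw UnsupportedOperationException can be dropped. A minimal sketch of how a caller might feed per-document norm values through the renamed hook follows; the helper class, field handle, and values are made up for illustration and are not part of this patch.

import java.io.IOException;
import java.util.Arrays;

import org.apache.lucene.codecs.NormsConsumer;
import org.apache.lucene.index.FieldInfo;

// Hypothetical sketch: hand one norm value per document (in docID order)
// to a NormsConsumer via the renamed addNormsField entry point.
class NormsWriteSketch {
  static void writeNorms(NormsConsumer consumer, FieldInfo field) throws IOException {
    consumer.addNormsField(field, Arrays.<Number>asList(3L, 1L, 4L));
  }
}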
@ -19,7 +19,7 @@ package org.apache.lucene.codecs.lucene42;
 
 import java.io.IOException;
 
-import org.apache.lucene.codecs.DocValuesConsumer;
+import org.apache.lucene.codecs.NormsConsumer;
 import org.apache.lucene.index.SegmentWriteState;
 import org.apache.lucene.util.LuceneTestCase;
 
@ -29,7 +29,7 @@ import org.apache.lucene.util.LuceneTestCase;
 public class Lucene42RWNormsFormat extends Lucene42NormsFormat {
 
   @Override
-  public DocValuesConsumer normsConsumer(SegmentWriteState state) throws IOException {
+  public NormsConsumer normsConsumer(SegmentWriteState state) throws IOException {
     if (LuceneTestCase.OLD_FORMAT_IMPERSONATION_IS_ACTIVE) {
       return new Lucene42NormsConsumer(state, DATA_CODEC, DATA_EXTENSION, METADATA_CODEC, METADATA_EXTENSION, acceptableOverheadRatio);
     } else {