mirror of https://github.com/apache/lucene.git
Remove dead codec code.
parent 4bb705bad8
commit ca22f17662
@@ -26,7 +26,6 @@ import java.util.function.IntFunction;
import org.apache.lucene.codecs.CodecUtil;
import org.apache.lucene.codecs.MutablePointValues;
import org.apache.lucene.index.MergeState;
import org.apache.lucene.index.PointValues.IntersectVisitor;
import org.apache.lucene.index.PointValues.Relation;
import org.apache.lucene.store.ChecksumIndexInput;

@@ -44,7 +43,6 @@ import org.apache.lucene.util.LongBitSet;
import org.apache.lucene.util.MSBRadixSorter;
import org.apache.lucene.util.NumericUtils;
import org.apache.lucene.util.OfflineSorter;
import org.apache.lucene.util.PriorityQueue;
import org.apache.lucene.util.StringHelper;
import org.apache.lucene.util.bkd.BKDWriter;
import org.apache.lucene.util.bkd.HeapPointWriter;
@@ -309,124 +307,6 @@ final class SimpleTextBKDWriter implements Closeable {
    return pointCount;
  }

  private static class MergeReader {
    final SimpleTextBKDReader bkd;
    final SimpleTextBKDReader.IntersectState state;
    final MergeState.DocMap docMap;

    /** Current doc ID */
    public int docID;

    /** Which doc in this block we are up to */
    private int docBlockUpto;

    /** How many docs in the current block */
    private int docsInBlock;

    /** Which leaf block we are up to */
    private int blockID;

    private final byte[] packedValues;

    public MergeReader(SimpleTextBKDReader bkd, MergeState.DocMap docMap) throws IOException {
      this.bkd = bkd;
      state = new SimpleTextBKDReader.IntersectState(bkd.in.clone(),
                                                     bkd.numDims,
                                                     bkd.packedBytesLength,
                                                     bkd.maxPointsInLeafNode,
                                                     null);
      this.docMap = docMap;
      long minFP = Long.MAX_VALUE;
      //System.out.println("MR.init " + this + " bkdreader=" + bkd + " leafBlockFPs.length=" + bkd.leafBlockFPs.length);
      for(long fp : bkd.leafBlockFPs) {
        minFP = Math.min(minFP, fp);
        //System.out.println("  leaf fp=" + fp);
      }
      state.in.seek(minFP);
      this.packedValues = new byte[bkd.maxPointsInLeafNode * bkd.packedBytesLength];
    }

    public boolean next() throws IOException {
      //System.out.println("MR.next this=" + this);
      while (true) {
        if (docBlockUpto == docsInBlock) {
          if (blockID == bkd.leafBlockFPs.length) {
            //System.out.println("  done!");
            return false;
          }
          //System.out.println("  new block @ fp=" + state.in.getFilePointer());
          docsInBlock = bkd.readDocIDs(state.in, state.in.getFilePointer(), state.scratchDocIDs);
          assert docsInBlock > 0;
          docBlockUpto = 0;
          bkd.visitDocValues(state.commonPrefixLengths, state.scratchPackedValue, state.in, state.scratchDocIDs, docsInBlock, new IntersectVisitor() {
            int i = 0;

            @Override
            public void visit(int docID) throws IOException {
              throw new UnsupportedOperationException();
            }

            @Override
            public void visit(int docID, byte[] packedValue) throws IOException {
              assert docID == state.scratchDocIDs[i];
              System.arraycopy(packedValue, 0, packedValues, i * bkd.packedBytesLength, bkd.packedBytesLength);
              i++;
            }

            @Override
            public Relation compare(byte[] minPackedValue, byte[] maxPackedValue) {
              throw new UnsupportedOperationException();
            }

          });

          blockID++;
        }

        final int index = docBlockUpto++;
        int oldDocID = state.scratchDocIDs[index];

        int mappedDocID;
        if (docMap == null) {
          mappedDocID = oldDocID;
        } else {
          mappedDocID = docMap.get(oldDocID);
        }

        if (mappedDocID != -1) {
          // Not deleted!
          docID = mappedDocID;
          System.arraycopy(packedValues, index * bkd.packedBytesLength, state.scratchPackedValue, 0, bkd.packedBytesLength);
          return true;
        }
      }
    }
  }

  private static class BKDMergeQueue extends PriorityQueue<MergeReader> {
    private final int bytesPerDim;

    public BKDMergeQueue(int bytesPerDim, int maxSize) {
      super(maxSize);
      this.bytesPerDim = bytesPerDim;
    }

    @Override
    public boolean lessThan(MergeReader a, MergeReader b) {
      assert a != b;

      int cmp = StringHelper.compare(bytesPerDim, a.state.scratchPackedValue, 0, b.state.scratchPackedValue, 0);
      if (cmp < 0) {
        return true;
      } else if (cmp > 0) {
        return false;
      }

      // Tie break by sorting smaller docIDs earlier:
      return a.docID < b.docID;
    }
  }

  /** Write a field from a {@link MutablePointValues}. This way of writing
   * points is faster than regular writes with {@link BKDWriter#add} since
   * there is opportunity for reordering points before writing them to

@@ -527,50 +407,6 @@ final class SimpleTextBKDWriter implements Closeable {
    return oneDimWriter.finish();
  }

  // TODO: remove this opto: SimpleText is supposed to be simple!

  /** More efficient bulk-add for incoming {@link SimpleTextBKDReader}s. This does a merge sort of the already
   * sorted values and currently only works when numDims==1. This returns -1 if all documents containing
   * dimensional values were deleted. */
  public long merge(IndexOutput out, List<MergeState.DocMap> docMaps, List<SimpleTextBKDReader> readers) throws IOException {
    assert docMaps == null || readers.size() == docMaps.size();

    BKDMergeQueue queue = new BKDMergeQueue(bytesPerDim, readers.size());

    for(int i=0;i<readers.size();i++) {
      SimpleTextBKDReader bkd = readers.get(i);
      MergeState.DocMap docMap;
      if (docMaps == null) {
        docMap = null;
      } else {
        docMap = docMaps.get(i);
      }
      MergeReader reader = new MergeReader(bkd, docMap);
      if (reader.next()) {
        queue.add(reader);
      }
    }

    OneDimensionBKDWriter oneDimWriter = new OneDimensionBKDWriter(out);

    while (queue.size() != 0) {
      MergeReader reader = queue.top();
      // System.out.println("iter reader=" + reader);

      // NOTE: doesn't work with subclasses (e.g. SimpleText!)
      oneDimWriter.add(reader.state.scratchPackedValue, reader.docID);

      if (reader.next()) {
        queue.updateTop();
      } else {
        // This segment was exhausted
        queue.pop();
      }
    }

    return oneDimWriter.finish();
  }

  private class OneDimensionBKDWriter {

    final IndexOutput out;
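The removed merge() above is a textbook k-way merge: each SimpleTextBKDReader is already sorted, one MergeReader cursor per input is seeded into a priority queue ordered by the current packed value (BKDMergeQueue.lessThan), and the writer repeatedly takes the smallest head, advances that cursor, and re-heapifies. Below is a minimal, self-contained sketch of the same pattern using plain java.util.PriorityQueue over int doc IDs; the Cursor class and method names are illustrative stand-ins, not Lucene APIs, and Lucene's own queue avoids the poll/add pair by calling updateTop() on its top element instead.

// Sketch only: k-way merge of already-sorted inputs, analogous to the removed merge().
import java.util.Arrays;
import java.util.List;
import java.util.PriorityQueue;

final class KWayMergeSketch {

  /** Hypothetical stand-in for MergeReader: one sorted input plus a read position. */
  static final class Cursor {
    final int[] sorted;
    int pos;
    Cursor(int[] sorted) { this.sorted = sorted; }
    boolean exhausted() { return pos == sorted.length; }
    int current() { return sorted[pos]; }
  }

  static int[] merge(List<Cursor> inputs) {
    // Order cursors by their current (smallest unread) value, like BKDMergeQueue.lessThan.
    PriorityQueue<Cursor> queue = new PriorityQueue<>((a, b) -> Integer.compare(a.current(), b.current()));
    int total = 0;
    for (Cursor c : inputs) {
      total += c.sorted.length;
      if (c.exhausted() == false) {
        queue.add(c);
      }
    }
    int[] merged = new int[total];
    int upto = 0;
    while (queue.isEmpty() == false) {
      Cursor top = queue.poll();      // input holding the globally smallest unread value
      merged[upto++] = top.current();
      top.pos++;                      // advance that input, like MergeReader.next()
      if (top.exhausted() == false) {
        queue.add(top);               // exhausted inputs drop out, like queue.pop() above
      }
    }
    return Arrays.copyOf(merged, upto);
  }
}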
@@ -514,16 +514,6 @@ class SimpleTextFieldsReader extends FieldsProducer {
    }
  }

  static class TermData {
    public long docsStart;
    public int docFreq;

    public TermData(long docsStart, int docFreq) {
      this.docsStart = docsStart;
      this.docFreq = docFreq;
    }
  }

  private static final long TERMS_BASE_RAM_BYTES_USED =
      RamUsageEstimator.shallowSizeOfInstance(SimpleTextTerms.class)
          + RamUsageEstimator.shallowSizeOfInstance(BytesRef.class)
@ -1,95 +0,0 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.lucene.codecs.blocktree;
|
||||
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
import org.apache.lucene.index.PostingsEnum;
|
||||
import org.apache.lucene.search.DocIdSetIterator;
|
||||
import org.apache.lucene.util.BitSet;
|
||||
import org.apache.lucene.util.BitSetIterator;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.FixedBitSet; // javadocs
|
||||
|
||||
/** Takes a {@link FixedBitSet} and creates a DOCS {@link PostingsEnum} from it. */
|
||||
|
||||
class BitSetPostingsEnum extends PostingsEnum {
|
||||
private final BitSet bits;
|
||||
private DocIdSetIterator in;
|
||||
|
||||
BitSetPostingsEnum(BitSet bits) {
|
||||
this.bits = bits;
|
||||
reset();
|
||||
}
|
||||
|
||||
@Override
|
||||
public int freq() throws IOException {
|
||||
return 1;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int docID() {
|
||||
if (in == null) {
|
||||
return -1;
|
||||
} else {
|
||||
return in.docID();
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public int nextDoc() throws IOException {
|
||||
if (in == null) {
|
||||
in = new BitSetIterator(bits, 0);
|
||||
}
|
||||
return in.nextDoc();
|
||||
}
|
||||
|
||||
@Override
|
||||
public int advance(int target) throws IOException {
|
||||
return in.advance(target);
|
||||
}
|
||||
|
||||
@Override
|
||||
public long cost() {
|
||||
return in.cost();
|
||||
}
|
||||
|
||||
void reset() {
|
||||
in = null;
|
||||
}
|
||||
|
||||
@Override
|
||||
public BytesRef getPayload() {
|
||||
return null;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int nextPosition() {
|
||||
throw new UnsupportedOperationException();
|
||||
}
|
||||
|
||||
@Override
|
||||
public int startOffset() {
|
||||
throw new UnsupportedOperationException();
|
||||
}
|
||||
|
||||
@Override
|
||||
public int endOffset() {
|
||||
throw new UnsupportedOperationException();
|
||||
}
|
||||
}
|
|
@ -1,92 +0,0 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.lucene.codecs.blocktree;
|
||||
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
import org.apache.lucene.codecs.PostingsWriterBase;
|
||||
import org.apache.lucene.index.ImpactsEnum;
|
||||
import org.apache.lucene.index.PostingsEnum;
|
||||
import org.apache.lucene.index.TermsEnum;
|
||||
import org.apache.lucene.search.similarities.Similarity.SimScorer;
|
||||
import org.apache.lucene.util.BitSet;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
|
||||
/** Silly stub class, used only when writing an auto-prefix
|
||||
* term in order to expose DocsEnum over a FixedBitSet. We
|
||||
* pass this to {@link PostingsWriterBase#writeTerm} so
|
||||
* that it can pull .docs() multiple times for the
|
||||
* current term. */
|
||||
|
||||
class BitSetTermsEnum extends TermsEnum {
|
||||
private final BitSetPostingsEnum postingsEnum;
|
||||
|
||||
public BitSetTermsEnum(BitSet docs) {
|
||||
postingsEnum = new BitSetPostingsEnum(docs);
|
||||
}
|
||||
|
||||
@Override
|
||||
public SeekStatus seekCeil(BytesRef text) {
|
||||
throw new UnsupportedOperationException();
|
||||
}
|
||||
|
||||
@Override
|
||||
public void seekExact(long ord) {
|
||||
throw new UnsupportedOperationException();
|
||||
}
|
||||
|
||||
@Override
|
||||
public BytesRef term() {
|
||||
throw new UnsupportedOperationException();
|
||||
}
|
||||
|
||||
@Override
|
||||
public BytesRef next() {
|
||||
throw new UnsupportedOperationException();
|
||||
}
|
||||
|
||||
@Override
|
||||
public long ord() {
|
||||
throw new UnsupportedOperationException();
|
||||
}
|
||||
|
||||
@Override
|
||||
public int docFreq() {
|
||||
throw new UnsupportedOperationException();
|
||||
}
|
||||
|
||||
@Override
|
||||
public long totalTermFreq() {
|
||||
throw new UnsupportedOperationException();
|
||||
}
|
||||
|
||||
@Override
|
||||
public PostingsEnum postings(PostingsEnum reuse, int flags) {
|
||||
if (flags != PostingsEnum.NONE) {
|
||||
// We only work with DOCS_ONLY fields
|
||||
return null;
|
||||
}
|
||||
postingsEnum.reset();
|
||||
return postingsEnum;
|
||||
}
|
||||
|
||||
@Override
|
||||
public ImpactsEnum impacts(SimScorer scorer, int flags) throws IOException {
|
||||
throw new UnsupportedOperationException();
|
||||
}
|
||||
}
|
|
@ -1,268 +0,0 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.lucene.codecs.lucene62;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
|
||||
import org.apache.lucene.codecs.CodecUtil;
|
||||
import org.apache.lucene.codecs.SegmentInfoFormat;
|
||||
import org.apache.lucene.index.CorruptIndexException;
|
||||
import org.apache.lucene.index.IndexFileNames;
|
||||
import org.apache.lucene.index.IndexWriter; // javadocs
|
||||
import org.apache.lucene.index.SegmentInfo; // javadocs
|
||||
import org.apache.lucene.index.SegmentInfos; // javadocs
|
||||
import org.apache.lucene.search.Sort;
|
||||
import org.apache.lucene.search.SortField;
|
||||
import org.apache.lucene.search.SortedNumericSelector;
|
||||
import org.apache.lucene.search.SortedNumericSortField;
|
||||
import org.apache.lucene.search.SortedSetSelector;
|
||||
import org.apache.lucene.search.SortedSetSortField;
|
||||
import org.apache.lucene.store.ChecksumIndexInput;
|
||||
import org.apache.lucene.store.DataOutput; // javadocs
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.store.IOContext;
|
||||
import org.apache.lucene.util.Version;
|
||||
|
||||
/**
|
||||
* Lucene 6.2 Segment info format.
|
||||
* <p>
|
||||
* Files:
|
||||
* <ul>
|
||||
* <li><tt>.si</tt>: Header, SegVersion, SegSize, IsCompoundFile, Diagnostics, Files, Attributes, IndexSort, Footer
|
||||
* </ul>
|
||||
* Data types:
|
||||
* <ul>
|
||||
* <li>Header --> {@link CodecUtil#writeIndexHeader IndexHeader}</li>
|
||||
* <li>SegSize --> {@link DataOutput#writeInt Int32}</li>
|
||||
* <li>SegVersion --> {@link DataOutput#writeString String}</li>
|
||||
* <li>Files --> {@link DataOutput#writeSetOfStrings Set<String>}</li>
|
||||
* <li>Diagnostics,Attributes --> {@link DataOutput#writeMapOfStrings Map<String,String>}</li>
|
||||
* <li>IsCompoundFile --> {@link DataOutput#writeByte Int8}</li>
|
||||
* <li>IndexSort --> {@link DataOutput#writeVInt Int32} count, followed by {@code count} SortField</li>
|
||||
* <li>SortField --> {@link DataOutput#writeString String} field name, followed by {@link DataOutput#writeVInt Int32} sort type ID,
|
||||
* followed by {@link DataOutput#writeByte Int8} indicatating reversed sort, followed by a type-specific encoding of the optional missing value
|
||||
* <li>Footer --> {@link CodecUtil#writeFooter CodecFooter}</li>
|
||||
* </ul>
|
||||
* Field Descriptions:
|
||||
* <ul>
|
||||
* <li>SegVersion is the code version that created the segment.</li>
|
||||
* <li>SegSize is the number of documents contained in the segment index.</li>
|
||||
* <li>IsCompoundFile records whether the segment is written as a compound file or
|
||||
* not. If this is -1, the segment is not a compound file. If it is 1, the segment
|
||||
* is a compound file.</li>
|
||||
* <li>The Diagnostics Map is privately written by {@link IndexWriter}, as a debugging aid,
|
||||
* for each segment it creates. It includes metadata like the current Lucene
|
||||
* version, OS, Java version, why the segment was created (merge, flush,
|
||||
* addIndexes), etc.</li>
|
||||
* <li>Files is a list of files referred to by this segment.</li>
|
||||
* </ul>
|
||||
*
|
||||
* @see SegmentInfos
|
||||
* @lucene.experimental
|
||||
*/
|
||||
public class Lucene62SegmentInfoFormat extends SegmentInfoFormat {
|
||||
|
||||
/** Sole constructor. */
|
||||
public Lucene62SegmentInfoFormat() {
|
||||
}
|
||||
|
||||
@Override
|
||||
public SegmentInfo read(Directory dir, String segment, byte[] segmentID, IOContext context) throws IOException {
|
||||
final String fileName = IndexFileNames.segmentFileName(segment, "", Lucene62SegmentInfoFormat.SI_EXTENSION);
|
||||
try (ChecksumIndexInput input = dir.openChecksumInput(fileName, context)) {
|
||||
Throwable priorE = null;
|
||||
SegmentInfo si = null;
|
||||
try {
|
||||
int format = CodecUtil.checkIndexHeader(input, Lucene62SegmentInfoFormat.CODEC_NAME,
|
||||
Lucene62SegmentInfoFormat.VERSION_START,
|
||||
Lucene62SegmentInfoFormat.VERSION_CURRENT,
|
||||
segmentID, "");
|
||||
final Version version = Version.fromBits(input.readInt(), input.readInt(), input.readInt());
|
||||
|
||||
final int docCount = input.readInt();
|
||||
if (docCount < 0) {
|
||||
throw new CorruptIndexException("invalid docCount: " + docCount, input);
|
||||
}
|
||||
final boolean isCompoundFile = input.readByte() == SegmentInfo.YES;
|
||||
|
||||
final Map<String,String> diagnostics = input.readMapOfStrings();
|
||||
final Set<String> files = input.readSetOfStrings();
|
||||
final Map<String,String> attributes = input.readMapOfStrings();
|
||||
|
||||
int numSortFields = input.readVInt();
|
||||
Sort indexSort;
|
||||
if (numSortFields > 0) {
|
||||
SortField[] sortFields = new SortField[numSortFields];
|
||||
for(int i=0;i<numSortFields;i++) {
|
||||
String fieldName = input.readString();
|
||||
int sortTypeID = input.readVInt();
|
||||
SortField.Type sortType;
|
||||
SortedSetSelector.Type sortedSetSelector = null;
|
||||
SortedNumericSelector.Type sortedNumericSelector = null;
|
||||
switch(sortTypeID) {
|
||||
case 0:
|
||||
sortType = SortField.Type.STRING;
|
||||
break;
|
||||
case 1:
|
||||
sortType = SortField.Type.LONG;
|
||||
break;
|
||||
case 2:
|
||||
sortType = SortField.Type.INT;
|
||||
break;
|
||||
case 3:
|
||||
sortType = SortField.Type.DOUBLE;
|
||||
break;
|
||||
case 4:
|
||||
sortType = SortField.Type.FLOAT;
|
||||
break;
|
||||
case 5:
|
||||
sortType = SortField.Type.STRING;
|
||||
byte selector = input.readByte();
|
||||
if (selector == 0) {
|
||||
sortedSetSelector = SortedSetSelector.Type.MIN;
|
||||
} else if (selector == 1) {
|
||||
sortedSetSelector = SortedSetSelector.Type.MAX;
|
||||
} else if (selector == 2) {
|
||||
sortedSetSelector = SortedSetSelector.Type.MIDDLE_MIN;
|
||||
} else if (selector == 3) {
|
||||
sortedSetSelector = SortedSetSelector.Type.MIDDLE_MAX;
|
||||
} else {
|
||||
throw new CorruptIndexException("invalid index SortedSetSelector ID: " + selector, input);
|
||||
}
|
||||
break;
|
||||
case 6:
|
||||
byte type = input.readByte();
|
||||
if (type == 0) {
|
||||
sortType = SortField.Type.LONG;
|
||||
} else if (type == 1) {
|
||||
sortType = SortField.Type.INT;
|
||||
} else if (type == 2) {
|
||||
sortType = SortField.Type.DOUBLE;
|
||||
} else if (type == 3) {
|
||||
sortType = SortField.Type.FLOAT;
|
||||
} else {
|
||||
throw new CorruptIndexException("invalid index SortedNumericSortField type ID: " + type, input);
|
||||
}
|
||||
byte numericSelector = input.readByte();
|
||||
if (numericSelector == 0) {
|
||||
sortedNumericSelector = SortedNumericSelector.Type.MIN;
|
||||
} else if (numericSelector == 1) {
|
||||
sortedNumericSelector = SortedNumericSelector.Type.MAX;
|
||||
} else {
|
||||
throw new CorruptIndexException("invalid index SortedNumericSelector ID: " + numericSelector, input);
|
||||
}
|
||||
break;
|
||||
default:
|
||||
throw new CorruptIndexException("invalid index sort field type ID: " + sortTypeID, input);
|
||||
}
|
||||
byte b = input.readByte();
|
||||
boolean reverse;
|
||||
if (b == 0) {
|
||||
reverse = true;
|
||||
} else if (b == 1) {
|
||||
reverse = false;
|
||||
} else {
|
||||
throw new CorruptIndexException("invalid index sort reverse: " + b, input);
|
||||
}
|
||||
|
||||
if (sortedSetSelector != null) {
|
||||
sortFields[i] = new SortedSetSortField(fieldName, reverse, sortedSetSelector);
|
||||
} else if (sortedNumericSelector != null) {
|
||||
sortFields[i] = new SortedNumericSortField(fieldName, sortType, reverse, sortedNumericSelector);
|
||||
} else {
|
||||
sortFields[i] = new SortField(fieldName, sortType, reverse);
|
||||
}
|
||||
|
||||
Object missingValue;
|
||||
b = input.readByte();
|
||||
if (b == 0) {
|
||||
missingValue = null;
|
||||
} else {
|
||||
switch(sortType) {
|
||||
case STRING:
|
||||
if (b == 1) {
|
||||
missingValue = SortField.STRING_LAST;
|
||||
} else if (b == 2) {
|
||||
missingValue = SortField.STRING_FIRST;
|
||||
} else {
|
||||
throw new CorruptIndexException("invalid missing value flag: " + b, input);
|
||||
}
|
||||
break;
|
||||
case LONG:
|
||||
if (b != 1) {
|
||||
throw new CorruptIndexException("invalid missing value flag: " + b, input);
|
||||
}
|
||||
missingValue = input.readLong();
|
||||
break;
|
||||
case INT:
|
||||
if (b != 1) {
|
||||
throw new CorruptIndexException("invalid missing value flag: " + b, input);
|
||||
}
|
||||
missingValue = input.readInt();
|
||||
break;
|
||||
case DOUBLE:
|
||||
if (b != 1) {
|
||||
throw new CorruptIndexException("invalid missing value flag: " + b, input);
|
||||
}
|
||||
missingValue = Double.longBitsToDouble(input.readLong());
|
||||
break;
|
||||
case FLOAT:
|
||||
if (b != 1) {
|
||||
throw new CorruptIndexException("invalid missing value flag: " + b, input);
|
||||
}
|
||||
missingValue = Float.intBitsToFloat(input.readInt());
|
||||
break;
|
||||
default:
|
||||
throw new AssertionError("unhandled sortType=" + sortType);
|
||||
}
|
||||
}
|
||||
if (missingValue != null) {
|
||||
sortFields[i].setMissingValue(missingValue);
|
||||
}
|
||||
}
|
||||
indexSort = new Sort(sortFields);
|
||||
} else if (numSortFields < 0) {
|
||||
throw new CorruptIndexException("invalid index sort field count: " + numSortFields, input);
|
||||
} else {
|
||||
indexSort = null;
|
||||
}
|
||||
|
||||
si = new SegmentInfo(dir, version, null, segment, docCount, isCompoundFile, null, diagnostics, segmentID, attributes, indexSort);
|
||||
si.setFiles(files);
|
||||
} catch (Throwable exception) {
|
||||
priorE = exception;
|
||||
} finally {
|
||||
CodecUtil.checkFooter(input, priorE);
|
||||
}
|
||||
return si;
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void write(Directory dir, SegmentInfo info, IOContext ioContext) throws IOException {
|
||||
throw new UnsupportedOperationException("This format can only be used for reading");
|
||||
}
|
||||
|
||||
/** File extension used to store {@link SegmentInfo}. */
|
||||
public final static String SI_EXTENSION = "si";
|
||||
static final String CODEC_NAME = "Lucene62SegmentInfo";
|
||||
static final int VERSION_START = 0;
|
||||
static final int VERSION_MULTI_VALUED_SORT = 1;
|
||||
static final int VERSION_CURRENT = VERSION_MULTI_VALUED_SORT;
|
||||
}
|
|
@@ -1,24 +0,0 @@
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/**
 * Components from the Lucene 6.2 index format
 * See {@link org.apache.lucene.codecs.lucene70} for an overview
 * of the current index format.
 */

package org.apache.lucene.codecs.lucene62;
@@ -128,7 +128,7 @@
 * <p>Each segment index maintains the following:</p>
 * <ul>
 * <li>
 * {@link org.apache.lucene.codecs.lucene62.Lucene62SegmentInfoFormat Segment info}.
 * {@link org.apache.lucene.codecs.lucene70.Lucene70SegmentInfoFormat Segment info}.
 *   This contains metadata about a segment, such as the number of documents,
 *   what files it uses,
 * </li>

@@ -233,7 +233,7 @@
 * file.</td>
 * </tr>
 * <tr>
 * <td>{@link org.apache.lucene.codecs.lucene62.Lucene62SegmentInfoFormat Segment Info}</td>
 * <td>{@link org.apache.lucene.codecs.lucene70.Lucene70SegmentInfoFormat Segment Info}</td>
 * <td>.si</td>
 * <td>Stores metadata about a segment</td>
 * </tr>
@@ -26,7 +26,6 @@ import org.apache.lucene.store.IndexInput;
import org.apache.lucene.util.Accountable;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.MathUtil;
import org.apache.lucene.util.RamUsageEstimator;
import org.apache.lucene.util.StringHelper;

/** Handles intersection of an multi-dimensional shape in byte[] space with a block KD-tree previously written with {@link BKDWriter}.

@@ -48,21 +47,14 @@ public final class BKDReader extends PointValues implements Accountable {
  final int version;
  protected final int packedBytesLength;

  // Used for 6.4.0+ index format:
  final byte[] packedIndex;

  // Used for Legacy (pre-6.4.0) index format, to hold a compact form of the index:
  final private byte[] splitPackedValues;
  final int bytesPerIndexEntry;
  final long[] leafBlockFPs;

  /** Caller must pre-seek the provided {@link IndexInput} to the index location that {@link BKDWriter#finish} returned */
  public BKDReader(IndexInput in) throws IOException {
    version = CodecUtil.checkHeader(in, BKDWriter.CODEC_NAME, BKDWriter.VERSION_START, BKDWriter.VERSION_CURRENT);
    numDims = in.readVInt();
    maxPointsInLeafNode = in.readVInt();
    bytesPerDim = in.readVInt();
    bytesPerIndexEntry = numDims == 1 && version >= BKDWriter.VERSION_IMPLICIT_SPLIT_DIM_1D ? bytesPerDim : bytesPerDim + 1;
    packedBytesLength = numDims * bytesPerDim;

    // Read index:
@ -85,112 +77,114 @@ public final class BKDReader extends PointValues implements Accountable {
|
|||
pointCount = in.readVLong();
|
||||
docCount = in.readVInt();
|
||||
|
||||
if (version >= BKDWriter.VERSION_PACKED_INDEX) {
|
||||
int numBytes = in.readVInt();
|
||||
packedIndex = new byte[numBytes];
|
||||
in.readBytes(packedIndex, 0, numBytes);
|
||||
leafBlockFPs = null;
|
||||
splitPackedValues = null;
|
||||
} else {
|
||||
// legacy un-packed index
|
||||
|
||||
splitPackedValues = new byte[bytesPerIndexEntry*numLeaves];
|
||||
|
||||
in.readBytes(splitPackedValues, 0, splitPackedValues.length);
|
||||
|
||||
// Read the file pointers to the start of each leaf block:
|
||||
long[] leafBlockFPs = new long[numLeaves];
|
||||
long lastFP = 0;
|
||||
for(int i=0;i<numLeaves;i++) {
|
||||
long delta = in.readVLong();
|
||||
leafBlockFPs[i] = lastFP + delta;
|
||||
lastFP += delta;
|
||||
}
|
||||
|
||||
// Possibly rotate the leaf block FPs, if the index not fully balanced binary tree (only happens
|
||||
// if it was created by BKDWriter.merge or OneDimWriter). In this case the leaf nodes may straddle the two bottom
|
||||
// levels of the binary tree:
|
||||
if (numDims == 1 && numLeaves > 1) {
|
||||
int levelCount = 2;
|
||||
while (true) {
|
||||
if (numLeaves >= levelCount && numLeaves <= 2*levelCount) {
|
||||
int lastLevel = 2*(numLeaves - levelCount);
|
||||
assert lastLevel >= 0;
|
||||
if (lastLevel != 0) {
|
||||
// Last level is partially filled, so we must rotate the leaf FPs to match. We do this here, after loading
|
||||
// at read-time, so that we can still delta code them on disk at write:
|
||||
long[] newLeafBlockFPs = new long[numLeaves];
|
||||
System.arraycopy(leafBlockFPs, lastLevel, newLeafBlockFPs, 0, leafBlockFPs.length - lastLevel);
|
||||
System.arraycopy(leafBlockFPs, 0, newLeafBlockFPs, leafBlockFPs.length - lastLevel, lastLevel);
|
||||
leafBlockFPs = newLeafBlockFPs;
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
levelCount *= 2;
|
||||
}
|
||||
}
|
||||
|
||||
this.leafBlockFPs = leafBlockFPs;
|
||||
packedIndex = null;
|
||||
}
|
||||
int numBytes = in.readVInt();
|
||||
packedIndex = new byte[numBytes];
|
||||
in.readBytes(packedIndex, 0, numBytes);
|
||||
|
||||
this.in = in;
|
||||
}
|
||||
|
||||
long getMinLeafBlockFP() {
|
||||
if (packedIndex != null) {
|
||||
return new ByteArrayDataInput(packedIndex).readVLong();
|
||||
} else {
|
||||
long minFP = Long.MAX_VALUE;
|
||||
for(long fp : leafBlockFPs) {
|
||||
minFP = Math.min(minFP, fp);
|
||||
}
|
||||
return minFP;
|
||||
}
|
||||
return new ByteArrayDataInput(packedIndex).readVLong();
|
||||
}
|
||||
|
||||
/** Used to walk the in-heap index
|
||||
*
|
||||
* @lucene.internal */
|
||||
public abstract class IndexTree implements Cloneable {
|
||||
protected int nodeID;
|
||||
/** Used to walk the in-heap index. The format takes advantage of the limited
|
||||
* access pattern to the BKD tree at search time, i.e. starting at the root
|
||||
* node and recursing downwards one child at a time.
|
||||
* @lucene.internal */
|
||||
public class IndexTree implements Cloneable {
|
||||
private int nodeID;
|
||||
// level is 1-based so that we can do level-1 w/o checking each time:
|
||||
protected int level;
|
||||
protected int splitDim;
|
||||
protected final byte[][] splitPackedValueStack;
|
||||
private int level;
|
||||
private int splitDim;
|
||||
private final byte[][] splitPackedValueStack;
|
||||
// used to read the packed byte[]
|
||||
private final ByteArrayDataInput in;
|
||||
// holds the minimum (left most) leaf block file pointer for each level we've recursed to:
|
||||
private final long[] leafBlockFPStack;
|
||||
// holds the address, in the packed byte[] index, of the left-node of each level:
|
||||
private final int[] leftNodePositions;
|
||||
// holds the address, in the packed byte[] index, of the right-node of each level:
|
||||
private final int[] rightNodePositions;
|
||||
// holds the splitDim for each level:
|
||||
private final int[] splitDims;
|
||||
// true if the per-dim delta we read for the node at this level is a negative offset vs. the last split on this dim; this is a packed
|
||||
// 2D array, i.e. to access array[level][dim] you read from negativeDeltas[level*numDims+dim]. this will be true if the last time we
|
||||
// split on this dimension, we next pushed to the left sub-tree:
|
||||
private final boolean[] negativeDeltas;
|
||||
// holds the packed per-level split values; the intersect method uses this to save the cell min/max as it recurses:
|
||||
private final byte[][] splitValuesStack;
|
||||
// scratch value to return from getPackedValue:
|
||||
private final BytesRef scratch;
|
||||
|
||||
protected IndexTree() {
|
||||
IndexTree() {
|
||||
int treeDepth = getTreeDepth();
|
||||
splitPackedValueStack = new byte[treeDepth+1][];
|
||||
nodeID = 1;
|
||||
level = 1;
|
||||
splitPackedValueStack[level] = new byte[packedBytesLength];
|
||||
leafBlockFPStack = new long[treeDepth+1];
|
||||
leftNodePositions = new int[treeDepth+1];
|
||||
rightNodePositions = new int[treeDepth+1];
|
||||
splitValuesStack = new byte[treeDepth+1][];
|
||||
splitDims = new int[treeDepth+1];
|
||||
negativeDeltas = new boolean[numDims*(treeDepth+1)];
|
||||
|
||||
in = new ByteArrayDataInput(packedIndex);
|
||||
splitValuesStack[0] = new byte[packedBytesLength];
|
||||
readNodeData(false);
|
||||
scratch = new BytesRef();
|
||||
scratch.length = bytesPerDim;
|
||||
}
|
||||
|
||||
public void pushLeft() {
|
||||
int nodePosition = leftNodePositions[level];
|
||||
nodeID *= 2;
|
||||
level++;
|
||||
if (splitPackedValueStack[level] == null) {
|
||||
splitPackedValueStack[level] = new byte[packedBytesLength];
|
||||
}
|
||||
System.arraycopy(negativeDeltas, (level-1)*numDims, negativeDeltas, level*numDims, numDims);
|
||||
assert splitDim != -1;
|
||||
negativeDeltas[level*numDims+splitDim] = true;
|
||||
in.setPosition(nodePosition);
|
||||
readNodeData(true);
|
||||
}
|
||||
|
||||
|
||||
/** Clone, but you are not allowed to pop up past the point where the clone happened. */
|
||||
public abstract IndexTree clone();
|
||||
@Override
|
||||
public IndexTree clone() {
|
||||
IndexTree index = new IndexTree();
|
||||
index.nodeID = nodeID;
|
||||
index.level = level;
|
||||
index.splitDim = splitDim;
|
||||
index.leafBlockFPStack[level] = leafBlockFPStack[level];
|
||||
index.leftNodePositions[level] = leftNodePositions[level];
|
||||
index.rightNodePositions[level] = rightNodePositions[level];
|
||||
index.splitValuesStack[index.level] = splitValuesStack[index.level].clone();
|
||||
System.arraycopy(negativeDeltas, level*numDims, index.negativeDeltas, level*numDims, numDims);
|
||||
index.splitDims[level] = splitDims[level];
|
||||
return index;
|
||||
}
|
||||
|
||||
public void pushRight() {
|
||||
int nodePosition = rightNodePositions[level];
|
||||
nodeID = nodeID * 2 + 1;
|
||||
level++;
|
||||
if (splitPackedValueStack[level] == null) {
|
||||
splitPackedValueStack[level] = new byte[packedBytesLength];
|
||||
}
|
||||
System.arraycopy(negativeDeltas, (level-1)*numDims, negativeDeltas, level*numDims, numDims);
|
||||
assert splitDim != -1;
|
||||
negativeDeltas[level*numDims+splitDim] = false;
|
||||
in.setPosition(nodePosition);
|
||||
readNodeData(false);
|
||||
}
|
||||
|
||||
public void pop() {
|
||||
nodeID /= 2;
|
||||
level--;
|
||||
splitDim = -1;
|
||||
splitDim = splitDims[level];
|
||||
//System.out.println(" pop nodeID=" + nodeID);
|
||||
}
|
||||
|
||||
|
@ -219,10 +213,18 @@ public final class BKDReader extends PointValues implements Accountable {
|
|||
}
|
||||
|
||||
/** Only valid after pushLeft or pushRight, not pop! */
|
||||
public abstract BytesRef getSplitDimValue();
|
||||
public BytesRef getSplitDimValue() {
|
||||
assert isLeafNode() == false;
|
||||
scratch.bytes = splitValuesStack[level];
|
||||
scratch.offset = splitDim * bytesPerDim;
|
||||
return scratch;
|
||||
}
|
||||
|
||||
/** Only valid after pushLeft or pushRight, not pop! */
|
||||
public abstract long getLeafBlockFP();
|
||||
public long getLeafBlockFP() {
|
||||
assert isLeafNode(): "nodeID=" + nodeID + " is not a leaf";
|
||||
return leafBlockFPStack[level];
|
||||
}
|
||||
|
||||
/** Return the number of leaves below the current node. */
|
||||
public int getNumLeaves() {
|
||||
|
@ -258,180 +260,6 @@ public final class BKDReader extends PointValues implements Accountable {
|
|||
return leftCount + rightCount;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/** Reads the original simple yet heap-heavy index format */
|
||||
private final class LegacyIndexTree extends IndexTree {
|
||||
|
||||
private long leafBlockFP;
|
||||
private final byte[] splitDimValue = new byte[bytesPerDim];
|
||||
private final BytesRef scratch = new BytesRef();
|
||||
|
||||
public LegacyIndexTree() {
|
||||
setNodeData();
|
||||
scratch.bytes = splitDimValue;
|
||||
scratch.length = bytesPerDim;
|
||||
}
|
||||
|
||||
@Override
|
||||
public LegacyIndexTree clone() {
|
||||
LegacyIndexTree index = new LegacyIndexTree();
|
||||
index.nodeID = nodeID;
|
||||
index.level = level;
|
||||
index.splitDim = splitDim;
|
||||
index.leafBlockFP = leafBlockFP;
|
||||
index.splitPackedValueStack[index.level] = splitPackedValueStack[index.level].clone();
|
||||
|
||||
return index;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void pushLeft() {
|
||||
super.pushLeft();
|
||||
setNodeData();
|
||||
}
|
||||
|
||||
@Override
|
||||
public void pushRight() {
|
||||
super.pushRight();
|
||||
setNodeData();
|
||||
}
|
||||
|
||||
private void setNodeData() {
|
||||
if (isLeafNode()) {
|
||||
leafBlockFP = leafBlockFPs[nodeID - leafNodeOffset];
|
||||
splitDim = -1;
|
||||
} else {
|
||||
leafBlockFP = -1;
|
||||
int address = nodeID * bytesPerIndexEntry;
|
||||
if (numDims == 1) {
|
||||
splitDim = 0;
|
||||
if (version < BKDWriter.VERSION_IMPLICIT_SPLIT_DIM_1D) {
|
||||
// skip over wastefully encoded 0 splitDim:
|
||||
assert splitPackedValues[address] == 0;
|
||||
address++;
|
||||
}
|
||||
} else {
|
||||
splitDim = splitPackedValues[address++] & 0xff;
|
||||
}
|
||||
System.arraycopy(splitPackedValues, address, splitDimValue, 0, bytesPerDim);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public long getLeafBlockFP() {
|
||||
assert isLeafNode();
|
||||
return leafBlockFP;
|
||||
}
|
||||
|
||||
@Override
|
||||
public BytesRef getSplitDimValue() {
|
||||
assert isLeafNode() == false;
|
||||
return scratch;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void pop() {
|
||||
super.pop();
|
||||
leafBlockFP = -1;
|
||||
}
|
||||
}
|
||||
|
||||
/** Reads the new packed byte[] index format which can be up to ~63% smaller than the legacy index format on 20M NYC taxis tests. This
|
||||
* format takes advantage of the limited access pattern to the BKD tree at search time, i.e. starting at the root node and recursing
|
||||
* downwards one child at a time. */
|
||||
private final class PackedIndexTree extends IndexTree {
|
||||
// used to read the packed byte[]
|
||||
private final ByteArrayDataInput in;
|
||||
// holds the minimum (left most) leaf block file pointer for each level we've recursed to:
|
||||
private final long[] leafBlockFPStack;
|
||||
// holds the address, in the packed byte[] index, of the left-node of each level:
|
||||
private final int[] leftNodePositions;
|
||||
// holds the address, in the packed byte[] index, of the right-node of each level:
|
||||
private final int[] rightNodePositions;
|
||||
// holds the splitDim for each level:
|
||||
private final int[] splitDims;
|
||||
// true if the per-dim delta we read for the node at this level is a negative offset vs. the last split on this dim; this is a packed
|
||||
// 2D array, i.e. to access array[level][dim] you read from negativeDeltas[level*numDims+dim]. this will be true if the last time we
|
||||
// split on this dimension, we next pushed to the left sub-tree:
|
||||
private final boolean[] negativeDeltas;
|
||||
// holds the packed per-level split values; the intersect method uses this to save the cell min/max as it recurses:
|
||||
private final byte[][] splitValuesStack;
|
||||
// scratch value to return from getPackedValue:
|
||||
private final BytesRef scratch;
|
||||
|
||||
public PackedIndexTree() {
|
||||
int treeDepth = getTreeDepth();
|
||||
leafBlockFPStack = new long[treeDepth+1];
|
||||
leftNodePositions = new int[treeDepth+1];
|
||||
rightNodePositions = new int[treeDepth+1];
|
||||
splitValuesStack = new byte[treeDepth+1][];
|
||||
splitDims = new int[treeDepth+1];
|
||||
negativeDeltas = new boolean[numDims*(treeDepth+1)];
|
||||
|
||||
in = new ByteArrayDataInput(packedIndex);
|
||||
splitValuesStack[0] = new byte[packedBytesLength];
|
||||
readNodeData(false);
|
||||
scratch = new BytesRef();
|
||||
scratch.length = bytesPerDim;
|
||||
}
|
||||
|
||||
@Override
|
||||
public PackedIndexTree clone() {
|
||||
PackedIndexTree index = new PackedIndexTree();
|
||||
index.nodeID = nodeID;
|
||||
index.level = level;
|
||||
index.splitDim = splitDim;
|
||||
index.leafBlockFPStack[level] = leafBlockFPStack[level];
|
||||
index.leftNodePositions[level] = leftNodePositions[level];
|
||||
index.rightNodePositions[level] = rightNodePositions[level];
|
||||
index.splitValuesStack[index.level] = splitValuesStack[index.level].clone();
|
||||
System.arraycopy(negativeDeltas, level*numDims, index.negativeDeltas, level*numDims, numDims);
|
||||
index.splitDims[level] = splitDims[level];
|
||||
return index;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void pushLeft() {
|
||||
int nodePosition = leftNodePositions[level];
|
||||
super.pushLeft();
|
||||
System.arraycopy(negativeDeltas, (level-1)*numDims, negativeDeltas, level*numDims, numDims);
|
||||
assert splitDim != -1;
|
||||
negativeDeltas[level*numDims+splitDim] = true;
|
||||
in.setPosition(nodePosition);
|
||||
readNodeData(true);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void pushRight() {
|
||||
int nodePosition = rightNodePositions[level];
|
||||
super.pushRight();
|
||||
System.arraycopy(negativeDeltas, (level-1)*numDims, negativeDeltas, level*numDims, numDims);
|
||||
assert splitDim != -1;
|
||||
negativeDeltas[level*numDims+splitDim] = false;
|
||||
in.setPosition(nodePosition);
|
||||
readNodeData(false);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void pop() {
|
||||
super.pop();
|
||||
splitDim = splitDims[level];
|
||||
}
|
||||
|
||||
@Override
|
||||
public long getLeafBlockFP() {
|
||||
assert isLeafNode(): "nodeID=" + nodeID + " is not a leaf";
|
||||
return leafBlockFPStack[level];
|
||||
}
|
||||
|
||||
@Override
|
||||
public BytesRef getSplitDimValue() {
|
||||
assert isLeafNode() == false;
|
||||
scratch.bytes = splitValuesStack[level];
|
||||
scratch.offset = splitDim * bytesPerDim;
|
||||
return scratch;
|
||||
}
|
||||
|
||||
private void readNodeData(boolean isLeft) {
|
||||
|
||||
|
@ -559,12 +387,7 @@ public final class BKDReader extends PointValues implements Accountable {
|
|||
|
||||
/** Create a new {@link IntersectState} */
|
||||
public IntersectState getIntersectState(IntersectVisitor visitor) {
|
||||
IndexTree index;
|
||||
if (packedIndex != null) {
|
||||
index = new PackedIndexTree();
|
||||
} else {
|
||||
index = new LegacyIndexTree();
|
||||
}
|
||||
IndexTree index = new IndexTree();
|
||||
return new IntersectState(in.clone(), numDims,
|
||||
packedBytesLength,
|
||||
maxPointsInLeafNode,
|
||||
|
@ -590,11 +413,7 @@ public final class BKDReader extends PointValues implements Accountable {
|
|||
int count = in.readVInt();
|
||||
// No need to call grow(), it has been called up-front
|
||||
|
||||
if (version < BKDWriter.VERSION_COMPRESSED_DOC_IDS) {
|
||||
DocIdsWriter.readInts32(in, count, visitor);
|
||||
} else {
|
||||
DocIdsWriter.readInts(in, count, visitor);
|
||||
}
|
||||
DocIdsWriter.readInts(in, count, visitor);
|
||||
}
|
||||
|
||||
int readDocIDs(IndexInput in, long blockFP, int[] docIDs) throws IOException {
|
||||
|
@ -603,11 +422,7 @@ public final class BKDReader extends PointValues implements Accountable {
|
|||
// How many points are stored in this leaf cell:
|
||||
int count = in.readVInt();
|
||||
|
||||
if (version < BKDWriter.VERSION_COMPRESSED_DOC_IDS) {
|
||||
DocIdsWriter.readInts32(in, count, docIDs);
|
||||
} else {
|
||||
DocIdsWriter.readInts(in, count, docIDs);
|
||||
}
|
||||
DocIdsWriter.readInts(in, count, docIDs);
|
||||
|
||||
return count;
|
||||
}
|
||||
|
@ -617,9 +432,7 @@ public final class BKDReader extends PointValues implements Accountable {
|
|||
|
||||
readCommonPrefixes(commonPrefixLengths, scratchPackedValue, in);
|
||||
|
||||
int compressedDim = version < BKDWriter.VERSION_COMPRESSED_VALUES
|
||||
? -1
|
||||
: readCompressedDim(in);
|
||||
int compressedDim = readCompressedDim(in);
|
||||
|
||||
if (compressedDim == -1) {
|
||||
visitRawDocValues(commonPrefixLengths, scratchPackedValue, in, docIDs, count, visitor);
|
||||
|
@ -803,11 +616,7 @@ public final class BKDReader extends PointValues implements Accountable {
|
|||
|
||||
@Override
|
||||
public long ramBytesUsed() {
|
||||
if (packedIndex != null) {
|
||||
return packedIndex.length;
|
||||
} else {
|
||||
return RamUsageEstimator.sizeOf(splitPackedValues) + RamUsageEstimator.sizeOf(leafBlockFPs);
|
||||
}
|
||||
return packedIndex.length;
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@@ -82,12 +82,8 @@ import org.apache.lucene.util.StringHelper;
public class BKDWriter implements Closeable {

  public static final String CODEC_NAME = "BKD";
  public static final int VERSION_START = 0;
  public static final int VERSION_COMPRESSED_DOC_IDS = 1;
  public static final int VERSION_COMPRESSED_VALUES = 2;
  public static final int VERSION_IMPLICIT_SPLIT_DIM_1D = 3;
  public static final int VERSION_PACKED_INDEX = 4;
  public static final int VERSION_CURRENT = VERSION_PACKED_INDEX;
  public static final int VERSION_START = 4; // version used by Lucene 7.0
  public static final int VERSION_CURRENT = VERSION_START;

  /** How many bytes each docs takes in the fixed-width offline format */
  private final int bytesPerDoc;
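Collapsing the BKD version constants to a single VERSION_START = VERSION_CURRENT = 4 means the 7.0 codec no longer carries read paths for the pre-packed-index formats: anything older now fails the header check instead of falling into the removed legacy branches. A rough sketch of that style of version gating follows; checkSupportedVersion and its messages are made-up stand-ins, not CodecUtil.checkHeader, which is what the real readers call with (CODEC_NAME, VERSION_START, VERSION_CURRENT).

// Sketch only: reject on-disk data whose version is outside the supported range.
import java.io.IOException;

final class VersionGateSketch {
  static int checkSupportedVersion(int actual, int minSupported, int maxSupported) throws IOException {
    if (actual < minSupported) {
      throw new IOException("format version " + actual + " is too old (oldest supported: " + minSupported + ")");
    }
    if (actual > maxSupported) {
      throw new IOException("format version " + actual + " is too new (newest supported: " + maxSupported + ")");
    }
    return actual;
  }
}

With minSupported == maxSupported == 4, an index written by the older BKD formats (versions 0-3) is rejected up front, which is what lets the version < VERSION_COMPRESSED_DOC_IDS and version < VERSION_COMPRESSED_VALUES branches elsewhere in this commit be deleted.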
|
|
|
@ -142,7 +142,7 @@ class DocIdsWriter {
|
|||
}
|
||||
}
|
||||
|
||||
static void readInts32(IndexInput in, int count, IntersectVisitor visitor) throws IOException {
|
||||
private static void readInts32(IndexInput in, int count, IntersectVisitor visitor) throws IOException {
|
||||
for (int i = 0; i < count; i++) {
|
||||
visitor.visit(in.readInt());
|
||||
}
|
||||
|
|
|
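readInts32 above is the uncompressed doc-ID path: every doc ID costs a full 4 bytes. Making it private reflects that the reader now always calls readInts, since the version check that chose between the two paths was removed. The sketch below only illustrates why a compressed layout pays off for ascending doc IDs (delta plus variable-length ints); it is not the actual DocIdsWriter on-disk format, and the method names are invented.

// Sketch only: fixed-width 32-bit doc IDs vs. delta-coded variable-length ints.
import java.io.ByteArrayOutputStream;

final class DocIdEncodingSketch {

  // Analogous to the readInts32 path: 4 bytes per doc ID, regardless of value.
  static byte[] encodeInts32(int[] docIDs) {
    ByteArrayOutputStream out = new ByteArrayOutputStream();
    for (int docID : docIDs) {
      out.write((docID >>> 24) & 0xFF);
      out.write((docID >>> 16) & 0xFF);
      out.write((docID >>> 8) & 0xFF);
      out.write(docID & 0xFF);
    }
    return out.toByteArray();
  }

  // Illustrative compressed path: ascending doc IDs stored as vInt-coded gaps,
  // so a small gap costs one byte instead of four.
  static byte[] encodeDeltaVInt(int[] ascendingDocIDs) {
    ByteArrayOutputStream out = new ByteArrayOutputStream();
    int previous = 0;
    for (int docID : ascendingDocIDs) {
      int delta = docID - previous;
      previous = docID;
      while ((delta & ~0x7F) != 0) {   // more than 7 bits left: write low 7 bits plus continuation bit
        out.write((delta & 0x7F) | 0x80);
        delta >>>= 7;
      }
      out.write(delta);                // final byte, high bit clear
    }
    return out.toByteArray();
  }
}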
@@ -26,7 +26,6 @@ import java.nio.file.Files;
import java.nio.file.Path;

import org.apache.lucene.codecs.CodecUtil;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.store.ByteArrayDataOutput;
import org.apache.lucene.store.DataInput;
import org.apache.lucene.store.DataOutput;

@@ -103,27 +102,8 @@ public final class FST<T> implements Accountable {

  // Increment version to change it
  private static final String FILE_FORMAT_NAME = "FST";
  private static final int VERSION_START = 0;

  /** Changed numBytesPerArc for array'd case from byte to int. */
  private static final int VERSION_INT_NUM_BYTES_PER_ARC = 1;

  /** Write BYTE2 labels as 2-byte short, not vInt. */
  private static final int VERSION_SHORT_BYTE2_LABELS = 2;

  /** Added optional packed format. */
  private static final int VERSION_PACKED = 3;

  /** Changed from int to vInt for encoding arc targets.
   *  Also changed maxBytesPerArc from int to vInt in the array case. */
  private static final int VERSION_VINT_TARGET = 4;

  /** Don't store arcWithOutputCount anymore */
  private static final int VERSION_NO_NODE_ARC_COUNTS = 5;

  private static final int VERSION_PACKED_REMOVED = 6;

  private static final int VERSION_CURRENT = VERSION_PACKED_REMOVED;
  private static final int VERSION_START = 6;
  private static final int VERSION_CURRENT = VERSION_START;

  // Never serialized; just used to represent the virtual
  // final node w/ no arcs:

@@ -285,12 +265,7 @@ public final class FST<T> implements Accountable {

    // NOTE: only reads most recent format; we don't have
    // back-compat promise for FSTs (they are experimental):
    version = CodecUtil.checkHeader(in, FILE_FORMAT_NAME, VERSION_PACKED, VERSION_CURRENT);
    if (version < VERSION_PACKED_REMOVED) {
      if (in.readByte() == 1) {
        throw new CorruptIndexException("Cannot read packed FSTs anymore", in);
      }
    }
    version = CodecUtil.checkHeader(in, FILE_FORMAT_NAME, VERSION_START, VERSION_CURRENT);
    if (in.readByte() == 1) {
      // accepts empty string
      // 1 KB blocks:
@ -325,11 +300,6 @@ public final class FST<T> implements Accountable {
|
|||
throw new IllegalStateException("invalid input type " + t);
|
||||
}
|
||||
startNode = in.readVLong();
|
||||
if (version < VERSION_NO_NODE_ARC_COUNTS) {
|
||||
in.readVLong();
|
||||
in.readVLong();
|
||||
in.readVLong();
|
||||
}
|
||||
|
||||
long numBytes = in.readVLong();
|
||||
if (numBytes > 1 << maxBlockBits) {
|
||||
|
@ -768,11 +738,7 @@ public final class FST<T> implements Accountable {
|
|||
if (b == ARCS_AS_FIXED_ARRAY) {
|
||||
// array: jump straight to end
|
||||
arc.numArcs = in.readVInt();
|
||||
if (version >= VERSION_VINT_TARGET) {
|
||||
arc.bytesPerArc = in.readVInt();
|
||||
} else {
|
||||
arc.bytesPerArc = in.readInt();
|
||||
}
|
||||
arc.bytesPerArc = in.readVInt();
|
||||
//System.out.println(" array numArcs=" + arc.numArcs + " bpa=" + arc.bytesPerArc);
|
||||
arc.posArcsStart = in.getPosition();
|
||||
arc.arcIdx = arc.numArcs - 2;
|
||||
|
@ -808,13 +774,7 @@ public final class FST<T> implements Accountable {
|
|||
}
|
||||
|
||||
private long readUnpackedNodeTarget(BytesReader in) throws IOException {
|
||||
long target;
|
||||
if (version < VERSION_VINT_TARGET) {
|
||||
target = in.readInt();
|
||||
} else {
|
||||
target = in.readVLong();
|
||||
}
|
||||
return target;
|
||||
return in.readVLong();
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -857,11 +817,7 @@ public final class FST<T> implements Accountable {
|
|||
//System.out.println(" fixedArray");
|
||||
// this is first arc in a fixed-array
|
||||
arc.numArcs = in.readVInt();
|
||||
if (version >= VERSION_VINT_TARGET) {
|
||||
arc.bytesPerArc = in.readVInt();
|
||||
} else {
|
||||
arc.bytesPerArc = in.readInt();
|
||||
}
|
||||
arc.bytesPerArc = in.readVInt();
|
||||
arc.arcIdx = -1;
|
||||
arc.nextArc = arc.posArcsStart = in.getPosition();
|
||||
//System.out.println(" bytesPer=" + arc.bytesPerArc + " numArcs=" + arc.numArcs + " arcsStart=" + pos);
|
||||
|
@ -920,11 +876,7 @@ public final class FST<T> implements Accountable {
|
|||
in.readVInt();
|
||||
|
||||
// Skip bytesPerArc:
|
||||
if (version >= VERSION_VINT_TARGET) {
|
||||
in.readVInt();
|
||||
} else {
|
||||
in.readInt();
|
||||
}
|
||||
in.readVInt();
|
||||
} else {
|
||||
in.setPosition(pos);
|
||||
}
|
||||
|
@ -1092,11 +1044,7 @@ public final class FST<T> implements Accountable {
|
|||
if (in.readByte() == ARCS_AS_FIXED_ARRAY) {
|
||||
// Arcs are full array; do binary search:
|
||||
arc.numArcs = in.readVInt();
|
||||
if (version >= VERSION_VINT_TARGET) {
|
||||
arc.bytesPerArc = in.readVInt();
|
||||
} else {
|
||||
arc.bytesPerArc = in.readInt();
|
||||
}
|
||||
arc.bytesPerArc = in.readVInt();
|
||||
arc.posArcsStart = in.getPosition();
|
||||
int low = 0;
|
||||
int high = arc.numArcs-1;
|