mirror of https://github.com/apache/lucene.git
4.0 sortedbytes
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/lucene4547@1438072 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
e9ea070f5b
commit
7e97250ac8
|
@ -87,7 +87,7 @@ public class Lucene40Codec extends Codec {
|
|||
return infosFormat;
|
||||
}
|
||||
|
||||
private final DocValuesFormat defaultDVFormat = new Lucene40LyingDocValuesFormat();
|
||||
private final DocValuesFormat defaultDVFormat = new Lucene40DocValuesFormat();
|
||||
|
||||
@Override
|
||||
public DocValuesFormat docValuesFormat() {
|
||||
|
|
|
@ -51,7 +51,6 @@ public class Lucene40DocValuesFormat extends DocValuesFormat {
|
|||
static final String VAR_INTS_CODEC_NAME = "PackedInts";
|
||||
static final int VAR_INTS_VERSION_START = 0;
|
||||
static final int VAR_INTS_VERSION_CURRENT = VAR_INTS_VERSION_START;
|
||||
|
||||
static final byte VAR_INTS_PACKED = 0x00;
|
||||
static final byte VAR_INTS_FIXED_64 = 0x01;
|
||||
|
||||
|
@ -75,4 +74,17 @@ public class Lucene40DocValuesFormat extends DocValuesFormat {
|
|||
static final String BYTES_VAR_STRAIGHT_CODEC_NAME_DAT = "VarStraightBytesDat";
|
||||
static final int BYTES_VAR_STRAIGHT_VERSION_START = 0;
|
||||
static final int BYTES_VAR_STRAIGHT_VERSION_CURRENT = BYTES_VAR_STRAIGHT_VERSION_START;
|
||||
|
||||
// constants for BYTES_FIXED_SORTED
|
||||
static final String BYTES_FIXED_SORTED_CODEC_NAME_IDX = "FixedSortedBytesIdx";
|
||||
static final String BYTES_FIXED_SORTED_CODEC_NAME_DAT = "FixedSortedBytesDat";
|
||||
static final int BYTES_FIXED_SORTED_VERSION_START = 0;
|
||||
static final int BYTES_FIXED_SORTED_VERSION_CURRENT = BYTES_FIXED_SORTED_VERSION_START;
|
||||
|
||||
// constants for BYTES_VAR_SORTED
|
||||
// NOTE THIS IS NOT A BUG! 4.0 actually screwed this up (VAR_SORTED and VAR_DEREF have same codec header)
|
||||
static final String BYTES_VAR_SORTED_CODEC_NAME_IDX = "VarDerefBytesIdx";
|
||||
static final String BYTES_VAR_SORTED_CODEC_NAME_DAT = "VarDerefBytesDat";
|
||||
static final int BYTES_VAR_SORTED_VERSION_START = 0;
|
||||
static final int BYTES_VAR_SORTED_VERSION_CURRENT = BYTES_VAR_SORTED_VERSION_START;
|
||||
}
|
||||
|
|
|
@ -276,7 +276,7 @@ class Lucene40DocValuesReader extends DocValuesProducer {
|
|||
instance = loadBytesVarStraight(field);
|
||||
break;
|
||||
default:
|
||||
throw new AssertionError();
|
||||
throw new AssertionError(); // nocommit
|
||||
}
|
||||
binaryInstances.put(field.number, instance);
|
||||
}
|
||||
|
@ -355,7 +355,113 @@ class Lucene40DocValuesReader extends DocValuesProducer {
|
|||
|
||||
@Override
|
||||
public synchronized SortedDocValues getSorted(FieldInfo field) throws IOException {
|
||||
throw new AssertionError();
|
||||
SortedDocValues instance = sortedInstances.get(field.number);
|
||||
if (instance == null) {
|
||||
String dataName = IndexFileNames.segmentFileName(state.segmentInfo.name, Integer.toString(field.number), "dat");
|
||||
String indexName = IndexFileNames.segmentFileName(state.segmentInfo.name, Integer.toString(field.number), "idx");
|
||||
IndexInput data = null;
|
||||
IndexInput index = null;
|
||||
boolean success = false;
|
||||
try {
|
||||
data = dir.openInput(dataName, state.context);
|
||||
index = dir.openInput(indexName, state.context);
|
||||
switch(LegacyDocValuesType.valueOf(field.getAttribute(legacyKey))) {
|
||||
case BYTES_FIXED_SORTED:
|
||||
instance = loadBytesFixedSorted(field, data, index);
|
||||
break;
|
||||
case BYTES_VAR_SORTED:
|
||||
instance = loadBytesVarSorted(field, data, index);
|
||||
break;
|
||||
default:
|
||||
throw new AssertionError();
|
||||
}
|
||||
success = true;
|
||||
} finally {
|
||||
if (success) {
|
||||
IOUtils.close(data, index);
|
||||
} else {
|
||||
IOUtils.closeWhileHandlingException(data, index);
|
||||
}
|
||||
}
|
||||
sortedInstances.put(field.number, instance);
|
||||
}
|
||||
return instance;
|
||||
}
|
||||
|
||||
private SortedDocValues loadBytesFixedSorted(FieldInfo field, IndexInput data, IndexInput index) throws IOException {
|
||||
CodecUtil.checkHeader(data, Lucene40DocValuesFormat.BYTES_FIXED_SORTED_CODEC_NAME_DAT,
|
||||
Lucene40DocValuesFormat.BYTES_FIXED_SORTED_VERSION_START,
|
||||
Lucene40DocValuesFormat.BYTES_FIXED_SORTED_VERSION_CURRENT);
|
||||
CodecUtil.checkHeader(index, Lucene40DocValuesFormat.BYTES_FIXED_SORTED_CODEC_NAME_IDX,
|
||||
Lucene40DocValuesFormat.BYTES_FIXED_SORTED_VERSION_START,
|
||||
Lucene40DocValuesFormat.BYTES_FIXED_SORTED_VERSION_CURRENT);
|
||||
|
||||
final int fixedLength = data.readInt();
|
||||
final int valueCount = index.readInt();
|
||||
|
||||
// nocommit? can the current impl even handle > 2G?
|
||||
final byte[] bytes = new byte[fixedLength*valueCount];
|
||||
data.readBytes(bytes, 0, bytes.length);
|
||||
final PackedInts.Reader reader = PackedInts.getReader(index);
|
||||
|
||||
return new SortedDocValues() {
|
||||
@Override
|
||||
public int getOrd(int docID) {
|
||||
return (int) reader.get(docID);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void lookupOrd(int ord, BytesRef result) {
|
||||
result.bytes = bytes;
|
||||
result.offset = ord * fixedLength;
|
||||
result.length = fixedLength;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int getValueCount() {
|
||||
return valueCount;
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
private SortedDocValues loadBytesVarSorted(FieldInfo field, IndexInput data, IndexInput index) throws IOException {
|
||||
CodecUtil.checkHeader(data, Lucene40DocValuesFormat.BYTES_VAR_SORTED_CODEC_NAME_DAT,
|
||||
Lucene40DocValuesFormat.BYTES_VAR_SORTED_VERSION_START,
|
||||
Lucene40DocValuesFormat.BYTES_VAR_SORTED_VERSION_CURRENT);
|
||||
CodecUtil.checkHeader(index, Lucene40DocValuesFormat.BYTES_VAR_SORTED_CODEC_NAME_IDX,
|
||||
Lucene40DocValuesFormat.BYTES_VAR_SORTED_VERSION_START,
|
||||
Lucene40DocValuesFormat.BYTES_VAR_SORTED_VERSION_CURRENT);
|
||||
|
||||
long maxAddress = index.readLong();
|
||||
// nocommit? can the current impl even handle > 2G?
|
||||
final byte[] bytes = new byte[(int)maxAddress];
|
||||
data.readBytes(bytes, 0, bytes.length);
|
||||
|
||||
final PackedInts.Reader addressReader = PackedInts.getReader(index);
|
||||
final PackedInts.Reader ordsReader = PackedInts.getReader(index);
|
||||
|
||||
final int valueCount = addressReader.size() - 1;
|
||||
|
||||
return new SortedDocValues() {
|
||||
@Override
|
||||
public int getOrd(int docID) {
|
||||
return (int)ordsReader.get(docID);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void lookupOrd(int ord, BytesRef result) {
|
||||
long startAddress = addressReader.get(ord);
|
||||
long endAddress = addressReader.get(ord+1);
|
||||
result.bytes = bytes;
|
||||
result.offset = (int)startAddress;
|
||||
result.length = (int)(endAddress - startAddress);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int getValueCount() {
|
||||
return valueCount;
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@ -1,41 +0,0 @@
|
|||
package org.apache.lucene.codecs.lucene40;
|
||||
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
import org.apache.lucene.codecs.DocValuesConsumer;
|
||||
import org.apache.lucene.codecs.DocValuesProducer;
|
||||
import org.apache.lucene.codecs.lucene42.Lucene42DocValuesFormat;
|
||||
import org.apache.lucene.index.SegmentReadState;
|
||||
import org.apache.lucene.index.SegmentWriteState;
|
||||
|
||||
// nocommit: still a lie, but allows javadocs @links to work
|
||||
// nocommit: make read-only and move to impersonator
|
||||
public class Lucene40LyingDocValuesFormat extends Lucene42DocValuesFormat {
|
||||
|
||||
@Override
|
||||
public DocValuesConsumer fieldsConsumer(SegmentWriteState state) throws IOException {
|
||||
throw new UnsupportedOperationException("this codec can only be used for reading");
|
||||
}
|
||||
|
||||
@Override
|
||||
public DocValuesProducer fieldsProducer(SegmentReadState state) throws IOException {
|
||||
return super.fieldsProducer(state);
|
||||
}
|
||||
}
|
|
@ -32,7 +32,7 @@ import org.apache.lucene.codecs.StoredFieldsWriter;
|
|||
import org.apache.lucene.codecs.TermVectorsFormat;
|
||||
import org.apache.lucene.codecs.compressing.CompressingStoredFieldsFormat;
|
||||
import org.apache.lucene.codecs.compressing.CompressionMode;
|
||||
import org.apache.lucene.codecs.lucene40.Lucene40LyingDocValuesFormat;
|
||||
import org.apache.lucene.codecs.lucene40.Lucene40DocValuesFormat;
|
||||
import org.apache.lucene.codecs.lucene40.Lucene40FieldInfosFormat;
|
||||
import org.apache.lucene.codecs.lucene40.Lucene40LiveDocsFormat;
|
||||
import org.apache.lucene.codecs.lucene40.Lucene40NormsFormat;
|
||||
|
@ -125,7 +125,7 @@ public class Lucene41Codec extends Codec {
|
|||
}
|
||||
|
||||
private final PostingsFormat defaultFormat = PostingsFormat.forName("Lucene41");
|
||||
private final DocValuesFormat dvFormat = new Lucene40LyingDocValuesFormat();
|
||||
private final DocValuesFormat dvFormat = new Lucene40DocValuesFormat();
|
||||
private final NormsFormat normsFormat = new Lucene40NormsFormat();
|
||||
|
||||
@Override
|
||||
|
|
|
@ -219,12 +219,16 @@ class Lucene40DocValuesWriter extends DocValuesConsumer {
|
|||
Lucene40DocValuesFormat.BYTES_VAR_STRAIGHT_CODEC_NAME_IDX,
|
||||
Lucene40DocValuesFormat.BYTES_VAR_STRAIGHT_VERSION_CURRENT);
|
||||
|
||||
/* values */
|
||||
|
||||
final long startPos = data.getFilePointer();
|
||||
|
||||
for (BytesRef v : values) {
|
||||
data.writeBytes(v.bytes, v.offset, v.length);
|
||||
}
|
||||
|
||||
/* addresses */
|
||||
|
||||
final long maxAddress = data.getFilePointer() - startPos;
|
||||
index.writeVLong(maxAddress);
|
||||
|
||||
|
@ -245,8 +249,121 @@ class Lucene40DocValuesWriter extends DocValuesConsumer {
|
|||
|
||||
@Override
|
||||
public void addSortedField(FieldInfo field, Iterable<BytesRef> values, Iterable<Number> docToOrd) throws IOException {
|
||||
assert false;
|
||||
}
|
||||
// examine the values to determine best type to use
|
||||
int minLength = Integer.MAX_VALUE;
|
||||
int maxLength = Integer.MIN_VALUE;
|
||||
for (BytesRef b : values) {
|
||||
minLength = Math.min(minLength, b.length);
|
||||
maxLength = Math.max(maxLength, b.length);
|
||||
}
|
||||
|
||||
boolean success = false;
|
||||
IndexOutput data = null;
|
||||
IndexOutput index = null;
|
||||
String dataName = IndexFileNames.segmentFileName(state.segmentInfo.name, Integer.toString(field.number), "dat");
|
||||
String indexName = IndexFileNames.segmentFileName(state.segmentInfo.name, Integer.toString(field.number), "idx");
|
||||
|
||||
try {
|
||||
data = dir.createOutput(dataName, state.context);
|
||||
index = dir.createOutput(indexName, state.context);
|
||||
if (minLength == maxLength) {
|
||||
// fixed byte[]
|
||||
addFixedSortedBytesField(field, data, index, values, docToOrd, minLength);
|
||||
} else {
|
||||
// var byte[]
|
||||
addVarSortedBytesField(field, data, index, values, docToOrd);
|
||||
}
|
||||
success = true;
|
||||
} finally {
|
||||
if (success) {
|
||||
IOUtils.close(data, index);
|
||||
} else {
|
||||
IOUtils.closeWhileHandlingException(data, index);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private void addFixedSortedBytesField(FieldInfo field, IndexOutput data, IndexOutput index, Iterable<BytesRef> values, Iterable<Number> docToOrd, int length) throws IOException {
|
||||
field.putAttribute(legacyKey, LegacyDocValuesType.BYTES_FIXED_SORTED.name());
|
||||
|
||||
CodecUtil.writeHeader(data,
|
||||
Lucene40DocValuesFormat.BYTES_FIXED_SORTED_CODEC_NAME_DAT,
|
||||
Lucene40DocValuesFormat.BYTES_FIXED_SORTED_VERSION_CURRENT);
|
||||
|
||||
CodecUtil.writeHeader(index,
|
||||
Lucene40DocValuesFormat.BYTES_FIXED_SORTED_CODEC_NAME_IDX,
|
||||
Lucene40DocValuesFormat.BYTES_FIXED_SORTED_VERSION_CURRENT);
|
||||
|
||||
/* values */
|
||||
|
||||
data.writeInt(length);
|
||||
int valueCount = 0;
|
||||
for (BytesRef v : values) {
|
||||
data.writeBytes(v.bytes, v.offset, v.length);
|
||||
valueCount++;
|
||||
}
|
||||
|
||||
/* ordinals */
|
||||
|
||||
index.writeInt(valueCount);
|
||||
int maxDoc = state.segmentInfo.getDocCount();
|
||||
assert valueCount > 0;
|
||||
final PackedInts.Writer w = PackedInts.getWriter(index, maxDoc, PackedInts.bitsRequired(valueCount-1), PackedInts.DEFAULT);
|
||||
for (Number n : docToOrd) {
|
||||
w.add(n.longValue());
|
||||
}
|
||||
w.finish();
|
||||
}
|
||||
|
||||
private void addVarSortedBytesField(FieldInfo field, IndexOutput data, IndexOutput index, Iterable<BytesRef> values, Iterable<Number> docToOrd) throws IOException {
|
||||
field.putAttribute(legacyKey, LegacyDocValuesType.BYTES_VAR_SORTED.name());
|
||||
|
||||
CodecUtil.writeHeader(data,
|
||||
Lucene40DocValuesFormat.BYTES_VAR_SORTED_CODEC_NAME_DAT,
|
||||
Lucene40DocValuesFormat.BYTES_VAR_SORTED_VERSION_CURRENT);
|
||||
|
||||
CodecUtil.writeHeader(index,
|
||||
Lucene40DocValuesFormat.BYTES_VAR_SORTED_CODEC_NAME_IDX,
|
||||
Lucene40DocValuesFormat.BYTES_VAR_SORTED_VERSION_CURRENT);
|
||||
|
||||
/* values */
|
||||
|
||||
final long startPos = data.getFilePointer();
|
||||
|
||||
int valueCount = 0;
|
||||
for (BytesRef v : values) {
|
||||
data.writeBytes(v.bytes, v.offset, v.length);
|
||||
valueCount++;
|
||||
}
|
||||
|
||||
/* addresses */
|
||||
|
||||
final long maxAddress = data.getFilePointer() - startPos;
|
||||
index.writeLong(maxAddress);
|
||||
|
||||
assert valueCount != Integer.MAX_VALUE; // unsupported by the 4.0 impl
|
||||
|
||||
final PackedInts.Writer w = PackedInts.getWriter(index, valueCount+1, PackedInts.bitsRequired(maxAddress), PackedInts.DEFAULT);
|
||||
long currentPosition = 0;
|
||||
for (BytesRef v : values) {
|
||||
w.add(currentPosition);
|
||||
currentPosition += v.length;
|
||||
}
|
||||
// write sentinel
|
||||
assert currentPosition == maxAddress;
|
||||
w.add(currentPosition);
|
||||
w.finish();
|
||||
|
||||
/* ordinals */
|
||||
|
||||
final int maxDoc = state.segmentInfo.getDocCount();
|
||||
assert valueCount > 0;
|
||||
final PackedInts.Writer ords = PackedInts.getWriter(index, maxDoc, PackedInts.bitsRequired(valueCount-1), PackedInts.DEFAULT);
|
||||
for (Number n : docToOrd) {
|
||||
ords.add(n.longValue());
|
||||
}
|
||||
ords.finish();
|
||||
}
|
||||
|
||||
@Override
|
||||
public void close() throws IOException {
|
||||
|
|
|
@ -1,65 +0,0 @@
|
|||
package org.apache.lucene.codecs.lucene40;
|
||||
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
import org.apache.lucene.codecs.DocValuesConsumer;
|
||||
import org.apache.lucene.codecs.DocValuesFormat;
|
||||
import org.apache.lucene.codecs.lucene40.Lucene40FieldInfosReader.LegacyDocValuesType;
|
||||
import org.apache.lucene.codecs.lucene42.Lucene42DocValuesFormat;
|
||||
import org.apache.lucene.index.FieldInfo;
|
||||
import org.apache.lucene.index.SegmentWriteState;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
|
||||
public class Lucene40LyingRWDocValuesFormat extends Lucene40LyingDocValuesFormat {
|
||||
private final DocValuesFormat lie = new Lucene42DocValuesFormat();
|
||||
|
||||
// nocommit: a lie
|
||||
@Override
|
||||
public DocValuesConsumer fieldsConsumer(SegmentWriteState state) throws IOException {
|
||||
final DocValuesConsumer delegate = lie.fieldsConsumer(state);
|
||||
return new DocValuesConsumer() {
|
||||
|
||||
@Override
|
||||
public void addNumericField(FieldInfo field, Iterable<Number> values) throws IOException {
|
||||
// hack: here we would examine the numerics and simulate in the impersonator the best we can
|
||||
// e.g. if they are all in byte/int range write fixed, otherwise write packed or whatever
|
||||
field.putAttribute(Lucene40FieldInfosReader.LEGACY_DV_TYPE_KEY, LegacyDocValuesType.VAR_INTS.name());
|
||||
delegate.addNumericField(field, values);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void addBinaryField(FieldInfo field, Iterable<BytesRef> values) throws IOException {
|
||||
field.putAttribute(Lucene40FieldInfosReader.LEGACY_DV_TYPE_KEY, LegacyDocValuesType.BYTES_VAR_STRAIGHT.name());
|
||||
delegate.addBinaryField(field, values);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void addSortedField(FieldInfo field, Iterable<BytesRef> values, Iterable<Number> docToOrd) throws IOException {
|
||||
field.putAttribute(Lucene40FieldInfosReader.LEGACY_DV_TYPE_KEY, LegacyDocValuesType.BYTES_VAR_SORTED.name());
|
||||
delegate.addSortedField(field, values, docToOrd);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void close() throws IOException {
|
||||
delegate.close();
|
||||
}
|
||||
};
|
||||
}
|
||||
}
|
|
@ -33,8 +33,7 @@ public final class Lucene40RWCodec extends Lucene40Codec {
|
|||
}
|
||||
};
|
||||
|
||||
//private final DocValuesFormat docValues = new Lucene40RWDocValuesFormat();
|
||||
private final DocValuesFormat docValues = new Lucene40LyingRWDocValuesFormat();
|
||||
private final DocValuesFormat docValues = new Lucene40RWDocValuesFormat();
|
||||
private final NormsFormat norms = new Lucene40RWNormsFormat();
|
||||
|
||||
@Override
|
||||
|
|
|
@ -9,7 +9,7 @@ import org.apache.lucene.codecs.NormsFormat;
|
|||
import org.apache.lucene.codecs.StoredFieldsFormat;
|
||||
import org.apache.lucene.codecs.lucene40.Lucene40FieldInfosFormat;
|
||||
import org.apache.lucene.codecs.lucene40.Lucene40FieldInfosWriter;
|
||||
import org.apache.lucene.codecs.lucene40.Lucene40LyingRWDocValuesFormat;
|
||||
import org.apache.lucene.codecs.lucene40.Lucene40RWDocValuesFormat;
|
||||
import org.apache.lucene.codecs.lucene40.Lucene40RWNormsFormat;
|
||||
|
||||
/*
|
||||
|
@ -41,7 +41,7 @@ public class Lucene41RWCodec extends Lucene41Codec {
|
|||
}
|
||||
};
|
||||
|
||||
private final DocValuesFormat docValues = new Lucene40LyingRWDocValuesFormat();
|
||||
private final DocValuesFormat docValues = new Lucene40RWDocValuesFormat();
|
||||
private final NormsFormat norms = new Lucene40RWNormsFormat();
|
||||
|
||||
@Override
|
||||
|
|
Loading…
Reference in New Issue