mirror of https://github.com/apache/lucene.git
add in-ram sortedbytes
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/lucene4547@1433250 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
964960eccf
commit
6dcc657d91
|
@ -144,7 +144,7 @@ public class Lucene41Codec extends Codec {
|
|||
|
||||
private final PostingsFormat defaultFormat = PostingsFormat.forName("Lucene41");
|
||||
// nocommit
|
||||
private final SimpleDocValuesFormat defaultDVFormat = SimpleDocValuesFormat.forName("Disk");
|
||||
private final SimpleDocValuesFormat defaultDVFormat = SimpleDocValuesFormat.forName("Lucene41");
|
||||
|
||||
private final SimpleNormsFormat simpleNormsFormat = new Lucene41SimpleNormsFormat();
|
||||
|
||||
|
|
|
@ -30,6 +30,12 @@ import org.apache.lucene.index.SegmentWriteState;
|
|||
import org.apache.lucene.store.IndexOutput;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.IOUtils;
|
||||
import org.apache.lucene.util.IntsRef;
|
||||
import org.apache.lucene.util.fst.Builder;
|
||||
import org.apache.lucene.util.fst.FST;
|
||||
import org.apache.lucene.util.fst.FST.INPUT_TYPE;
|
||||
import org.apache.lucene.util.fst.PositiveIntOutputs;
|
||||
import org.apache.lucene.util.fst.Util;
|
||||
import org.apache.lucene.util.packed.PackedInts;
|
||||
import org.apache.lucene.util.packed.PackedInts.FormatAndBits;
|
||||
|
||||
|
@ -50,7 +56,6 @@ class Lucene41SimpleDocValuesConsumer extends SimpleDVConsumer {
|
|||
static final byte FST = 2;
|
||||
|
||||
final IndexOutput data, meta;
|
||||
final int maxDoc;
|
||||
|
||||
Lucene41SimpleDocValuesConsumer(SegmentWriteState state, String dataCodec, String dataExtension, String metaCodec, String metaExtension) throws IOException {
|
||||
boolean success = false;
|
||||
|
@ -61,7 +66,6 @@ class Lucene41SimpleDocValuesConsumer extends SimpleDVConsumer {
|
|||
String metaName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, metaExtension);
|
||||
meta = state.directory.createOutput(metaName, state.context);
|
||||
CodecUtil.writeHeader(meta, metaCodec, VERSION_CURRENT);
|
||||
maxDoc = state.segmentInfo.getDocCount();
|
||||
success = true;
|
||||
} finally {
|
||||
if (!success) {
|
||||
|
@ -217,7 +221,24 @@ class Lucene41SimpleDocValuesConsumer extends SimpleDVConsumer {
|
|||
|
||||
@Override
|
||||
public void addSortedField(FieldInfo field, Iterable<BytesRef> values, Iterable<Number> docToOrd) throws IOException {
|
||||
throw new AssertionError();
|
||||
// write the ordinals as numerics
|
||||
addNumericField(field, docToOrd);
|
||||
|
||||
// write the values as FST
|
||||
meta.writeVInt(field.number);
|
||||
meta.writeByte(FST);
|
||||
meta.writeLong(data.getFilePointer());
|
||||
PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton(true);
|
||||
Builder<Long> builder = new Builder<Long>(INPUT_TYPE.BYTE1, outputs);
|
||||
IntsRef scratch = new IntsRef();
|
||||
long ord = 0;
|
||||
for (BytesRef v : values) {
|
||||
builder.add(Util.toIntsRef(v, scratch), ord);
|
||||
ord++;
|
||||
}
|
||||
FST<Long> fst = builder.finish();
|
||||
fst.save(data);
|
||||
meta.writeVInt((int)ord);
|
||||
}
|
||||
|
||||
// nocommit: can/should we make override merge + make it smarter to pull the values
|
||||
|
|
|
@ -24,6 +24,7 @@ import java.util.Map;
|
|||
import org.apache.lucene.codecs.CodecUtil;
|
||||
import org.apache.lucene.codecs.SimpleDVProducer;
|
||||
import org.apache.lucene.index.BinaryDocValues;
|
||||
import org.apache.lucene.index.CorruptIndexException;
|
||||
import org.apache.lucene.index.FieldInfo;
|
||||
import org.apache.lucene.index.FieldInfos;
|
||||
import org.apache.lucene.index.IndexFileNames;
|
||||
|
@ -33,6 +34,12 @@ import org.apache.lucene.index.SortedDocValues;
|
|||
import org.apache.lucene.store.IndexInput;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.IOUtils;
|
||||
import org.apache.lucene.util.IntsRef;
|
||||
import org.apache.lucene.util.fst.FST;
|
||||
import org.apache.lucene.util.fst.FST.Arc;
|
||||
import org.apache.lucene.util.fst.FST.BytesReader;
|
||||
import org.apache.lucene.util.fst.PositiveIntOutputs;
|
||||
import org.apache.lucene.util.fst.Util;
|
||||
import org.apache.lucene.util.packed.PackedInts;
|
||||
|
||||
class Lucene41SimpleDocValuesProducer extends SimpleDVProducer {
|
||||
|
@ -51,6 +58,9 @@ class Lucene41SimpleDocValuesProducer extends SimpleDVProducer {
|
|||
private final Map<Integer,BinaryDocValues> binaryInstances =
|
||||
new HashMap<Integer,BinaryDocValues>();
|
||||
|
||||
private final Map<Integer,FST<Long>> fstInstances =
|
||||
new HashMap<Integer,FST<Long>>();
|
||||
|
||||
Lucene41SimpleDocValuesProducer(SegmentReadState state, String dataCodec, String dataExtension, String metaCodec, String metaExtension) throws IOException {
|
||||
String metaName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, metaExtension);
|
||||
// read in the entries from the metadata file.
|
||||
|
@ -96,6 +106,13 @@ class Lucene41SimpleDocValuesProducer extends SimpleDVProducer {
|
|||
entry.minLength = meta.readVInt();
|
||||
entry.maxLength = meta.readVInt();
|
||||
binaries.put(fieldNumber, entry);
|
||||
} else if (fieldType == Lucene41SimpleDocValuesConsumer.FST) {
|
||||
FSTEntry entry = new FSTEntry();
|
||||
entry.offset = meta.readLong();
|
||||
entry.numOrds = meta.readVInt();
|
||||
fsts.put(fieldNumber, entry);
|
||||
} else {
|
||||
throw new CorruptIndexException("invalid entry type: " + fieldType + ", input=" + meta);
|
||||
}
|
||||
fieldNumber = meta.readVInt();
|
||||
}
|
||||
|
@ -113,7 +130,6 @@ class Lucene41SimpleDocValuesProducer extends SimpleDVProducer {
|
|||
|
||||
private NumericDocValues loadNumeric(FieldInfo field) throws IOException {
|
||||
NumericEntry entry = numerics.get(field.number);
|
||||
final IndexInput data = this.data.clone();
|
||||
data.seek(entry.offset);
|
||||
if (entry.tableized) {
|
||||
int size = data.readVInt();
|
||||
|
@ -154,7 +170,6 @@ class Lucene41SimpleDocValuesProducer extends SimpleDVProducer {
|
|||
|
||||
private BinaryDocValues loadBinary(FieldInfo field) throws IOException {
|
||||
BinaryEntry entry = binaries.get(field.number);
|
||||
final IndexInput data = this.data.clone();
|
||||
data.seek(entry.offset);
|
||||
assert entry.numBytes < Integer.MAX_VALUE; // nocommit
|
||||
final byte[] bytes = new byte[(int)entry.numBytes];
|
||||
|
@ -184,11 +199,49 @@ class Lucene41SimpleDocValuesProducer extends SimpleDVProducer {
|
|||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
@Override
|
||||
public SortedDocValues getSorted(FieldInfo field) throws IOException {
|
||||
throw new AssertionError();
|
||||
final FSTEntry entry = fsts.get(field.number);
|
||||
FST<Long> instance;
|
||||
synchronized(this) {
|
||||
instance = fstInstances.get(field.number);
|
||||
if (instance == null) {
|
||||
data.seek(entry.offset);
|
||||
instance = new FST<Long>(data, PositiveIntOutputs.getSingleton(true));
|
||||
fstInstances.put(field.number, instance);
|
||||
}
|
||||
}
|
||||
final NumericDocValues docToOrd = getNumeric(field);
|
||||
final FST<Long> fst = instance;
|
||||
|
||||
// per-thread resources
|
||||
final BytesReader in = fst.getBytesReader(0);
|
||||
final Arc<Long> firstArc = new Arc<Long>();
|
||||
final Arc<Long> scratchArc = new Arc<Long>();
|
||||
final IntsRef scratchInts = new IntsRef();
|
||||
|
||||
return new SortedDocValues() {
|
||||
@Override
|
||||
public int getOrd(int docID) {
|
||||
return (int) docToOrd.get(docID);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void lookupOrd(int ord, BytesRef result) {
|
||||
try {
|
||||
in.setPosition(0);
|
||||
fst.getFirstArc(firstArc);
|
||||
Util.toBytesRef(Util.getByOutput(fst, ord, in, firstArc, scratchArc, scratchInts), result);
|
||||
} catch (IOException bogus) {
|
||||
throw new RuntimeException(bogus);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public int getValueCount() {
|
||||
return entry.numOrds;
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -212,5 +265,4 @@ class Lucene41SimpleDocValuesProducer extends SimpleDVProducer {
|
|||
long offset;
|
||||
int numOrds;
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -110,6 +110,12 @@ public final class Util {
|
|||
|
||||
final IntsRef result = new IntsRef();
|
||||
|
||||
return getByOutput(fst, targetOutput, in, arc, scratchArc, result);
|
||||
}
|
||||
|
||||
/** Expert: like {@link Util#getByOutput(FST, long)} except reusing the caller-supplied BytesReader, initial and scratch Arc, and result IntsRef. */
|
||||
// nocommit
|
||||
public static IntsRef getByOutput(FST<Long> fst, long targetOutput, BytesReader in, Arc<Long> arc, Arc<Long> scratchArc, IntsRef result) throws IOException {
|
||||
long output = arc.output;
|
||||
int upto = 0;
|
||||
|
||||
|
|
Loading…
Reference in New Issue