mirror of https://github.com/apache/lucene.git
fieldcache=caching, codec=datastructure/encoding (TODO: fix sorted/numeric too)
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/lucene4547@1411059 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
216e72ce47
commit
a5de838806
|
@ -29,9 +29,7 @@ import java.util.Map;
|
|||
import java.util.Set;
|
||||
|
||||
import org.apache.lucene.codecs.BinaryDocValuesConsumer;
|
||||
import org.apache.lucene.codecs.DocValuesArraySource;
|
||||
import org.apache.lucene.codecs.NumericDocValuesConsumer;
|
||||
import org.apache.lucene.codecs.PerDocProducer;
|
||||
import org.apache.lucene.codecs.SimpleDVConsumer;
|
||||
import org.apache.lucene.codecs.SimpleDVProducer;
|
||||
import org.apache.lucene.codecs.SimpleDocValuesFormat;
|
||||
|
@ -54,7 +52,6 @@ import org.apache.lucene.store.IndexOutput;
|
|||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.IOUtils;
|
||||
import org.apache.lucene.util.StringHelper;
|
||||
import org.apache.lucene.util.packed.PackedInts;
|
||||
|
||||
|
||||
/**
|
||||
|
@ -70,6 +67,7 @@ public class SimpleTextSimpleDocValuesFormat extends SimpleDocValuesFormat {
|
|||
final static BytesRef MINVALUE = new BytesRef(" minvalue ");
|
||||
final static BytesRef PATTERN = new BytesRef(" pattern ");
|
||||
// used for bytes
|
||||
final static BytesRef FIXEDLENGTH = new BytesRef(" fixedlength ");
|
||||
final static BytesRef MAXLENGTH = new BytesRef(" maxlength ");
|
||||
final static BytesRef LENGTH = new BytesRef("length ");
|
||||
// used for sorted bytes
|
||||
|
@ -103,6 +101,7 @@ public class SimpleTextSimpleDocValuesFormat extends SimpleDocValuesFormat {
|
|||
* for bytes this is also a "fixed-width" file, for example:
|
||||
* <pre>
|
||||
* field myField
|
||||
* fixedlength false
|
||||
* maxlength 8
|
||||
* pattern 0
|
||||
* length 6
|
||||
|
@ -142,8 +141,7 @@ public class SimpleTextSimpleDocValuesFormat extends SimpleDocValuesFormat {
|
|||
static class SimpleTextDocValuesWriter extends SimpleDVConsumer {
|
||||
final IndexOutput data;
|
||||
final BytesRef scratch = new BytesRef();
|
||||
|
||||
final int numDocs; // for asserting
|
||||
final int numDocs;
|
||||
private final Set<String> fieldsSeen = new HashSet<String>(); // for asserting
|
||||
|
||||
SimpleTextDocValuesWriter(Directory dir, SegmentInfo si, IOContext context) throws IOException {
|
||||
|
@ -215,6 +213,10 @@ public class SimpleTextSimpleDocValuesFormat extends SimpleDocValuesFormat {
|
|||
public BinaryDocValuesConsumer addBinaryField(FieldInfo field, boolean fixedLength, final int maxLength) throws IOException {
|
||||
assert fieldSeen(field.name);
|
||||
writeFieldEntry(field);
|
||||
// write fixedlength
|
||||
SimpleTextUtil.write(data, FIXEDLENGTH);
|
||||
SimpleTextUtil.write(data, Boolean.toString(fixedLength), scratch);
|
||||
SimpleTextUtil.writeNewline(data);
|
||||
// write maxLength
|
||||
SimpleTextUtil.write(data, MAXLENGTH);
|
||||
SimpleTextUtil.write(data, Integer.toString(maxLength), scratch);
|
||||
|
@ -377,6 +379,7 @@ public class SimpleTextSimpleDocValuesFormat extends SimpleDocValuesFormat {
|
|||
String pattern;
|
||||
String ordPattern;
|
||||
int maxLength;
|
||||
boolean fixedLength;
|
||||
long minValue;
|
||||
int numValues;
|
||||
};
|
||||
|
@ -419,6 +422,9 @@ public class SimpleTextSimpleDocValuesFormat extends SimpleDocValuesFormat {
|
|||
field.dataStartFilePointer = data.getFilePointer();
|
||||
data.seek(data.getFilePointer() + (1+field.pattern.length()) * maxDoc);
|
||||
} else if (DocValues.isBytes(dvType)) {
|
||||
readLine();
|
||||
assert startsWith(FIXEDLENGTH);
|
||||
field.fixedLength = Boolean.parseBoolean(stripPrefix(FIXEDLENGTH));
|
||||
readLine();
|
||||
assert startsWith(MAXLENGTH);
|
||||
field.maxLength = Integer.parseInt(stripPrefix(MAXLENGTH));
|
||||
|
@ -529,6 +535,21 @@ public class SimpleTextSimpleDocValuesFormat extends SimpleDocValuesFormat {
|
|||
throw new RuntimeException(ioe);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public int size() {
|
||||
return maxDoc;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean isFixedLength() {
|
||||
return field.fixedLength;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int maxLength() {
|
||||
return field.maxLength;
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
|
|
|
@ -21,8 +21,6 @@ import java.io.IOException;
|
|||
|
||||
import org.apache.lucene.index.AtomicReader;
|
||||
import org.apache.lucene.index.BinaryDocValues;
|
||||
import org.apache.lucene.index.DocValues.Source;
|
||||
import org.apache.lucene.index.FieldInfos;
|
||||
import org.apache.lucene.index.MergeState;
|
||||
import org.apache.lucene.util.Bits;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
|
@ -40,7 +38,7 @@ public abstract class BinaryDocValuesConsumer {
|
|||
|
||||
BinaryDocValues source = reader.getBinaryDocValues(mergeState.fieldInfo.name);
|
||||
if (source == null) {
|
||||
source = BinaryDocValues.DEFAULT;
|
||||
source = new BinaryDocValues.EMPTY(maxDoc);
|
||||
}
|
||||
|
||||
for (int i = 0; i < maxDoc; i++) {
|
||||
|
|
|
@ -104,6 +104,7 @@ public abstract class SimpleDVConsumer implements Closeable {
|
|||
// dead simple impl: codec can optimize
|
||||
protected void mergeBinaryField(MergeState mergeState) throws IOException {
|
||||
// first compute fixedLength and maxLength of live ones to be merged.
|
||||
// nocommit: messy, and can be simplified by using docValues.maxLength/fixedLength in many cases.
|
||||
boolean fixedLength = true;
|
||||
int maxLength = -1;
|
||||
BytesRef bytes = new BytesRef();
|
||||
|
@ -112,7 +113,7 @@ public abstract class SimpleDVConsumer implements Closeable {
|
|||
final Bits liveDocs = reader.getLiveDocs();
|
||||
BinaryDocValues docValues = reader.getBinaryDocValues(mergeState.fieldInfo.name);
|
||||
if (docValues == null) {
|
||||
docValues = BinaryDocValues.DEFAULT;
|
||||
docValues = new BinaryDocValues.EMPTY(maxDoc);
|
||||
}
|
||||
for (int i = 0; i < maxDoc; i++) {
|
||||
if (liveDocs == null || liveDocs.get(i)) {
|
||||
|
|
|
@ -23,11 +23,77 @@ import org.apache.lucene.util.BytesRef;
|
|||
public abstract class BinaryDocValues {
|
||||
// nocommit throws IOE or not?
|
||||
public abstract void get(int docID, BytesRef result);
|
||||
|
||||
public abstract int size();
|
||||
|
||||
public abstract boolean isFixedLength();
|
||||
public abstract int maxLength();
|
||||
|
||||
public BinaryDocValues newRAMInstance() {
|
||||
// TODO: optimize this default impl with e.g. isFixedLength/maxLength and so on
|
||||
// nocommit used packed ints/pagedbytes and so on
|
||||
final int maxDoc = size();
|
||||
final int maxLength = maxLength();
|
||||
final boolean fixedLength = isFixedLength();
|
||||
final byte[][] values = new byte[maxDoc][];
|
||||
BytesRef scratch = new BytesRef();
|
||||
for(int docID=0;docID<maxDoc;docID++) {
|
||||
get(docID, scratch);
|
||||
values[docID] = new byte[scratch.length];
|
||||
System.arraycopy(scratch.bytes, scratch.offset, values[docID], 0, scratch.length);
|
||||
}
|
||||
|
||||
return new BinaryDocValues() {
|
||||
|
||||
public static final BinaryDocValues DEFAULT = new BinaryDocValues() {
|
||||
@Override
|
||||
public void get(int docID, BytesRef ret) {
|
||||
ret.length = 0;
|
||||
public void get(int docID, BytesRef result) {
|
||||
result.bytes = values[docID];
|
||||
result.offset = 0;
|
||||
result.length = result.bytes.length;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int size() {
|
||||
return maxDoc;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean isFixedLength() {
|
||||
return fixedLength;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int maxLength() {
|
||||
return maxLength;
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
public static class EMPTY extends BinaryDocValues {
|
||||
private final int size;
|
||||
|
||||
public EMPTY(int size) {
|
||||
this.size = size;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void get(int docID, BytesRef result) {
|
||||
result.length = 0;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int size() {
|
||||
return size;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean isFixedLength() {
|
||||
return true;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int maxLength() {
|
||||
return 0;
|
||||
}
|
||||
};
|
||||
}
|
||||
|
|
|
@ -1334,22 +1334,15 @@ class FieldCacheImpl implements FieldCache {
|
|||
protected Object createValue(AtomicReader reader, CacheKey key, boolean setDocsWithField /* ignored */)
|
||||
throws IOException {
|
||||
|
||||
final int maxDoc = reader.maxDoc();
|
||||
BinaryDocValues valuesIn = reader.getBinaryDocValues(key.field);
|
||||
if (valuesIn != null) {
|
||||
// nocommit used packed ints like below!
|
||||
final byte[][] values = new byte[maxDoc][];
|
||||
BytesRef scratch = new BytesRef();
|
||||
for(int docID=0;docID<maxDoc;docID++) {
|
||||
valuesIn.get(docID, scratch);
|
||||
values[docID] = new byte[scratch.length];
|
||||
System.arraycopy(scratch.bytes, scratch.offset, values[docID], 0, scratch.length);
|
||||
}
|
||||
|
||||
final BinaryDocValues ramInstance = valuesIn.newRAMInstance();
|
||||
return new DocTerms() {
|
||||
|
||||
@Override
|
||||
public int size() {
|
||||
return maxDoc;
|
||||
public BytesRef getTerm(int docID, BytesRef ret) {
|
||||
ramInstance.get(docID, ret);
|
||||
return ret;
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -1359,15 +1352,12 @@ class FieldCacheImpl implements FieldCache {
|
|||
}
|
||||
|
||||
@Override
|
||||
public BytesRef getTerm(int docID, BytesRef ret) {
|
||||
ret.bytes = values[docID];
|
||||
ret.length = ret.bytes.length;
|
||||
ret.offset = 0;
|
||||
return ret;
|
||||
}
|
||||
public int size() {
|
||||
return ramInstance.size();
|
||||
}
|
||||
};
|
||||
} else {
|
||||
|
||||
final int maxDoc = reader.maxDoc();
|
||||
Terms terms = reader.terms(key.field);
|
||||
|
||||
final float acceptableOverheadRatio = ((Float) key.custom).floatValue();
|
||||
|
|
Loading…
Reference in New Issue