fieldcache=caching, codec=datastructure/encoding (TODO: fix sorted/numeric too)

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/lucene4547@1411059 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Robert Muir 2012-11-19 04:00:29 +00:00
parent 216e72ce47
commit a5de838806
5 changed files with 107 additions and 31 deletions

View File

@ -29,9 +29,7 @@ import java.util.Map;
import java.util.Set;
import org.apache.lucene.codecs.BinaryDocValuesConsumer;
import org.apache.lucene.codecs.DocValuesArraySource;
import org.apache.lucene.codecs.NumericDocValuesConsumer;
import org.apache.lucene.codecs.PerDocProducer;
import org.apache.lucene.codecs.SimpleDVConsumer;
import org.apache.lucene.codecs.SimpleDVProducer;
import org.apache.lucene.codecs.SimpleDocValuesFormat;
@ -54,7 +52,6 @@ import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.StringHelper;
import org.apache.lucene.util.packed.PackedInts;
/**
@ -70,6 +67,7 @@ public class SimpleTextSimpleDocValuesFormat extends SimpleDocValuesFormat {
final static BytesRef MINVALUE = new BytesRef(" minvalue ");
final static BytesRef PATTERN = new BytesRef(" pattern ");
// used for bytes
final static BytesRef FIXEDLENGTH = new BytesRef(" fixedlength ");
final static BytesRef MAXLENGTH = new BytesRef(" maxlength ");
final static BytesRef LENGTH = new BytesRef("length ");
// used for sorted bytes
@ -103,6 +101,7 @@ public class SimpleTextSimpleDocValuesFormat extends SimpleDocValuesFormat {
* for bytes this is also a "fixed-width" file, for example:
* <pre>
* field myField
* fixedlength false
* maxlength 8
* pattern 0
* length 6
@ -142,8 +141,7 @@ public class SimpleTextSimpleDocValuesFormat extends SimpleDocValuesFormat {
static class SimpleTextDocValuesWriter extends SimpleDVConsumer {
final IndexOutput data;
final BytesRef scratch = new BytesRef();
final int numDocs; // for asserting
final int numDocs;
private final Set<String> fieldsSeen = new HashSet<String>(); // for asserting
SimpleTextDocValuesWriter(Directory dir, SegmentInfo si, IOContext context) throws IOException {
@ -215,6 +213,10 @@ public class SimpleTextSimpleDocValuesFormat extends SimpleDocValuesFormat {
public BinaryDocValuesConsumer addBinaryField(FieldInfo field, boolean fixedLength, final int maxLength) throws IOException {
assert fieldSeen(field.name);
writeFieldEntry(field);
// write fixedlength
SimpleTextUtil.write(data, FIXEDLENGTH);
SimpleTextUtil.write(data, Boolean.toString(fixedLength), scratch);
SimpleTextUtil.writeNewline(data);
// write maxLength
SimpleTextUtil.write(data, MAXLENGTH);
SimpleTextUtil.write(data, Integer.toString(maxLength), scratch);
@ -377,6 +379,7 @@ public class SimpleTextSimpleDocValuesFormat extends SimpleDocValuesFormat {
String pattern;
String ordPattern;
int maxLength;
boolean fixedLength;
long minValue;
int numValues;
};
@ -419,6 +422,9 @@ public class SimpleTextSimpleDocValuesFormat extends SimpleDocValuesFormat {
field.dataStartFilePointer = data.getFilePointer();
data.seek(data.getFilePointer() + (1+field.pattern.length()) * maxDoc);
} else if (DocValues.isBytes(dvType)) {
readLine();
assert startsWith(FIXEDLENGTH);
field.fixedLength = Boolean.parseBoolean(stripPrefix(FIXEDLENGTH));
readLine();
assert startsWith(MAXLENGTH);
field.maxLength = Integer.parseInt(stripPrefix(MAXLENGTH));
@ -529,6 +535,21 @@ public class SimpleTextSimpleDocValuesFormat extends SimpleDocValuesFormat {
throw new RuntimeException(ioe);
}
}
@Override
public int size() {
return maxDoc;
}
@Override
public boolean isFixedLength() {
return field.fixedLength;
}
@Override
public int maxLength() {
return field.maxLength;
}
};
}

View File

@ -21,8 +21,6 @@ import java.io.IOException;
import org.apache.lucene.index.AtomicReader;
import org.apache.lucene.index.BinaryDocValues;
import org.apache.lucene.index.DocValues.Source;
import org.apache.lucene.index.FieldInfos;
import org.apache.lucene.index.MergeState;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
@ -40,7 +38,7 @@ public abstract class BinaryDocValuesConsumer {
BinaryDocValues source = reader.getBinaryDocValues(mergeState.fieldInfo.name);
if (source == null) {
source = BinaryDocValues.DEFAULT;
source = new BinaryDocValues.EMPTY(maxDoc);
}
for (int i = 0; i < maxDoc; i++) {

View File

@ -104,6 +104,7 @@ public abstract class SimpleDVConsumer implements Closeable {
// dead simple impl: codec can optimize
protected void mergeBinaryField(MergeState mergeState) throws IOException {
// first compute fixedLength and maxLength of live ones to be merged.
// nocommit: messy, and can be simplified by using docValues.maxLength/fixedLength in many cases.
boolean fixedLength = true;
int maxLength = -1;
BytesRef bytes = new BytesRef();
@ -112,7 +113,7 @@ public abstract class SimpleDVConsumer implements Closeable {
final Bits liveDocs = reader.getLiveDocs();
BinaryDocValues docValues = reader.getBinaryDocValues(mergeState.fieldInfo.name);
if (docValues == null) {
docValues = BinaryDocValues.DEFAULT;
docValues = new BinaryDocValues.EMPTY(maxDoc);
}
for (int i = 0; i < maxDoc; i++) {
if (liveDocs == null || liveDocs.get(i)) {

View File

@ -23,11 +23,77 @@ import org.apache.lucene.util.BytesRef;
public abstract class BinaryDocValues {
// nocommit throws IOE or not?
public abstract void get(int docID, BytesRef result);
public abstract int size();
public abstract boolean isFixedLength();
public abstract int maxLength();
public BinaryDocValues newRAMInstance() {
// TODO: optimize this default impl with e.g. isFixedLength/maxLength and so on
// nocommit used packed ints/pagedbytes and so on
final int maxDoc = size();
final int maxLength = maxLength();
final boolean fixedLength = isFixedLength();
final byte[][] values = new byte[maxDoc][];
BytesRef scratch = new BytesRef();
for(int docID=0;docID<maxDoc;docID++) {
get(docID, scratch);
values[docID] = new byte[scratch.length];
System.arraycopy(scratch.bytes, scratch.offset, values[docID], 0, scratch.length);
}
return new BinaryDocValues() {
public static final BinaryDocValues DEFAULT = new BinaryDocValues() {
@Override
public void get(int docID, BytesRef ret) {
ret.length = 0;
public void get(int docID, BytesRef result) {
result.bytes = values[docID];
result.offset = 0;
result.length = result.bytes.length;
}
@Override
public int size() {
return maxDoc;
}
@Override
public boolean isFixedLength() {
return fixedLength;
}
@Override
public int maxLength() {
return maxLength;
}
};
}
public static class EMPTY extends BinaryDocValues {
private final int size;
public EMPTY(int size) {
this.size = size;
}
@Override
public void get(int docID, BytesRef result) {
result.length = 0;
}
@Override
public int size() {
return size;
}
@Override
public boolean isFixedLength() {
return true;
}
@Override
public int maxLength() {
return 0;
}
};
}

View File

@ -1334,22 +1334,15 @@ class FieldCacheImpl implements FieldCache {
protected Object createValue(AtomicReader reader, CacheKey key, boolean setDocsWithField /* ignored */)
throws IOException {
final int maxDoc = reader.maxDoc();
BinaryDocValues valuesIn = reader.getBinaryDocValues(key.field);
if (valuesIn != null) {
// nocommit used packed ints like below!
final byte[][] values = new byte[maxDoc][];
BytesRef scratch = new BytesRef();
for(int docID=0;docID<maxDoc;docID++) {
valuesIn.get(docID, scratch);
values[docID] = new byte[scratch.length];
System.arraycopy(scratch.bytes, scratch.offset, values[docID], 0, scratch.length);
}
final BinaryDocValues ramInstance = valuesIn.newRAMInstance();
return new DocTerms() {
@Override
public int size() {
return maxDoc;
public BytesRef getTerm(int docID, BytesRef ret) {
ramInstance.get(docID, ret);
return ret;
}
@Override
@ -1359,15 +1352,12 @@ class FieldCacheImpl implements FieldCache {
}
@Override
public BytesRef getTerm(int docID, BytesRef ret) {
ret.bytes = values[docID];
ret.length = ret.bytes.length;
ret.offset = 0;
return ret;
}
public int size() {
return ramInstance.size();
}
};
} else {
final int maxDoc = reader.maxDoc();
Terms terms = reader.terms(key.field);
final float acceptableOverheadRatio = ((Float) key.custom).floatValue();