codec owns the datastructure for sortedbytes too

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/lucene4547@1411062 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Robert Muir 2012-11-19 04:25:09 +00:00
parent a5de838806
commit 60781fdd1d
5 changed files with 146 additions and 46 deletions

View File

@ -272,6 +272,11 @@ public class SimpleTextSimpleDocValuesFormat extends SimpleDocValuesFormat {
SimpleTextUtil.write(data, Integer.toString(valueCount), scratch);
SimpleTextUtil.writeNewline(data);
// write fixedlength
SimpleTextUtil.write(data, FIXEDLENGTH);
SimpleTextUtil.write(data, Boolean.toString(fixedLength), scratch);
SimpleTextUtil.writeNewline(data);
// write maxLength
SimpleTextUtil.write(data, MAXLENGTH);
SimpleTextUtil.write(data, Integer.toString(maxLength), scratch);
@ -438,6 +443,9 @@ public class SimpleTextSimpleDocValuesFormat extends SimpleDocValuesFormat {
assert startsWith(NUMVALUES);
field.numValues = Integer.parseInt(stripPrefix(NUMVALUES));
readLine();
assert startsWith(FIXEDLENGTH);
field.fixedLength = Boolean.parseBoolean(stripPrefix(FIXEDLENGTH));
readLine();
assert startsWith(MAXLENGTH);
field.maxLength = Integer.parseInt(stripPrefix(MAXLENGTH));
readLine();
@ -613,6 +621,21 @@ public class SimpleTextSimpleDocValuesFormat extends SimpleDocValuesFormat {
public int getValueCount() {
return field.numValues;
}
@Override
public int size() {
return maxDoc;
}
@Override
public boolean isFixedLength() {
return field.fixedLength;
}
@Override
public int maxLength() {
return field.maxLength;
}
};
}

View File

@ -106,7 +106,7 @@ public abstract class SortedDocValuesConsumer {
state.reader = reader;
state.values = reader.getSortedDocValues(mergeState.fieldInfo.name);
if (state.values == null) {
state.values = SortedDocValues.DEFAULT;
state.values = new SortedDocValues.EMPTY(maxDoc);
}
segStates.add(state);

View File

@ -29,6 +29,8 @@ public abstract class BinaryDocValues {
public abstract boolean isFixedLength();
public abstract int maxLength();
// nocommit: rethink this api? alternative is boolean on atomicreader...?
// doc that the thing returned here must be thread safe...
public BinaryDocValues newRAMInstance() {
// TODO: optimize this default impl with e.g. isFixedLength/maxLength and so on
// nocommit used packed ints/pagedbytes and so on
@ -66,6 +68,12 @@ public abstract class BinaryDocValues {
public int maxLength() {
return maxLength;
}
@Override
public BinaryDocValues newRAMInstance() {
// nocommit: ugly, maybe throw exception instead?
return this;
}
};
}

View File

@ -20,7 +20,7 @@ package org.apache.lucene.index;
import org.apache.lucene.util.BytesRef;
// nocommit need marker interface?
public abstract class SortedDocValues {
public abstract class SortedDocValues extends BinaryDocValues {
// nocommit throws IOE or not?
public abstract int getOrd(int docID);
@ -30,26 +30,110 @@ public abstract class SortedDocValues {
// nocommit throws IOE or not?
public abstract int getValueCount();
// nocommit binary search lookup?
public static final SortedDocValues DEFAULT = new SortedDocValues() {
@Override
public void get(int docID, BytesRef result) {
int ord = getOrd(docID);
lookupOrd(ord, result);
}
@Override
public SortedDocValues newRAMInstance() {
// nocommit optimize this
// nocommit, see also BinaryDocValues nocommits
final int maxDoc = size();
final int maxLength = maxLength();
final boolean fixedLength = isFixedLength();
final int valueCount = getValueCount();
// nocommit used packed ints and so on
final byte[][] values = new byte[valueCount][];
BytesRef scratch = new BytesRef();
for(int ord=0;ord<values.length;ord++) {
lookupOrd(ord, scratch);
values[ord] = new byte[scratch.length];
System.arraycopy(scratch.bytes, scratch.offset, values[ord], 0, scratch.length);
}
final int[] docToOrd = new int[maxDoc];
for(int docID=0;docID<maxDoc;docID++) {
docToOrd[docID] = getOrd(docID);
}
return new SortedDocValues() {
@Override
public int getOrd(int docID) {
return 0;
return docToOrd[docID];
}
@Override
public void lookupOrd(int ord, BytesRef ret) {
if (ord != 0) {
throw new IllegalArgumentException("ord should be 0");
}
ret.length = 0;
public void lookupOrd(int ord, BytesRef result) {
result.bytes = values[ord];
result.offset = 0;
result.length = result.bytes.length;
}
@Override
public int getValueCount() {
return 1;
return valueCount;
}
@Override
public int size() {
return maxDoc;
}
@Override
public boolean isFixedLength() {
return fixedLength;
}
@Override
public int maxLength() {
return maxLength;
}
@Override
public SortedDocValues newRAMInstance() {
return this; // see the nocommit in BinaryDocValues
}
};
}
// nocommit binary search lookup?
public static class EMPTY extends SortedDocValues {
private final int size;
public EMPTY(int size) {
this.size = size;
}
@Override
public int getOrd(int docID) {
return 0;
}
@Override
public void lookupOrd(int ord, BytesRef result) {
result.length = 0;
}
@Override
public int getValueCount() {
return 1;
}
@Override
public int size() {
return size;
}
@Override
public boolean isFixedLength() {
return true;
}
@Override
public int maxLength() {
return 0;
}
}
}

View File

@ -47,6 +47,7 @@ import org.apache.lucene.util.FixedBitSet;
import org.apache.lucene.util.PagedBytes;
import org.apache.lucene.util.packed.GrowableWriter;
import org.apache.lucene.util.packed.PackedInts;
import org.apache.lucene.util.packed.PackedInts.Reader;
// nocommit rename to UninvertFieldCacheImpl or something ...
@ -1141,58 +1142,42 @@ class FieldCacheImpl implements FieldCache {
final int maxDoc = reader.maxDoc();
SortedDocValues valuesIn = reader.getSortedDocValues(key.field);
if (valuesIn != null) {
// nocommit used packed ints like below!
final byte[][] values = new byte[valuesIn.getValueCount()][];
BytesRef scratch = new BytesRef();
for(int ord=0;ord<values.length;ord++) {
valuesIn.lookupOrd(ord, scratch);
values[ord] = new byte[scratch.length];
System.arraycopy(scratch.bytes, scratch.offset, values[ord], 0, scratch.length);
}
final int[] docToOrd = new int[maxDoc];
for(int docID=0;docID<maxDoc;docID++) {
docToOrd[docID] = valuesIn.getOrd(docID);
}
final SortedDocValues ramInstance = valuesIn.newRAMInstance();
return new DocTermsIndex() {
@Override
public PackedInts.Reader getDocToOrd() {
// nocommit
return null;
}
@Override
public int numOrd() {
return values.length;
public BytesRef lookup(int ord, BytesRef reuse) {
ramInstance.lookupOrd(ord, reuse);
return reuse;
}
@Override
public int getOrd(int docID) {
return docToOrd[docID];
return ramInstance.getOrd(docID);
}
@Override
public int numOrd() {
return ramInstance.getValueCount();
}
@Override
public int size() {
return docToOrd.length;
}
@Override
public BytesRef lookup(int ord, BytesRef ret) {
ret.bytes = values[ord];
ret.length = ret.bytes.length;
ret.offset = 0;
return ret;
return ramInstance.size();
}
@Override
public TermsEnum getTermsEnum() {
// nocommit
// nocommit: to the codec api? or can that termsenum just use this thing?
return null;
}
@Override
public Reader getDocToOrd() {
// nocommit: add this to the codec api!
return null;
}
};
} else {
Terms terms = reader.terms(key.field);