mirror of https://github.com/apache/lucene.git
codec owns the datastructure for sortedbytes too
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/lucene4547@1411062 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
a5de838806
commit
60781fdd1d
|
@ -272,6 +272,11 @@ public class SimpleTextSimpleDocValuesFormat extends SimpleDocValuesFormat {
|
|||
SimpleTextUtil.write(data, Integer.toString(valueCount), scratch);
|
||||
SimpleTextUtil.writeNewline(data);
|
||||
|
||||
// write fixedlength
|
||||
SimpleTextUtil.write(data, FIXEDLENGTH);
|
||||
SimpleTextUtil.write(data, Boolean.toString(fixedLength), scratch);
|
||||
SimpleTextUtil.writeNewline(data);
|
||||
|
||||
// write maxLength
|
||||
SimpleTextUtil.write(data, MAXLENGTH);
|
||||
SimpleTextUtil.write(data, Integer.toString(maxLength), scratch);
|
||||
|
@ -438,6 +443,9 @@ public class SimpleTextSimpleDocValuesFormat extends SimpleDocValuesFormat {
|
|||
assert startsWith(NUMVALUES);
|
||||
field.numValues = Integer.parseInt(stripPrefix(NUMVALUES));
|
||||
readLine();
|
||||
assert startsWith(FIXEDLENGTH);
|
||||
field.fixedLength = Boolean.parseBoolean(stripPrefix(FIXEDLENGTH));
|
||||
readLine();
|
||||
assert startsWith(MAXLENGTH);
|
||||
field.maxLength = Integer.parseInt(stripPrefix(MAXLENGTH));
|
||||
readLine();
|
||||
|
@ -613,6 +621,21 @@ public class SimpleTextSimpleDocValuesFormat extends SimpleDocValuesFormat {
|
|||
public int getValueCount() {
|
||||
return field.numValues;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int size() {
|
||||
return maxDoc;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean isFixedLength() {
|
||||
return field.fixedLength;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int maxLength() {
|
||||
return field.maxLength;
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
|
|
|
@ -106,7 +106,7 @@ public abstract class SortedDocValuesConsumer {
|
|||
state.reader = reader;
|
||||
state.values = reader.getSortedDocValues(mergeState.fieldInfo.name);
|
||||
if (state.values == null) {
|
||||
state.values = SortedDocValues.DEFAULT;
|
||||
state.values = new SortedDocValues.EMPTY(maxDoc);
|
||||
}
|
||||
|
||||
segStates.add(state);
|
||||
|
|
|
@ -29,6 +29,8 @@ public abstract class BinaryDocValues {
|
|||
public abstract boolean isFixedLength();
|
||||
public abstract int maxLength();
|
||||
|
||||
// nocommit: rethink this api? alternative is boolean on atomicreader...?
|
||||
// doc that the thing returned here must be thread safe...
|
||||
public BinaryDocValues newRAMInstance() {
|
||||
// TODO: optimize this default impl with e.g. isFixedLength/maxLength and so on
|
||||
// nocommit used packed ints/pagedbytes and so on
|
||||
|
@ -66,6 +68,12 @@ public abstract class BinaryDocValues {
|
|||
public int maxLength() {
|
||||
return maxLength;
|
||||
}
|
||||
|
||||
@Override
|
||||
public BinaryDocValues newRAMInstance() {
|
||||
// nocommit: ugly, maybe throw exception instead?
|
||||
return this;
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
|
|
|
@ -20,7 +20,7 @@ package org.apache.lucene.index;
|
|||
import org.apache.lucene.util.BytesRef;
|
||||
|
||||
// nocommit need marker interface?
|
||||
public abstract class SortedDocValues {
|
||||
public abstract class SortedDocValues extends BinaryDocValues {
|
||||
// nocommit throws IOE or not?
|
||||
public abstract int getOrd(int docID);
|
||||
|
||||
|
@ -30,26 +30,110 @@ public abstract class SortedDocValues {
|
|||
// nocommit throws IOE or not?
|
||||
public abstract int getValueCount();
|
||||
|
||||
// nocommit binary search lookup?
|
||||
public static final SortedDocValues DEFAULT = new SortedDocValues() {
|
||||
|
||||
@Override
|
||||
public void get(int docID, BytesRef result) {
|
||||
int ord = getOrd(docID);
|
||||
lookupOrd(ord, result);
|
||||
}
|
||||
|
||||
@Override
|
||||
public SortedDocValues newRAMInstance() {
|
||||
// nocommit optimize this
|
||||
// nocommit, see also BinaryDocValues nocommits
|
||||
final int maxDoc = size();
|
||||
final int maxLength = maxLength();
|
||||
final boolean fixedLength = isFixedLength();
|
||||
final int valueCount = getValueCount();
|
||||
// nocommit used packed ints and so on
|
||||
final byte[][] values = new byte[valueCount][];
|
||||
BytesRef scratch = new BytesRef();
|
||||
for(int ord=0;ord<values.length;ord++) {
|
||||
lookupOrd(ord, scratch);
|
||||
values[ord] = new byte[scratch.length];
|
||||
System.arraycopy(scratch.bytes, scratch.offset, values[ord], 0, scratch.length);
|
||||
}
|
||||
|
||||
final int[] docToOrd = new int[maxDoc];
|
||||
for(int docID=0;docID<maxDoc;docID++) {
|
||||
docToOrd[docID] = getOrd(docID);
|
||||
}
|
||||
return new SortedDocValues() {
|
||||
|
||||
@Override
|
||||
public int getOrd(int docID) {
|
||||
return 0;
|
||||
return docToOrd[docID];
|
||||
}
|
||||
|
||||
@Override
|
||||
public void lookupOrd(int ord, BytesRef ret) {
|
||||
if (ord != 0) {
|
||||
throw new IllegalArgumentException("ord should be 0");
|
||||
}
|
||||
ret.length = 0;
|
||||
public void lookupOrd(int ord, BytesRef result) {
|
||||
result.bytes = values[ord];
|
||||
result.offset = 0;
|
||||
result.length = result.bytes.length;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int getValueCount() {
|
||||
return 1;
|
||||
return valueCount;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int size() {
|
||||
return maxDoc;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean isFixedLength() {
|
||||
return fixedLength;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int maxLength() {
|
||||
return maxLength;
|
||||
}
|
||||
|
||||
@Override
|
||||
public SortedDocValues newRAMInstance() {
|
||||
return this; // see the nocommit in BinaryDocValues
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
// nocommit binary search lookup?
|
||||
public static class EMPTY extends SortedDocValues {
|
||||
private final int size;
|
||||
|
||||
public EMPTY(int size) {
|
||||
this.size = size;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int getOrd(int docID) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void lookupOrd(int ord, BytesRef result) {
|
||||
result.length = 0;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int getValueCount() {
|
||||
return 1;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int size() {
|
||||
return size;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean isFixedLength() {
|
||||
return true;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int maxLength() {
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -47,6 +47,7 @@ import org.apache.lucene.util.FixedBitSet;
|
|||
import org.apache.lucene.util.PagedBytes;
|
||||
import org.apache.lucene.util.packed.GrowableWriter;
|
||||
import org.apache.lucene.util.packed.PackedInts;
|
||||
import org.apache.lucene.util.packed.PackedInts.Reader;
|
||||
|
||||
// nocommit rename to UninvertFieldCacheImpl or something ...
|
||||
|
||||
|
@ -1141,58 +1142,42 @@ class FieldCacheImpl implements FieldCache {
|
|||
final int maxDoc = reader.maxDoc();
|
||||
SortedDocValues valuesIn = reader.getSortedDocValues(key.field);
|
||||
if (valuesIn != null) {
|
||||
// nocommit used packed ints like below!
|
||||
final byte[][] values = new byte[valuesIn.getValueCount()][];
|
||||
BytesRef scratch = new BytesRef();
|
||||
for(int ord=0;ord<values.length;ord++) {
|
||||
valuesIn.lookupOrd(ord, scratch);
|
||||
values[ord] = new byte[scratch.length];
|
||||
System.arraycopy(scratch.bytes, scratch.offset, values[ord], 0, scratch.length);
|
||||
}
|
||||
|
||||
final int[] docToOrd = new int[maxDoc];
|
||||
for(int docID=0;docID<maxDoc;docID++) {
|
||||
docToOrd[docID] = valuesIn.getOrd(docID);
|
||||
}
|
||||
|
||||
final SortedDocValues ramInstance = valuesIn.newRAMInstance();
|
||||
return new DocTermsIndex() {
|
||||
|
||||
@Override
|
||||
public PackedInts.Reader getDocToOrd() {
|
||||
// nocommit
|
||||
return null;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int numOrd() {
|
||||
return values.length;
|
||||
public BytesRef lookup(int ord, BytesRef reuse) {
|
||||
ramInstance.lookupOrd(ord, reuse);
|
||||
return reuse;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int getOrd(int docID) {
|
||||
return docToOrd[docID];
|
||||
return ramInstance.getOrd(docID);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int numOrd() {
|
||||
return ramInstance.getValueCount();
|
||||
}
|
||||
|
||||
@Override
|
||||
public int size() {
|
||||
return docToOrd.length;
|
||||
}
|
||||
|
||||
@Override
|
||||
public BytesRef lookup(int ord, BytesRef ret) {
|
||||
ret.bytes = values[ord];
|
||||
ret.length = ret.bytes.length;
|
||||
ret.offset = 0;
|
||||
return ret;
|
||||
return ramInstance.size();
|
||||
}
|
||||
|
||||
@Override
|
||||
public TermsEnum getTermsEnum() {
|
||||
// nocommit
|
||||
// nocommit: to the codec api? or can that termsenum just use this thing?
|
||||
return null;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Reader getDocToOrd() {
|
||||
// nocommit: add this to the codec api!
|
||||
return null;
|
||||
}
|
||||
};
|
||||
|
||||
} else {
|
||||
|
||||
Terms terms = reader.terms(key.field);
|
||||
|
|
Loading…
Reference in New Issue