mirror of https://github.com/apache/lucene.git
use group-varint for positions only
This commit is contained in:
parent
489b5225f9
commit
53d5541b20
|
@ -440,73 +440,46 @@ final class FreqProxTermsWriter extends TermsHash {
|
||||||
this.postingInput = buffer.toDataInput();
|
this.postingInput = buffer.toDataInput();
|
||||||
}
|
}
|
||||||
|
|
||||||
private void writePositionsWithOffsets(final PostingsEnum in, final DataOutput out, int freq)
|
|
||||||
throws IOException {
|
|
||||||
int previousPosition = 0;
|
|
||||||
int previousEndOffset = 0;
|
|
||||||
for (int i = 0; i < freq; i++) {
|
|
||||||
final int pos = in.nextPosition();
|
|
||||||
final BytesRef payload = in.getPayload();
|
|
||||||
// The low-order bit of token is set only if there is a payload, the
|
|
||||||
// previous bits are the delta-encoded position.
|
|
||||||
final int token = (pos - previousPosition) << 1 | (payload == null ? 0 : 1);
|
|
||||||
previousPosition = pos;
|
|
||||||
|
|
||||||
final int startOffset = in.startOffset();
|
|
||||||
final int endOffset = in.endOffset();
|
|
||||||
posDeltaBuffer[0] = token;
|
|
||||||
posDeltaBuffer[1] = startOffset - previousEndOffset;
|
|
||||||
posDeltaBuffer[2] = endOffset - startOffset;
|
|
||||||
posDeltaBuffer[3] = payload == null ? 0 : payload.length;
|
|
||||||
out.writeGroupVInts(posDeltaBuffer, 4);
|
|
||||||
previousEndOffset = endOffset;
|
|
||||||
if (payload != null) {
|
|
||||||
out.writeBytes(payload.bytes, payload.offset, payload.length);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
private void writePositionsWithOutOffsets(final PostingsEnum in, final DataOutput out, int freq)
|
|
||||||
throws IOException {
|
|
||||||
int previousPosition = 0;
|
|
||||||
if (storePayloads) {
|
|
||||||
for (int i = 0; i < freq; i++) {
|
|
||||||
final int pos = in.nextPosition();
|
|
||||||
final BytesRef payload = in.getPayload();
|
|
||||||
// The low-order bit of token is set only if there is a payload, the
|
|
||||||
// previous bits are the delta-encoded position.
|
|
||||||
final int token = (pos - previousPosition) << 1 | (payload == null ? 0 : 1);
|
|
||||||
out.writeVInt(token);
|
|
||||||
previousPosition = pos;
|
|
||||||
if (payload != null) {
|
|
||||||
out.writeVInt(payload.length);
|
|
||||||
out.writeBytes(payload.bytes, payload.offset, payload.length);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
} else { // Only store token
|
|
||||||
int posWrite = 0;
|
|
||||||
for (int i = 0; i < freq; i++) {
|
|
||||||
final int pos = in.nextPosition();
|
|
||||||
final int token = (pos - previousPosition) << 1;
|
|
||||||
posDeltaBuffer[posWrite++] = token;
|
|
||||||
previousPosition = pos;
|
|
||||||
if (posWrite == POS_BUFFER_SIZE) {
|
|
||||||
flushPositions(out, posWrite);
|
|
||||||
posWrite = 0;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
flushPositions(out, posWrite);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
private void addPositions(final PostingsEnum in, final DataOutput out) throws IOException {
|
private void addPositions(final PostingsEnum in, final DataOutput out) throws IOException {
|
||||||
int freq = in.freq();
|
int freq = in.freq();
|
||||||
out.writeVInt(freq);
|
out.writeVInt(freq);
|
||||||
if (storePositions) {
|
if (storePositions) {
|
||||||
if (storeOffsets) {
|
int previousPosition = 0;
|
||||||
writePositionsWithOffsets(in, out, freq);
|
int previousEndOffset = 0;
|
||||||
} else {
|
if (storeOffsets || storePayloads) {
|
||||||
writePositionsWithOutOffsets(in, out, freq);
|
for (int i = 0; i < freq; i++) {
|
||||||
|
final int pos = in.nextPosition();
|
||||||
|
final BytesRef payload = in.getPayload();
|
||||||
|
// The low-order bit of token is set only if there is a payload, the
|
||||||
|
// previous bits are the delta-encoded position.
|
||||||
|
final int token = (pos - previousPosition) << 1 | (payload == null ? 0 : 1);
|
||||||
|
out.writeVInt(token);
|
||||||
|
previousPosition = pos;
|
||||||
|
if (storeOffsets) { // don't encode offsets if they are not stored
|
||||||
|
final int startOffset = in.startOffset();
|
||||||
|
final int endOffset = in.endOffset();
|
||||||
|
out.writeVInt(startOffset - previousEndOffset);
|
||||||
|
out.writeVInt(endOffset - startOffset);
|
||||||
|
previousEndOffset = endOffset;
|
||||||
|
}
|
||||||
|
if (payload != null) {
|
||||||
|
out.writeVInt(payload.length);
|
||||||
|
out.writeBytes(payload.bytes, payload.offset, payload.length);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} else { // Only store token
|
||||||
|
int posWrite = 0;
|
||||||
|
for (int i = 0; i < freq; i++) {
|
||||||
|
final int pos = in.nextPosition();
|
||||||
|
final int token = (pos - previousPosition) << 1;
|
||||||
|
posDeltaBuffer[posWrite++] = token;
|
||||||
|
previousPosition = pos;
|
||||||
|
if (posWrite == POS_BUFFER_SIZE) {
|
||||||
|
flushPositions(out, posWrite);
|
||||||
|
posWrite = 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
flushPositions(out, posWrite);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -566,47 +539,36 @@ final class FreqProxTermsWriter extends TermsHash {
|
||||||
out.writeGroupVInts(posDeltaBuffer, len);
|
out.writeGroupVInts(posDeltaBuffer, len);
|
||||||
}
|
}
|
||||||
|
|
||||||
private void readPayload(int token, boolean readPayloadLength) throws IOException {
|
|
||||||
if ((token & 1) != 0) {
|
|
||||||
payload.offset = 0;
|
|
||||||
if (readPayloadLength) {
|
|
||||||
payload.length = postingInput.readVInt();
|
|
||||||
}
|
|
||||||
if (payload.length > payload.bytes.length) {
|
|
||||||
payload.bytes = new byte[ArrayUtil.oversize(payload.length, 1)];
|
|
||||||
}
|
|
||||||
postingInput.readBytes(payload.bytes, 0, payload.length);
|
|
||||||
} else {
|
|
||||||
payload.length = 0;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public int nextPosition() throws IOException {
|
public int nextPosition() throws IOException {
|
||||||
if (storePositions == false) {
|
if (storePositions == false) {
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
if (storeOffsets) {
|
|
||||||
postingInput.readGroupVInts(posDeltaBuffer, 4);
|
if (storeOffsets || storePayloads) {
|
||||||
final int token = (int) posDeltaBuffer[0];
|
final int token = postingInput.readVInt();
|
||||||
pos += token >>> 1;
|
pos += token >>> 1;
|
||||||
startOffset = endOffset + (int) posDeltaBuffer[1];
|
if (storeOffsets) {
|
||||||
endOffset = startOffset + (int) posDeltaBuffer[2];
|
startOffset = endOffset + postingInput.readVInt();
|
||||||
payload.length = (int) posDeltaBuffer[3];
|
endOffset = startOffset + postingInput.readVInt();
|
||||||
readPayload(token, false);
|
|
||||||
} else {
|
|
||||||
if (storePayloads) {
|
|
||||||
final int token = postingInput.readVInt();
|
|
||||||
pos += token >>> 1;
|
|
||||||
readPayload(token, true);
|
|
||||||
} else {
|
|
||||||
// decode token from group-varint
|
|
||||||
if (posBufferUpto == POS_BUFFER_SIZE) {
|
|
||||||
refillPositions();
|
|
||||||
}
|
|
||||||
pos += posDeltaBuffer[posBufferUpto] >>> 1;
|
|
||||||
posBufferUpto++;
|
|
||||||
}
|
}
|
||||||
|
if ((token & 1) != 0) {
|
||||||
|
payload.offset = 0;
|
||||||
|
payload.length = postingInput.readVInt();
|
||||||
|
if (payload.length > payload.bytes.length) {
|
||||||
|
payload.bytes = new byte[ArrayUtil.oversize(payload.length, 1)];
|
||||||
|
}
|
||||||
|
postingInput.readBytes(payload.bytes, 0, payload.length);
|
||||||
|
} else {
|
||||||
|
payload.length = 0;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
// decode token from group-varint
|
||||||
|
if (posBufferUpto == POS_BUFFER_SIZE) {
|
||||||
|
refillPositions();
|
||||||
|
}
|
||||||
|
pos += posDeltaBuffer[posBufferUpto] >>> 1;
|
||||||
|
posBufferUpto++;
|
||||||
}
|
}
|
||||||
return pos;
|
return pos;
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue