mirror of https://github.com/apache/lucene.git
LUCENE-5675: zig-zag encode the versions (loses 1 bit); check the min/max version
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/lucene5675@1596974 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
8a136e3113
commit
da6a93f1ca
|
@ -58,6 +58,7 @@ final class FreqProxTermsWriter extends TermsHash {
|
|||
if (termsEnum != null && termsEnum.seekExact(deleteTerm.bytes())) {
|
||||
docsEnum = termsEnum.docs(null, docsEnum, 0);
|
||||
int delDocLimit = segDeletes.get(deleteTerm);
|
||||
assert delDocLimit < DocsEnum.NO_MORE_DOCS;
|
||||
while (true) {
|
||||
int doc = docsEnum.nextDoc();
|
||||
if (doc < delDocLimit) {
|
||||
|
@ -96,7 +97,7 @@ final class FreqProxTermsWriter extends TermsHash {
|
|||
// Sort by field name
|
||||
CollectionUtil.introSort(allFields);
|
||||
|
||||
FreqProxFields fields = new FreqProxFields(allFields);
|
||||
Fields fields = new FreqProxFields(allFields);
|
||||
|
||||
applyDeletes(state, fields);
|
||||
|
||||
|
|
|
@ -58,6 +58,14 @@ import org.apache.lucene.util.IOUtils;
|
|||
|
||||
public class IDVersionPostingsFormat extends PostingsFormat {
|
||||
|
||||
/** version must be >= this. */
|
||||
public static final long MIN_VERSION = 0;
|
||||
|
||||
// TODO: we could delta encode instead, and keep the last bit:
|
||||
|
||||
/** version must be <= this, because we encode with ZigZag. */
|
||||
public static final long MAX_VERSION = 0x3fffffffffffffffL;
|
||||
|
||||
private final int minTermsInBlock;
|
||||
private final int maxTermsInBlock;
|
||||
|
||||
|
@ -121,6 +129,9 @@ public class IDVersionPostingsFormat extends PostingsFormat {
|
|||
}
|
||||
|
||||
public static void longToBytes(long v, BytesRef bytes) {
|
||||
if (v > MAX_VERSION || v < MIN_VERSION) {
|
||||
throw new IllegalArgumentException("version must be >= MIN_VERSION=" + MIN_VERSION + " and <= MAX_VERSION=" + MAX_VERSION + " (got: " + v + ")");
|
||||
}
|
||||
bytes.offset = 0;
|
||||
bytes.length = 8;
|
||||
bytes.bytes[0] = (byte) (v >> 56);
|
||||
|
|
|
@ -27,6 +27,7 @@ import org.apache.lucene.index.DocsEnum;
|
|||
import org.apache.lucene.index.FieldInfo;
|
||||
import org.apache.lucene.store.DataInput;
|
||||
import org.apache.lucene.store.IndexInput;
|
||||
import org.apache.lucene.util.BitUtil;
|
||||
import org.apache.lucene.util.Bits;
|
||||
|
||||
final class IDVersionPostingsReader extends PostingsReaderBase {
|
||||
|
@ -54,7 +55,11 @@ final class IDVersionPostingsReader extends PostingsReaderBase {
|
|||
throws IOException {
|
||||
final IDVersionTermState termState = (IDVersionTermState) _termState;
|
||||
termState.docID = in.readVInt();
|
||||
termState.idVersion = in.readVLong();
|
||||
if (absolute) {
|
||||
termState.idVersion = in.readVLong();
|
||||
} else {
|
||||
termState.idVersion += BitUtil.zigZagDecode(in.readVLong());
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@ -26,6 +26,7 @@ import org.apache.lucene.index.FieldInfo;
|
|||
import org.apache.lucene.index.SegmentWriteState;
|
||||
import org.apache.lucene.store.DataOutput;
|
||||
import org.apache.lucene.store.IndexOutput;
|
||||
import org.apache.lucene.util.BitUtil;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
|
||||
final class IDVersionPostingsWriter extends PushPostingsWriterBase {
|
||||
|
@ -115,8 +116,11 @@ final class IDVersionPostingsWriter extends PushPostingsWriterBase {
|
|||
}
|
||||
|
||||
lastVersion = IDVersionPostingsFormat.bytesToLong(payload);
|
||||
if (lastVersion < 0) {
|
||||
throw new IllegalArgumentException("version must be >= 0 (got: " + lastVersion + "; payload=" + payload + ")");
|
||||
if (lastVersion < IDVersionPostingsFormat.MIN_VERSION) {
|
||||
throw new IllegalArgumentException("version must be >= MIN_VERSION=" + IDVersionPostingsFormat.MIN_VERSION + " (got: " + lastVersion + "; payload=" + payload + ")");
|
||||
}
|
||||
if (lastVersion > IDVersionPostingsFormat.MAX_VERSION) {
|
||||
throw new IllegalArgumentException("version must be <= MAX_VERSION=" + IDVersionPostingsFormat.MAX_VERSION + " (got: " + lastVersion + "; payload=" + payload + ")");
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -144,11 +148,19 @@ final class IDVersionPostingsWriter extends PushPostingsWriterBase {
|
|||
state.idVersion = lastVersion;
|
||||
}
|
||||
|
||||
private long lastEncodedVersion;
|
||||
|
||||
@Override
|
||||
public void encodeTerm(long[] longs, DataOutput out, FieldInfo fieldInfo, BlockTermState _state, boolean absolute) throws IOException {
|
||||
IDVersionTermState state = (IDVersionTermState) _state;
|
||||
out.writeVInt(state.docID);
|
||||
out.writeVLong(state.idVersion);
|
||||
if (absolute) {
|
||||
out.writeVLong(state.idVersion);
|
||||
} else {
|
||||
long delta = state.idVersion - lastEncodedVersion;
|
||||
out.writeVLong(BitUtil.zigZagEncode(delta));
|
||||
}
|
||||
lastEncodedVersion = state.idVersion;
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@ -29,7 +29,11 @@ import java.util.concurrent.ConcurrentHashMap;
|
|||
import java.util.concurrent.CountDownLatch;
|
||||
import java.util.concurrent.atomic.AtomicLong;
|
||||
|
||||
import org.apache.lucene.analysis.Analyzer.TokenStreamComponents;
|
||||
import org.apache.lucene.analysis.Analyzer;
|
||||
import org.apache.lucene.analysis.MockAnalyzer;
|
||||
import org.apache.lucene.analysis.MockTokenFilter;
|
||||
import org.apache.lucene.analysis.MockTokenizer;
|
||||
import org.apache.lucene.codecs.idversion.StringAndPayloadField.SingleTokenWithPayloadTokenStream;
|
||||
import org.apache.lucene.document.Document;
|
||||
import org.apache.lucene.document.Field;
|
||||
|
@ -155,7 +159,7 @@ public class TestIDVersionPostingsFormat extends LuceneTestCase {
|
|||
int upto;
|
||||
@Override
|
||||
public String next() {
|
||||
return Long.toString(random().nextLong() & 0x7ffffffffffffffL, radix);
|
||||
return Long.toString(random().nextLong() & 0x3ffffffffffffffL, radix);
|
||||
}
|
||||
};
|
||||
break;
|
||||
|
@ -170,7 +174,7 @@ public class TestIDVersionPostingsFormat extends LuceneTestCase {
|
|||
int upto;
|
||||
@Override
|
||||
public String next() {
|
||||
return Long.toString(random().nextLong() & 0x7ffffffffffffffL, radix);
|
||||
return Long.toString(random().nextLong() & 0x3ffffffffffffffL, radix);
|
||||
}
|
||||
};
|
||||
break;
|
||||
|
@ -225,7 +229,7 @@ public class TestIDVersionPostingsFormat extends LuceneTestCase {
|
|||
if (useMonotonicVersion) {
|
||||
version += TestUtil.nextInt(random(), 1, 10);
|
||||
} else {
|
||||
version = random().nextLong() & 0x7fffffffffffffffL;
|
||||
version = random().nextLong() & 0x3fffffffffffffffL;
|
||||
}
|
||||
idValues.put(idValue, version);
|
||||
if (VERBOSE) {
|
||||
|
@ -243,7 +247,7 @@ public class TestIDVersionPostingsFormat extends LuceneTestCase {
|
|||
if (useMonotonicVersion) {
|
||||
version += TestUtil.nextInt(random(), 1, 10);
|
||||
} else {
|
||||
version = random().nextLong() & 0x7fffffffffffffffL;
|
||||
version = random().nextLong() & 0x3fffffffffffffffL;
|
||||
}
|
||||
doc = new Document();
|
||||
doc.add(makeIDField(idValue, version));
|
||||
|
@ -362,8 +366,8 @@ public class TestIDVersionPostingsFormat extends LuceneTestCase {
|
|||
w.addDocument(doc);
|
||||
doc = new Document();
|
||||
doc.add(makeIDField("id", 17));
|
||||
w.addDocument(doc);
|
||||
try {
|
||||
w.addDocument(doc);
|
||||
w.commit();
|
||||
fail("didn't hit expected exception");
|
||||
} catch (IllegalArgumentException iae) {
|
||||
|
@ -445,7 +449,18 @@ public class TestIDVersionPostingsFormat extends LuceneTestCase {
|
|||
|
||||
public void testMissingPayload() throws Exception {
|
||||
Directory dir = newDirectory();
|
||||
IndexWriterConfig iwc = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()));
|
||||
|
||||
// Like MockAnalyzer, but without maybePayload; otherwise it sometimes stuffs in an 8-byte payload!
|
||||
Analyzer a = new Analyzer() {
|
||||
@Override
|
||||
public TokenStreamComponents createComponents(String fieldName) {
|
||||
MockTokenizer tokenizer = new MockTokenizer(MockTokenizer.WHITESPACE, true, 100);
|
||||
tokenizer.setEnableChecks(true);
|
||||
MockTokenFilter filt = new MockTokenFilter(tokenizer, MockTokenFilter.EMPTY_STOPSET);
|
||||
return new TokenStreamComponents(tokenizer, filt);
|
||||
}
|
||||
};
|
||||
IndexWriterConfig iwc = newIndexWriterConfig(TEST_VERSION_CURRENT, a);
|
||||
iwc.setCodec(TestUtil.alwaysPostingsFormat(new IDVersionPostingsFormat()));
|
||||
RandomIndexWriter w = new RandomIndexWriter(random(), dir, iwc);
|
||||
Document doc = new Document();
|
||||
|
@ -567,6 +582,36 @@ public class TestIDVersionPostingsFormat extends LuceneTestCase {
|
|||
dir.close();
|
||||
}
|
||||
|
||||
public void testInvalidVersions() throws IOException {
|
||||
Directory dir = newDirectory();
|
||||
IndexWriterConfig iwc = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()));
|
||||
iwc.setCodec(TestUtil.alwaysPostingsFormat(new IDVersionPostingsFormat()));
|
||||
RandomIndexWriter w = new RandomIndexWriter(random(), dir, iwc);
|
||||
Document doc = new Document();
|
||||
// -1 (all payload bits set), which is below MIN_VERSION:
|
||||
doc.add(new StringAndPayloadField("id", "id", new BytesRef(new byte[] {(byte)0xff, (byte)0xff, (byte)0xff, (byte)0xff, (byte)0xff, (byte)0xff, (byte)0xff, (byte)0xff})));
|
||||
try {
|
||||
w.addDocument(doc);
|
||||
w.commit();
|
||||
fail("didn't hit expected exception");
|
||||
} catch (IllegalArgumentException iae) {
|
||||
// expected
|
||||
}
|
||||
|
||||
doc = new Document();
|
||||
// Long.MAX_VALUE, which is above MAX_VERSION:
|
||||
doc.add(new StringAndPayloadField("id", "id", new BytesRef(new byte[] {(byte)0x7f, (byte)0xff, (byte)0xff, (byte)0xff, (byte)0xff, (byte)0xff, (byte)0xff, (byte)0xff})));
|
||||
try {
|
||||
w.addDocument(doc);
|
||||
w.commit();
|
||||
fail("didn't hit expected exception");
|
||||
} catch (IllegalArgumentException iae) {
|
||||
// expected
|
||||
}
|
||||
w.close();
|
||||
dir.close();
|
||||
}
|
||||
|
||||
// Simulates optimistic concurrency in a distributed indexing app and confirms the latest version always wins:
|
||||
public void testGlobalVersions() throws Exception {
|
||||
Directory dir = newDirectory();
|
||||
|
@ -576,7 +621,6 @@ public class TestIDVersionPostingsFormat extends LuceneTestCase {
|
|||
|
||||
IDSource idsSource = getRandomIDs();
|
||||
int numIDs = atLeast(100);
|
||||
System.out.println("ids=" + numIDs);
|
||||
if (VERBOSE) {
|
||||
System.out.println("TEST: " + numIDs + " ids");
|
||||
}
|
||||
|
@ -649,7 +693,7 @@ public class TestIDVersionPostingsFormat extends LuceneTestCase {
|
|||
long newVersion;
|
||||
if (versionType == 0) {
|
||||
// Random:
|
||||
newVersion = random().nextLong() & 0x7fffffffffffffffL;
|
||||
newVersion = random().nextLong() & 0x3fffffffffffffffL;
|
||||
} else if (versionType == 1) {
|
||||
// Monotonic
|
||||
newVersion = nextVersion.getAndIncrement();
|
||||
|
|
Loading…
Reference in New Issue