From f49e67456bf18cb9ace9aedf77cf4240058e6c5c Mon Sep 17 00:00:00 2001 From: Michael McCandless Date: Thu, 15 May 2014 22:53:30 +0000 Subject: [PATCH] LUCNE-5675: add failing test git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/lucene5675@1595068 13f79535-47bb-0310-9956-ffa450edef68 --- .../idversion/IDVersionPostingsFormat.java | 25 ++++++- .../idversion/IDVersionPostingsReader.java | 2 +- .../idversion/IDVersionPostingsWriter.java | 10 +-- .../idversion/IDVersionSegmentTermsEnum.java | 2 +- .../idversion/SingleDocsAndPositionsEnum.java | 4 +- .../TestIDVersionPostingsFormat.java | 71 +++++++++++++++++++ 6 files changed, 104 insertions(+), 10 deletions(-) create mode 100644 lucene/codecs/src/test/org/apache/lucene/codecs/idversion/TestIDVersionPostingsFormat.java diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/idversion/IDVersionPostingsFormat.java b/lucene/codecs/src/java/org/apache/lucene/codecs/idversion/IDVersionPostingsFormat.java index 9ffd35f19a8..def64c15dac 100644 --- a/lucene/codecs/src/java/org/apache/lucene/codecs/idversion/IDVersionPostingsFormat.java +++ b/lucene/codecs/src/java/org/apache/lucene/codecs/idversion/IDVersionPostingsFormat.java @@ -28,6 +28,7 @@ import org.apache.lucene.codecs.blocktree.BlockTreeTermsReader; import org.apache.lucene.codecs.blocktree.BlockTreeTermsWriter; import org.apache.lucene.index.SegmentReadState; import org.apache.lucene.index.SegmentWriteState; +import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.IOUtils; /** A PostingsFormat for primary-key (ID) fields, that associates a @@ -41,7 +42,7 @@ import org.apache.lucene.util.IOUtils; * * @lucene.experimental */ -public abstract class IDVersionPostingsFormat extends PostingsFormat { +public class IDVersionPostingsFormat extends PostingsFormat { private final int minTermsInBlock; private final int maxTermsInBlock; @@ -93,4 +94,26 @@ public abstract class IDVersionPostingsFormat extends PostingsFormat { } } } + + public static long bytesToLong(BytesRef bytes) { + return ((bytes.bytes[bytes.offset]&0xFF) << 56) | + ((bytes.bytes[bytes.offset+1]&0xFF) << 48) | + ((bytes.bytes[bytes.offset+2]&0xFF) << 40) | + ((bytes.bytes[bytes.offset+3]&0xFF) << 32) | + ((bytes.bytes[bytes.offset+4]&0xFF) << 24) | + ((bytes.bytes[bytes.offset+5]&0xFF) << 16) | + ((bytes.bytes[bytes.offset+6]&0xFF) << 8) | + (bytes.bytes[bytes.offset+7]&0xFF); + } + + public static void longToBytes(long v, BytesRef bytes) { + bytes.bytes[0] = (byte) (v >> 56); + bytes.bytes[1] = (byte) (v >> 48); + bytes.bytes[2] = (byte) (v >> 40); + bytes.bytes[3] = (byte) (v >> 32); + bytes.bytes[4] = (byte) (v >> 24); + bytes.bytes[5] = (byte) (v >> 16); + bytes.bytes[6] = (byte) (v >> 8); + bytes.bytes[7] = (byte) v; + } } diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/idversion/IDVersionPostingsReader.java b/lucene/codecs/src/java/org/apache/lucene/codecs/idversion/IDVersionPostingsReader.java index 26e300fed7b..8e028f9cd5f 100644 --- a/lucene/codecs/src/java/org/apache/lucene/codecs/idversion/IDVersionPostingsReader.java +++ b/lucene/codecs/src/java/org/apache/lucene/codecs/idversion/IDVersionPostingsReader.java @@ -53,8 +53,8 @@ public final class IDVersionPostingsReader extends PostingsReaderBase { public void decodeTerm(long[] longs, DataInput in, FieldInfo fieldInfo, BlockTermState _termState, boolean absolute) throws IOException { final IDVersionTermState termState = (IDVersionTermState) _termState; - termState.idVersion = Long.MAX_VALUE - longs[0]; termState.docID = in.readVInt(); + termState.idVersion = in.readVLong(); } @Override diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/idversion/IDVersionPostingsWriter.java b/lucene/codecs/src/java/org/apache/lucene/codecs/idversion/IDVersionPostingsWriter.java index e304dc982d7..e0190fa8cd0 100644 --- a/lucene/codecs/src/java/org/apache/lucene/codecs/idversion/IDVersionPostingsWriter.java +++ b/lucene/codecs/src/java/org/apache/lucene/codecs/idversion/IDVersionPostingsWriter.java @@ -92,14 +92,14 @@ public final class IDVersionPostingsWriter extends PushPostingsWriterBase { lastPosition = position; if (payload == null) { // nocommit need test - throw new IllegalArgumentException("missing payload"); + throw new IllegalArgumentException("token doens't have a payload"); } - if (payload.length == 0) { + if (payload.length != 8) { // nocommit need test - throw new IllegalArgumentException("payload.length == 0"); + throw new IllegalArgumentException("payload.length != 8 (got " + payload.length + ")"); } - // nocommit decode payload to long here ... PayloadHelper!? or keep as byte[]? - lastVersion = 0; + + lastVersion = IDVersionPostingsFormat.bytesToLong(payload); } @Override diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/idversion/IDVersionSegmentTermsEnum.java b/lucene/codecs/src/java/org/apache/lucene/codecs/idversion/IDVersionSegmentTermsEnum.java index 6320438a06d..8f9efb5babb 100644 --- a/lucene/codecs/src/java/org/apache/lucene/codecs/idversion/IDVersionSegmentTermsEnum.java +++ b/lucene/codecs/src/java/org/apache/lucene/codecs/idversion/IDVersionSegmentTermsEnum.java @@ -40,7 +40,7 @@ import org.apache.lucene.util.fst.PairOutputs; import org.apache.lucene.util.fst.Util; /** Iterates through terms in this field */ -final class IDVersionSegmentTermsEnum extends TermsEnum { +public final class IDVersionSegmentTermsEnum extends TermsEnum { final static Outputs> fstOutputs = VersionBlockTreeTermsWriter.getFSTOutputs(); final static Pair NO_OUTPUT = fstOutputs.getNoOutput(); diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/idversion/SingleDocsAndPositionsEnum.java b/lucene/codecs/src/java/org/apache/lucene/codecs/idversion/SingleDocsAndPositionsEnum.java index bb19e9443a6..d777f18ca9a 100644 --- a/lucene/codecs/src/java/org/apache/lucene/codecs/idversion/SingleDocsAndPositionsEnum.java +++ b/lucene/codecs/src/java/org/apache/lucene/codecs/idversion/SingleDocsAndPositionsEnum.java @@ -27,7 +27,7 @@ class SingleDocsAndPositionsEnum extends DocsAndPositionsEnum { private int singleDocID; private Bits liveDocs; private long version; - private final BytesRef payload = new BytesRef(); + private final BytesRef payload = new BytesRef(8); /** For reuse */ public void reset(int singleDocID, long version, Bits liveDocs) { @@ -79,7 +79,7 @@ class SingleDocsAndPositionsEnum extends DocsAndPositionsEnum { public int nextPosition() { assert pos == -1; pos = 0; - // nocommit re-encode version back into payload here: + IDVersionPostingsFormat.longToBytes(version, payload); return pos; } diff --git a/lucene/codecs/src/test/org/apache/lucene/codecs/idversion/TestIDVersionPostingsFormat.java b/lucene/codecs/src/test/org/apache/lucene/codecs/idversion/TestIDVersionPostingsFormat.java new file mode 100644 index 00000000000..e888dedc822 --- /dev/null +++ b/lucene/codecs/src/test/org/apache/lucene/codecs/idversion/TestIDVersionPostingsFormat.java @@ -0,0 +1,71 @@ +package org.apache.lucene.codecs.idversion; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import org.apache.lucene.analysis.CannedTokenStream; +import org.apache.lucene.analysis.MockAnalyzer; +import org.apache.lucene.analysis.Token; +import org.apache.lucene.codecs.Codec; +import org.apache.lucene.document.Document; +import org.apache.lucene.document.Field; +import org.apache.lucene.index.BasePostingsFormatTestCase; +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.IndexWriterConfig; +import org.apache.lucene.index.MultiFields; +import org.apache.lucene.index.RandomIndexWriter; +import org.apache.lucene.index.TermsEnum; +import org.apache.lucene.store.Directory; +import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.LuceneTestCase; +import org.apache.lucene.util.TestUtil; + +/** + * Basic tests for IDVersionPostingsFormat + */ +public class TestIDVersionPostingsFormat extends LuceneTestCase { + + public void testBasic() throws Exception { + Directory dir = newDirectory(); + IndexWriterConfig iwc = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random())); + iwc.setCodec(TestUtil.alwaysPostingsFormat(new IDVersionPostingsFormat())); + RandomIndexWriter w = new RandomIndexWriter(random(), dir, iwc); + Document doc = new Document(); + doc.add(makeIDField("id0", 100)); + w.addDocument(doc); + IndexReader r = w.getReader(); + IDVersionSegmentTermsEnum termsEnum = (IDVersionSegmentTermsEnum) MultiFields.getTerms(r, "id").iterator(null); + assertTrue(termsEnum.seekExact(new BytesRef("id0"), 50)); + assertFalse(termsEnum.seekExact(new BytesRef("id0"), 101)); + r.close(); + + w.close(); + dir.close(); + } + + // nocommit need testRandom + + private static Field makeIDField(String id, long version) { + Field field = newTextField("id", "", Field.Store.NO); + Token token = new Token(id, 0, id.length()); + BytesRef payload = new BytesRef(8); + IDVersionPostingsFormat.longToBytes(100, payload); + token.setPayload(payload); + field.setTokenStream(new CannedTokenStream(token)); + return field; + } +}