From 5b4e1aea06a87d9fc069501fa8b279c6b50bdecd Mon Sep 17 00:00:00 2001 From: Michael McCandless Date: Tue, 3 Jul 2012 00:43:06 +0000 Subject: [PATCH] LUCENE-3892: add PForPostingsFormat git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/pforcodec_3892@1356531 13f79535-47bb-0310-9956-ffa450edef68 --- .../apache/lucene/codecs/pfor/ForFactory.java | 2 +- .../lucene/codecs/pfor/ForPostingsFormat.java | 2 +- .../apache/lucene/codecs/pfor/ForUtil.java | 71 ++-- .../lucene/codecs/pfor/PForFactory.java | 114 +++++++ .../codecs/pfor/PForPostingsFormat.java | 117 +++++++ .../apache/lucene/codecs/pfor/PForUtil.java | 308 ++++++++++++++++++ ...essImpl.java => PackedIntsDecompress.java} | 2 +- .../lucene/codecs/pfor/gendecompress.py | 7 +- .../org.apache.lucene.codecs.PostingsFormat | 1 + .../{TestForUtil.java => TestPForUtil.java} | 19 +- .../index/TestBackwardsCompatibility.java | 2 +- .../lucene/index/TestPostingsOffsets.java | 2 +- .../analysis/BaseTokenStreamTestCase.java | 1 + .../mockrandom/MockRandomPostingsFormat.java | 1 + 14 files changed, 594 insertions(+), 55 deletions(-) create mode 100644 lucene/core/src/java/org/apache/lucene/codecs/pfor/PForFactory.java create mode 100644 lucene/core/src/java/org/apache/lucene/codecs/pfor/PForPostingsFormat.java create mode 100644 lucene/core/src/java/org/apache/lucene/codecs/pfor/PForUtil.java rename lucene/core/src/java/org/apache/lucene/codecs/pfor/{ForDecompressImpl.java => PackedIntsDecompress.java} (99%) rename lucene/core/src/test/org/apache/lucene/codecs/pfor/{TestForUtil.java => TestPForUtil.java} (92%) diff --git a/lucene/core/src/java/org/apache/lucene/codecs/pfor/ForFactory.java b/lucene/core/src/java/org/apache/lucene/codecs/pfor/ForFactory.java index 96396d80c2f..11eb65009bb 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/pfor/ForFactory.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/pfor/ForFactory.java @@ -36,7 +36,7 @@ import org.apache.lucene.codecs.intblock.FixedIntBlockIndexOutput; * Things really make sense are: flushBlock() and readBlock() */ -public class ForFactory extends IntStreamFactory { +public final class ForFactory extends IntStreamFactory { private final int blockSize; public ForFactory() { diff --git a/lucene/core/src/java/org/apache/lucene/codecs/pfor/ForPostingsFormat.java b/lucene/core/src/java/org/apache/lucene/codecs/pfor/ForPostingsFormat.java index 365ca9f7923..fdd40b4d7cb 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/pfor/ForPostingsFormat.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/pfor/ForPostingsFormat.java @@ -42,7 +42,7 @@ import org.apache.lucene.codecs.sep.SepPostingsWriter; * to a PostingsWriter/ReaderBase, and get customized * format plugged. */ -public class ForPostingsFormat extends PostingsFormat { +public final class ForPostingsFormat extends PostingsFormat { private final int blockSize; private final int minBlockSize; private final int maxBlockSize; diff --git a/lucene/core/src/java/org/apache/lucene/codecs/pfor/ForUtil.java b/lucene/core/src/java/org/apache/lucene/codecs/pfor/ForUtil.java index d916f272737..f21ee986666 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/pfor/ForUtil.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/pfor/ForUtil.java @@ -21,16 +21,15 @@ import java.nio.ByteBuffer; import java.util.Arrays; // Encode all values in normal area, based on the bit size for max value -public final class ForUtil { +public class ForUtil { public static final int HEADER_INT_SIZE=1; - private static final int[] MASK = { 0x00000000, + protected static final int[] MASK = { 0x00000000, 0x00000001, 0x00000003, 0x00000007, 0x0000000f, 0x0000001f, 0x0000003f, 0x0000007f, 0x000000ff, 0x000001ff, 0x000003ff, 0x000007ff, 0x00000fff, 0x00001fff, 0x00003fff, 0x00007fff, 0x0000ffff, 0x0001ffff, 0x0003ffff, 0x0007ffff, 0x000fffff, 0x001fffff, 0x003fffff, 0x007fffff, 0x00ffffff, 0x01ffffff, 0x03ffffff, 0x07ffffff, 0x0fffffff, 0x1fffffff, 0x3fffffff, 0x7fffffff, 0xffffffff}; - private static final int[] PER_EXCEPTION_SIZE = {1,2,4}; public static int compress(final int[] data, int size, IntBuffer intBuffer) { int numBits=getNumBits(data,size); @@ -51,40 +50,40 @@ public final class ForUtil { int numInts = (header & MASK[8]) + 1; int numBits = ((header >> 8) & MASK[5]) + 1; - // TODO: ForDecompressImpl is hardewired to size==128 only + // TODO: PackedIntsDecompress is hardewired to size==128 only switch(numBits) { - case 1: ForDecompressImpl.decode1(intBuffer, data); break; - case 2: ForDecompressImpl.decode2(intBuffer, data); break; - case 3: ForDecompressImpl.decode3(intBuffer, data); break; - case 4: ForDecompressImpl.decode4(intBuffer, data); break; - case 5: ForDecompressImpl.decode5(intBuffer, data); break; - case 6: ForDecompressImpl.decode6(intBuffer, data); break; - case 7: ForDecompressImpl.decode7(intBuffer, data); break; - case 8: ForDecompressImpl.decode8(intBuffer, data); break; - case 9: ForDecompressImpl.decode9(intBuffer, data); break; - case 10: ForDecompressImpl.decode10(intBuffer, data); break; - case 11: ForDecompressImpl.decode11(intBuffer, data); break; - case 12: ForDecompressImpl.decode12(intBuffer, data); break; - case 13: ForDecompressImpl.decode13(intBuffer, data); break; - case 14: ForDecompressImpl.decode14(intBuffer, data); break; - case 15: ForDecompressImpl.decode15(intBuffer, data); break; - case 16: ForDecompressImpl.decode16(intBuffer, data); break; - case 17: ForDecompressImpl.decode17(intBuffer, data); break; - case 18: ForDecompressImpl.decode18(intBuffer, data); break; - case 19: ForDecompressImpl.decode19(intBuffer, data); break; - case 20: ForDecompressImpl.decode20(intBuffer, data); break; - case 21: ForDecompressImpl.decode21(intBuffer, data); break; - case 22: ForDecompressImpl.decode22(intBuffer, data); break; - case 23: ForDecompressImpl.decode23(intBuffer, data); break; - case 24: ForDecompressImpl.decode24(intBuffer, data); break; - case 25: ForDecompressImpl.decode25(intBuffer, data); break; - case 26: ForDecompressImpl.decode26(intBuffer, data); break; - case 27: ForDecompressImpl.decode27(intBuffer, data); break; - case 28: ForDecompressImpl.decode28(intBuffer, data); break; - case 29: ForDecompressImpl.decode29(intBuffer, data); break; - case 30: ForDecompressImpl.decode30(intBuffer, data); break; - case 31: ForDecompressImpl.decode31(intBuffer, data); break; - case 32: ForDecompressImpl.decode32(intBuffer, data); break; + case 1: PackedIntsDecompress.decode1(intBuffer, data); break; + case 2: PackedIntsDecompress.decode2(intBuffer, data); break; + case 3: PackedIntsDecompress.decode3(intBuffer, data); break; + case 4: PackedIntsDecompress.decode4(intBuffer, data); break; + case 5: PackedIntsDecompress.decode5(intBuffer, data); break; + case 6: PackedIntsDecompress.decode6(intBuffer, data); break; + case 7: PackedIntsDecompress.decode7(intBuffer, data); break; + case 8: PackedIntsDecompress.decode8(intBuffer, data); break; + case 9: PackedIntsDecompress.decode9(intBuffer, data); break; + case 10: PackedIntsDecompress.decode10(intBuffer, data); break; + case 11: PackedIntsDecompress.decode11(intBuffer, data); break; + case 12: PackedIntsDecompress.decode12(intBuffer, data); break; + case 13: PackedIntsDecompress.decode13(intBuffer, data); break; + case 14: PackedIntsDecompress.decode14(intBuffer, data); break; + case 15: PackedIntsDecompress.decode15(intBuffer, data); break; + case 16: PackedIntsDecompress.decode16(intBuffer, data); break; + case 17: PackedIntsDecompress.decode17(intBuffer, data); break; + case 18: PackedIntsDecompress.decode18(intBuffer, data); break; + case 19: PackedIntsDecompress.decode19(intBuffer, data); break; + case 20: PackedIntsDecompress.decode20(intBuffer, data); break; + case 21: PackedIntsDecompress.decode21(intBuffer, data); break; + case 22: PackedIntsDecompress.decode22(intBuffer, data); break; + case 23: PackedIntsDecompress.decode23(intBuffer, data); break; + case 24: PackedIntsDecompress.decode24(intBuffer, data); break; + case 25: PackedIntsDecompress.decode25(intBuffer, data); break; + case 26: PackedIntsDecompress.decode26(intBuffer, data); break; + case 27: PackedIntsDecompress.decode27(intBuffer, data); break; + case 28: PackedIntsDecompress.decode28(intBuffer, data); break; + case 29: PackedIntsDecompress.decode29(intBuffer, data); break; + case 30: PackedIntsDecompress.decode30(intBuffer, data); break; + case 31: PackedIntsDecompress.decode31(intBuffer, data); break; + case 32: PackedIntsDecompress.decode32(intBuffer, data); break; default: throw new IllegalStateException("Unknown numFrameBits " + numBits); } diff --git a/lucene/core/src/java/org/apache/lucene/codecs/pfor/PForFactory.java b/lucene/core/src/java/org/apache/lucene/codecs/pfor/PForFactory.java new file mode 100644 index 00000000000..b6c990662ce --- /dev/null +++ b/lucene/core/src/java/org/apache/lucene/codecs/pfor/PForFactory.java @@ -0,0 +1,114 @@ +package org.apache.lucene.codecs.pfor; +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.IOException; +import java.nio.ByteBuffer; +import java.nio.IntBuffer; + +import org.apache.lucene.util.IOUtils; +import org.apache.lucene.store.Directory; +import org.apache.lucene.store.IOContext; +import org.apache.lucene.store.IndexInput; +import org.apache.lucene.store.IndexOutput; +import org.apache.lucene.codecs.sep.IntStreamFactory; +import org.apache.lucene.codecs.sep.IntIndexInput; +import org.apache.lucene.codecs.sep.IntIndexOutput; +import org.apache.lucene.codecs.intblock.FixedIntBlockIndexInput; +import org.apache.lucene.codecs.intblock.FixedIntBlockIndexOutput; + +/** + * Stuff to pass to PostingsReader/WriterBase. + * Things really make sense are: flushBlock() and readBlock() + */ + +public final class PForFactory extends IntStreamFactory { + private final int blockSize; + + public PForFactory() { + this.blockSize=PForPostingsFormat.DEFAULT_BLOCK_SIZE; + } + + @Override + public IntIndexOutput createOutput(Directory dir, String fileName, IOContext context) throws IOException { + IndexOutput out = dir.createOutput(fileName, context); + boolean success = false; + try { + FixedIntBlockIndexOutput ret = new PForIndexOutput(out, blockSize); + success = true; + return ret; + } finally { + if (!success) { + // TODO: why handle exception like this? + // and why not use similar codes for read part? + IOUtils.closeWhileHandlingException(out); + } + } + } + @Override + public IntIndexInput openInput(Directory dir, String fileName, IOContext context) throws IOException { + FixedIntBlockIndexInput ret = new PForIndexInput(dir.openInput(fileName, context)); + return ret; + } + + // wrap input and output with buffer support + private class PForIndexInput extends FixedIntBlockIndexInput { + PForIndexInput(final IndexInput in) throws IOException { + super(in); + } + class PForBlockReader implements FixedIntBlockIndexInput.BlockReader { + byte[] encoded; + int[] buffer; + IndexInput in; + IntBuffer encodedBuffer; + PForBlockReader(final IndexInput in, final int[] buffer) { + this.encoded = new byte[blockSize*8+4]; + this.in=in; + this.buffer=buffer; + this.encodedBuffer=ByteBuffer.wrap(encoded).asIntBuffer(); + } + public void seek(long pos) {} + // TODO: implement public void skipBlock() {} ? + public void readBlock() throws IOException { + final int numBytes = in.readInt(); + assert numBytes <= blockSize*8+4; + in.readBytes(encoded,0,numBytes); + PForUtil.decompress(encodedBuffer,buffer); + } + } + @Override + protected BlockReader getBlockReader(final IndexInput in, final int[] buffer) throws IOException { + return new PForBlockReader(in,buffer); + } + } + + private class PForIndexOutput extends FixedIntBlockIndexOutput { + private byte[] encoded; + private IntBuffer encodedBuffer; + PForIndexOutput(IndexOutput out, int blockSize) throws IOException { + super(out,blockSize); + this.encoded = new byte[blockSize*8+4]; + this.encodedBuffer=ByteBuffer.wrap(encoded).asIntBuffer(); + } + @Override + protected void flushBlock() throws IOException { + final int numBytes = PForUtil.compress(buffer,buffer.length,encodedBuffer); + out.writeInt(numBytes); + out.writeBytes(encoded, numBytes); + } + } +} diff --git a/lucene/core/src/java/org/apache/lucene/codecs/pfor/PForPostingsFormat.java b/lucene/core/src/java/org/apache/lucene/codecs/pfor/PForPostingsFormat.java new file mode 100644 index 00000000000..28444535053 --- /dev/null +++ b/lucene/core/src/java/org/apache/lucene/codecs/pfor/PForPostingsFormat.java @@ -0,0 +1,117 @@ +package org.apache.lucene.codecs.pfor; +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.util.Set; +import java.io.IOException; + +import org.apache.lucene.util.BytesRef; +import org.apache.lucene.store.IOContext; +import org.apache.lucene.index.SegmentInfo; +import org.apache.lucene.index.SegmentWriteState; +import org.apache.lucene.index.SegmentReadState; +import org.apache.lucene.codecs.FieldsConsumer; +import org.apache.lucene.codecs.FieldsProducer; +import org.apache.lucene.codecs.BlockTreeTermsWriter; +import org.apache.lucene.codecs.BlockTreeTermsReader; +import org.apache.lucene.codecs.TermsIndexReaderBase; +import org.apache.lucene.codecs.TermsIndexWriterBase; +import org.apache.lucene.codecs.FixedGapTermsIndexReader; +import org.apache.lucene.codecs.FixedGapTermsIndexWriter; +import org.apache.lucene.codecs.PostingsFormat; +import org.apache.lucene.codecs.PostingsWriterBase; +import org.apache.lucene.codecs.PostingsReaderBase; +import org.apache.lucene.codecs.sep.SepPostingsReader; +import org.apache.lucene.codecs.sep.SepPostingsWriter; +/** + * This class actually only pass the PForFactory + * to a PostingsWriter/ReaderBase, and get customized + * format plugged. + */ +public final class PForPostingsFormat extends PostingsFormat { + private final int blockSize; + private final int minBlockSize; + private final int maxBlockSize; + protected final static int DEFAULT_BLOCK_SIZE = 128; + protected final static int DEFAULT_TERM_CACHED_SIZE = 1024; + + public PForPostingsFormat() { + super("PFor"); + this.blockSize = DEFAULT_BLOCK_SIZE; + this.minBlockSize = BlockTreeTermsWriter.DEFAULT_MIN_BLOCK_SIZE; + this.maxBlockSize = BlockTreeTermsWriter.DEFAULT_MAX_BLOCK_SIZE; + } + public PForPostingsFormat(int minBlockSize, int maxBlockSize) { + super("PFor"); + this.blockSize = DEFAULT_BLOCK_SIZE; + this.minBlockSize = minBlockSize; + assert minBlockSize > 1; + this.maxBlockSize = maxBlockSize; + assert minBlockSize <= maxBlockSize; + } + + @Override + public String toString() { + return getName() + "(blocksize=" + blockSize + ")"; + } + + @Override + public FieldsConsumer fieldsConsumer(SegmentWriteState state) throws IOException { + // TODO: implement a new PostingsWriterBase to improve skip-settings + PostingsWriterBase postingsWriter = new SepPostingsWriter(state, new PForFactory()); + boolean success = false; + try { + FieldsConsumer ret = new BlockTreeTermsWriter(state, + postingsWriter, + minBlockSize, + maxBlockSize); + success = true; + return ret; + } finally { + if (!success) { + postingsWriter.close(); + } + } + } + + @Override + public FieldsProducer fieldsProducer(SegmentReadState state) throws IOException { + PostingsReaderBase postingsReader = new SepPostingsReader(state.dir, + state.fieldInfos, + state.segmentInfo, + state.context, + new PForFactory(), + state.segmentSuffix); + + boolean success = false; + try { + FieldsProducer ret = new BlockTreeTermsReader(state.dir, + state.fieldInfos, + state.segmentInfo.name, + postingsReader, + state.context, + state.segmentSuffix, + state.termsIndexDivisor); + success = true; + return ret; + } finally { + if (!success) { + postingsReader.close(); + } + } + } +} diff --git a/lucene/core/src/java/org/apache/lucene/codecs/pfor/PForUtil.java b/lucene/core/src/java/org/apache/lucene/codecs/pfor/PForUtil.java new file mode 100644 index 00000000000..364187bdd5d --- /dev/null +++ b/lucene/core/src/java/org/apache/lucene/codecs/pfor/PForUtil.java @@ -0,0 +1,308 @@ +package org.apache.lucene.codecs.pfor; +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.nio.IntBuffer; +import java.nio.ByteBuffer; +import java.util.Arrays; + +// Encode all small values and exception pointers in normal area, +// Encode large values in exception area. +// Size per exception is variable, possibly: 1byte, 2bytes, or 4bytes +public final class PForUtil extends ForUtil { + protected static final int[] PER_EXCEPTION_SIZE = {1,2,4}; + + public static int compress(final int[] data, int size, IntBuffer intBuffer) { + int numBits=getNumBits(data,size); + + int[] excValues = new int[size]; + int excNum = 0, excLastPos = -1, excFirstPos = -1; + int excLastNonForcePos = -1; + int excNumBase = 0; // num of exception until the last non-force exception + int excBytes = 1; // bytes per exception + int excByteOffset = 0; // bytes of preceeding codes like header and normal area + long maxChain = (1<<8) - 2; // header bits limits this to 254 + boolean conValue, conForce, conEnd; + int i=0; + + // estimate exceptions + for (i=0; i= maxChain + excLastPos); // force exception + if (conValue || conForce) { + excValues[excNum++] = data[i]; + if (excLastPos == -1) { + maxChain = 1L<= maxChain + excLastPos); // force exception + conEnd = (excNum == excNumBase); // following forced ignored + if ((!conValue && !conForce) || conEnd) { + encodeNormalValue(intBuffer,i,data[i], numBits); + } else { + if (excLastPos >= 0) { + encodeNormalValue(intBuffer, excLastPos, i-excLastPos-1, numBits); + } + excNum++; + excLastPos = i; + } + } + if (excLastPos >= 0) { + encodeNormalValue(intBuffer, excLastPos, (i-excLastPos-1)&MASK[numBits], numBits); // mask out suppressed force exception + } + } + + // encode exception area + i=0; + for (; i> 8) & MASK[8]) + 1; + int excFirstPos = ((header >> 16) & MASK[8]) - 1; + int excBytes = PER_EXCEPTION_SIZE[(header >> 29) & MASK[2]]; + int numBits = ((header >> 24) & MASK[5]) + 1; + + // TODO: PackedIntsDecompress is hardewired to size==128 only + switch(numBits) { + case 1: PackedIntsDecompress.decode1(intBuffer, data); break; + case 2: PackedIntsDecompress.decode2(intBuffer, data); break; + case 3: PackedIntsDecompress.decode3(intBuffer, data); break; + case 4: PackedIntsDecompress.decode4(intBuffer, data); break; + case 5: PackedIntsDecompress.decode5(intBuffer, data); break; + case 6: PackedIntsDecompress.decode6(intBuffer, data); break; + case 7: PackedIntsDecompress.decode7(intBuffer, data); break; + case 8: PackedIntsDecompress.decode8(intBuffer, data); break; + case 9: PackedIntsDecompress.decode9(intBuffer, data); break; + case 10: PackedIntsDecompress.decode10(intBuffer, data); break; + case 11: PackedIntsDecompress.decode11(intBuffer, data); break; + case 12: PackedIntsDecompress.decode12(intBuffer, data); break; + case 13: PackedIntsDecompress.decode13(intBuffer, data); break; + case 14: PackedIntsDecompress.decode14(intBuffer, data); break; + case 15: PackedIntsDecompress.decode15(intBuffer, data); break; + case 16: PackedIntsDecompress.decode16(intBuffer, data); break; + case 17: PackedIntsDecompress.decode17(intBuffer, data); break; + case 18: PackedIntsDecompress.decode18(intBuffer, data); break; + case 19: PackedIntsDecompress.decode19(intBuffer, data); break; + case 20: PackedIntsDecompress.decode20(intBuffer, data); break; + case 21: PackedIntsDecompress.decode21(intBuffer, data); break; + case 22: PackedIntsDecompress.decode22(intBuffer, data); break; + case 23: PackedIntsDecompress.decode23(intBuffer, data); break; + case 24: PackedIntsDecompress.decode24(intBuffer, data); break; + case 25: PackedIntsDecompress.decode25(intBuffer, data); break; + case 26: PackedIntsDecompress.decode26(intBuffer, data); break; + case 27: PackedIntsDecompress.decode27(intBuffer, data); break; + case 28: PackedIntsDecompress.decode28(intBuffer, data); break; + case 29: PackedIntsDecompress.decode29(intBuffer, data); break; + case 30: PackedIntsDecompress.decode30(intBuffer, data); break; + case 31: PackedIntsDecompress.decode31(intBuffer, data); break; + case 32: PackedIntsDecompress.decode32(intBuffer, data); break; + default: + throw new IllegalStateException("Unknown numFrameBits " + numBits); + } + patchException(intBuffer,data,excNum,excFirstPos,excBytes); + return numInts; + } + + static void encodeHeader(IntBuffer intBuffer, int numInts, int numBits, int excNum, int excFirstPos, int excBytes) { + int header = getHeader(numInts,numBits,excNum,excFirstPos,excBytes); + intBuffer.put(0, header); + } + + static void encodeExcValues(IntBuffer intBuffer, int[] values, int num, int perbytes, int byteOffset) { + if (num == 0) + return; + if (perbytes == 1) { + int curBytePos = byteOffset; + for (int i=0; i>> 8) & MASK[8]); + curPos = patch(data, curPos, (curInt >>> 16) & MASK[8]); + curPos = patch(data, curPos, (curInt >>> 24) & MASK[8]); + } + if (i>> j) & MASK[8]); + } + } + } else if (excBytes == 2) { + for (i=0; i+1>> 16) & MASK[16]); + } + if (i pos; + return nextPos; + } + + // TODO: shall we use 32 NumBits directly if it exceeds 28 bits? + static int getNumBits(final int[] data, int size) { + int optBits=1; + int optSize=estimateCompressedSize(data,size,1); + for (int i=2; i<=32; ++i) { + int curSize=estimateCompressedSize(data,size,i); + if (curSize sz*8+4) { println("Excceed? "+ensz+">"+(sz*8+4)); ensz=sz*8+4; } resBuffer.rewind(); - ForUtil.decompress(resBuffer,copy); + PForUtil.decompress(resBuffer,copy); -// println(getHex(data,sz)+"\n"); -// println(getHex(res,ensz)+"\n"); -// println(getHex(copy,sz)+"\n"); + //println(getHex(data,sz)+"\n"); + //println(getHex(res,ensz)+"\n"); + //println(getHex(copy,sz)+"\n"); assert cmp(data,sz,copy,sz)==true; } @@ -99,15 +99,14 @@ public class TestForUtil extends LuceneTestCase { for (i=0; i sz*8+4) { println("Excceed? "+ensz+">"+(sz*8+4)); ensz=sz*8+4; } int[] copy = new int[sz]; - - ForUtil.decompress(resBuffer,copy); + PForUtil.decompress(resBuffer,copy); // println(getHex(data,sz)+"\n"); // println(getHex(res,ensz)+"\n"); diff --git a/lucene/core/src/test/org/apache/lucene/index/TestBackwardsCompatibility.java b/lucene/core/src/test/org/apache/lucene/index/TestBackwardsCompatibility.java index da989889af3..a5c890fca2f 100644 --- a/lucene/core/src/test/org/apache/lucene/index/TestBackwardsCompatibility.java +++ b/lucene/core/src/test/org/apache/lucene/index/TestBackwardsCompatibility.java @@ -76,7 +76,7 @@ import org.junit.BeforeClass; // we won't even be running the actual code, only the impostor // @SuppressCodecs("Lucene4x") // Sep codec cannot yet handle the offsets in our 4.x index! -@SuppressCodecs({"MockFixedIntBlock", "MockVariableIntBlock", "MockSep", "MockRandom","For"}) +@SuppressCodecs({"MockFixedIntBlock", "MockVariableIntBlock", "MockSep", "MockRandom","For","PFor"}) public class TestBackwardsCompatibility extends LuceneTestCase { // Uncomment these cases & run them on an older Lucene diff --git a/lucene/core/src/test/org/apache/lucene/index/TestPostingsOffsets.java b/lucene/core/src/test/org/apache/lucene/index/TestPostingsOffsets.java index 5bdfcb62355..f82173f2e67 100644 --- a/lucene/core/src/test/org/apache/lucene/index/TestPostingsOffsets.java +++ b/lucene/core/src/test/org/apache/lucene/index/TestPostingsOffsets.java @@ -49,7 +49,7 @@ import org.apache.lucene.util._TestUtil; // TODO: we really need to test indexingoffsets, but then getting only docs / docs + freqs. // not all codecs store prx separate... // TODO: fix sep codec to index offsets so we can greatly reduce this list! -@SuppressCodecs({"MockFixedIntBlock", "MockVariableIntBlock", "MockSep", "MockRandom","For"}) +@SuppressCodecs({"MockFixedIntBlock", "MockVariableIntBlock", "MockSep", "MockRandom","For","PFor"}) public class TestPostingsOffsets extends LuceneTestCase { IndexWriterConfig iwc; diff --git a/lucene/test-framework/src/java/org/apache/lucene/analysis/BaseTokenStreamTestCase.java b/lucene/test-framework/src/java/org/apache/lucene/analysis/BaseTokenStreamTestCase.java index 1ec10c02d47..ec4a9ec36d4 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/analysis/BaseTokenStreamTestCase.java +++ b/lucene/test-framework/src/java/org/apache/lucene/analysis/BaseTokenStreamTestCase.java @@ -487,6 +487,7 @@ public abstract class BaseTokenStreamTestCase extends LuceneTestCase { add("MockSep"); add("MockRandom"); add("For"); + add("PFor"); }}; private static void checkRandomData(Random random, Analyzer a, int iterations, int maxWordLength, boolean useCharFilter, boolean simple, boolean offsetsAreCorrect, RandomIndexWriter iw) throws IOException { diff --git a/lucene/test-framework/src/java/org/apache/lucene/codecs/mockrandom/MockRandomPostingsFormat.java b/lucene/test-framework/src/java/org/apache/lucene/codecs/mockrandom/MockRandomPostingsFormat.java index b0d9553528b..19ef672be0b 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/codecs/mockrandom/MockRandomPostingsFormat.java +++ b/lucene/test-framework/src/java/org/apache/lucene/codecs/mockrandom/MockRandomPostingsFormat.java @@ -95,6 +95,7 @@ public class MockRandomPostingsFormat extends PostingsFormat { delegates.add(new MockVariableIntBlockPostingsFormat.MockIntFactory(baseBlockSize)); // TODO: others delegates.add(new ForFactory()); + delegates.add(new PForFactory()); } private static String getExtension(String fileName) {