LUCENE-4227: add DirectPostingsFormat, to hold all postings in simple uncompressed arrays

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1363803 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Michael McCandless 2012-07-20 14:33:43 +00:00
parent b339c63a9e
commit b85dc82b4f
17 changed files with 2240 additions and 14 deletions

View File

@ -629,3 +629,8 @@ you can now do this:
instance exposing the inverted index of the one document. From
Fields you can enumerate all fields, terms, positions, offsets.
* LUCENE-4227: If you were previously using Instantiated index, you
may want to use DirectPostingsFormat after upgrading: it stores all
postings in simple arrays (byte[] for terms, int[] for docs, freqs,
positions, offsets). Note that this only covers postings, whereas
Instantiated covered all other parts of the index as well.

File diff suppressed because it is too large Load Diff

View File

@ -17,3 +17,4 @@ org.apache.lucene.codecs.lucene40.Lucene40PostingsFormat
org.apache.lucene.codecs.pulsing.Pulsing40PostingsFormat
org.apache.lucene.codecs.simpletext.SimpleTextPostingsFormat
org.apache.lucene.codecs.memory.MemoryPostingsFormat
org.apache.lucene.codecs.memory.DirectPostingsFormat

View File

@ -34,7 +34,7 @@ import org.apache.lucene.util.LuceneTestCase.SuppressCodecs;
* Test indexes ~82M docs with 26 terms each, so you get > Integer.MAX_VALUE terms/docs pairs
* @lucene.experimental
*/
@SuppressCodecs({ "SimpleText", "Memory" })
@SuppressCodecs({ "SimpleText", "Memory", "Direct" })
public class Test2BPostings extends LuceneTestCase {
@Nightly

View File

@ -41,7 +41,7 @@ import java.util.Random;
//
// java -server -Xmx8g -d64 -cp .:lib/junit-4.10.jar:./build/classes/test:./build/classes/test-framework:./build/classes/java -Dlucene.version=4.0-dev -Dtests.directory=MMapDirectory -DtempDir=build -ea org.junit.runner.JUnitCore org.apache.lucene.index.Test2BTerms
//
@SuppressCodecs({ "SimpleText", "Memory" })
@SuppressCodecs({ "SimpleText", "Memory", "Direct" })
public class Test2BTerms extends LuceneTestCase {
private final static int TOKEN_LEN = 10;

View File

@ -976,7 +976,7 @@ public class TestIndexWriterReader extends LuceneTestCase {
// Don't proceed if picked Codec is in the list of illegal ones.
final String format = _TestUtil.getPostingsFormat("f");
assumeFalse("Format: " + format + " does not support ReaderTermsIndexDivisor!",
(format.equals("SimpleText") || format.equals("Memory")));
(format.equals("SimpleText") || format.equals("Memory") || format.equals("Direct")));
Directory dir = newDirectory();
IndexWriter w = new IndexWriter(dir, conf);

View File

@ -132,8 +132,9 @@ public class TestLazyProxSkipping extends LuceneTestCase {
public void testLazySkipping() throws IOException {
final String fieldFormat = _TestUtil.getPostingsFormat(this.field);
assumeFalse("This test cannot run with Memory codec", fieldFormat.equals("Memory"));
assumeFalse("This test cannot run with SimpleText codec", fieldFormat.equals("SimpleText"));
assumeFalse("This test cannot run with Memory postings format", fieldFormat.equals("Memory"));
assumeFalse("This test cannot run with Direct postings format", fieldFormat.equals("Direct"));
assumeFalse("This test cannot run with SimpleText postings format", fieldFormat.equals("SimpleText"));
// test whether only the minimum amount of seeks()
// are performed

View File

@ -37,7 +37,7 @@ import org.apache.lucene.util.LuceneTestCase.SuppressCodecs;
import org.apache.lucene.util.FixedBitSet;
import org.apache.lucene.util._TestUtil;
@SuppressCodecs({ "SimpleText", "Memory" })
@SuppressCodecs({ "SimpleText", "Memory", "Direct" })
public class TestLongPostings extends LuceneTestCase {
// Produces a realistic unicode random string that

View File

@ -28,7 +28,7 @@ import org.apache.lucene.util.LuceneTestCase.SuppressCodecs;
// - mix in forceMerge, addIndexes
// - randomly mix in non-congruent docs
@SuppressCodecs({ "SimpleText", "Memory" })
@SuppressCodecs({ "SimpleText", "Memory", "Direct" })
public class TestNRTThreads extends ThreadedIndexingAndSearchingTestCase {
@Override

View File

@ -40,7 +40,7 @@ import org.apache.lucene.util._TestUtil;
* Test that norms info is preserved during index life - including
* separate norms, addDocument, addIndexes, forceMerge.
*/
@SuppressCodecs({ "SimpleText", "Memory" })
@SuppressCodecs({ "SimpleText", "Memory", "Direct" })
@Slow
public class TestNorms extends LuceneTestCase {
final String byteTestField = "normsTestByte";

View File

@ -37,7 +37,7 @@ import org.apache.lucene.util.automaton.BasicAutomata;
import org.apache.lucene.util.automaton.CompiledAutomaton;
import org.apache.lucene.util.automaton.RegExp;
@SuppressCodecs({ "SimpleText", "Memory" })
@SuppressCodecs({ "SimpleText", "Memory", "Direct" })
public class TestTermsEnum extends LuceneTestCase {
public void test() throws Exception {

View File

@ -41,7 +41,7 @@ import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.LuceneTestCase.SuppressCodecs;
import org.apache.lucene.util.ThreadInterruptedException;
@SuppressCodecs({ "SimpleText", "Memory" })
@SuppressCodecs({ "SimpleText", "Memory", "Direct" })
public class TestNRTManager extends ThreadedIndexingAndSearchingTestCase {
private final ThreadLocal<Long> lastGens = new ThreadLocal<Long>();

View File

@ -29,7 +29,7 @@ import org.apache.lucene.store.Directory;
import org.apache.lucene.util.LuceneTestCase.SuppressCodecs;
import org.apache.lucene.util.LuceneTestCase;
@SuppressCodecs({ "SimpleText", "Memory" })
@SuppressCodecs({ "SimpleText", "Memory", "Direct" })
public class TestSearchWithThreads extends LuceneTestCase {
int NUM_DOCS;
final int NUM_SEARCH_THREADS = 5;

View File

@ -43,7 +43,7 @@ import org.apache.lucene.util.LuceneTestCase.SuppressCodecs;
import org.apache.lucene.util.NamedThreadFactory;
import org.apache.lucene.util._TestUtil;
@SuppressCodecs({ "SimpleText", "Memory" })
@SuppressCodecs({ "SimpleText", "Memory", "Direct" })
public class TestSearcherManager extends ThreadedIndexingAndSearchingTestCase {
boolean warmCalled;

View File

@ -41,7 +41,7 @@ import org.apache.lucene.util._TestUtil;
// - test pulling docs in 2nd round trip...
// - filter too
@SuppressCodecs({ "SimpleText", "Memory" })
@SuppressCodecs({ "SimpleText", "Memory", "Direct" })
public class TestShardSearching extends ShardSearchingTestBase {
private static class PreviousSearchState {

View File

@ -67,7 +67,7 @@ import org.apache.lucene.util.fst.FST.BytesReader;
import org.apache.lucene.util.fst.PairOutputs.Pair;
import org.apache.lucene.util.packed.PackedInts;
@SuppressCodecs({ "SimpleText", "Memory" })
@SuppressCodecs({ "SimpleText", "Memory", "Direct" })
@Slow
public class TestFSTs extends LuceneTestCase {

View File

@ -32,6 +32,7 @@ import org.apache.lucene.codecs.asserting.AssertingPostingsFormat;
import org.apache.lucene.codecs.lucene40.Lucene40Codec;
import org.apache.lucene.codecs.lucene40.Lucene40PostingsFormat;
import org.apache.lucene.codecs.lucene40ords.Lucene40WithOrds;
import org.apache.lucene.codecs.memory.DirectPostingsFormat;
import org.apache.lucene.codecs.memory.MemoryPostingsFormat;
import org.apache.lucene.codecs.mockintblock.MockFixedIntBlockPostingsFormat;
import org.apache.lucene.codecs.mockintblock.MockVariableIntBlockPostingsFormat;
@ -87,9 +88,11 @@ public class RandomCodec extends Lucene40Codec {
// block via CL:
int minItemsPerBlock = _TestUtil.nextInt(random, 2, 100);
int maxItemsPerBlock = 2*(Math.max(2, minItemsPerBlock-1)) + random.nextInt(100);
int lowFreqCutoff = _TestUtil.nextInt(random, 2, 100);
add(avoidCodecs,
new Lucene40PostingsFormat(minItemsPerBlock, maxItemsPerBlock),
new DirectPostingsFormat(minItemsPerBlock, lowFreqCutoff),
new Pulsing40PostingsFormat(1 + random.nextInt(20), minItemsPerBlock, maxItemsPerBlock),
// add pulsing again with (usually) different parameters
new Pulsing40PostingsFormat(1 + random.nextInt(20), minItemsPerBlock, maxItemsPerBlock),