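don't try to index megaterms: tests now cap MockAnalyzer's max token length at a random value between 1 and IndexWriter.MAX_TERM_LENGTH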

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1575538 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Michael McCandless 2014-03-08 13:12:47 +00:00
parent cb291ae038
commit df17e3951d
17 changed files with 86 additions and 34 deletions

View File

@ -26,12 +26,13 @@ import org.apache.lucene.index.AtomicReader;
import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.DocsEnum;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.Bits.MatchNoBits;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.LineFileDocs;
@ -121,8 +122,11 @@ public class TestReuseDocsEnum extends LuceneTestCase {
public void testReuseDocsEnumDifferentReader() throws IOException {
Directory dir = newDirectory();
Codec cp = TestUtil.alwaysPostingsFormat(new Lucene40RWPostingsFormat());
MockAnalyzer analyzer = new MockAnalyzer(random());
analyzer.setMaxTokenLength(TestUtil.nextInt(random(), 1, IndexWriter.MAX_TERM_LENGTH));
RandomIndexWriter writer = new RandomIndexWriter(random(), dir,
newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random())).setCodec(cp));
newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer).setCodec(cp));
int numdocs = atLeast(20);
createRandomIndex(numdocs, writer, random());
writer.commit();

View File

@ -139,8 +139,11 @@ public class TestBackwardsCompatibility extends LuceneTestCase {
mp.setUseCompoundFile(false);
mp.setNoCFSRatio(1.0);
mp.setMaxCFSSegmentSizeMB(Double.POSITIVE_INFINITY);
MockAnalyzer analyzer = new MockAnalyzer(random());
analyzer.setMaxTokenLength(TestUtil.nextInt(random(), 1, IndexWriter.MAX_TERM_LENGTH));
// TODO: remove randomness
IndexWriterConfig conf = new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()))
IndexWriterConfig conf = new IndexWriterConfig(TEST_VERSION_CURRENT, analyzer)
.setMergePolicy(mp);
conf.setCodec(Codec.forName("Lucene40"));
IndexWriter writer = new IndexWriter(dir, conf);

View File

@ -30,6 +30,7 @@ import org.apache.lucene.search.similarities.Similarity;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.LineFileDocs;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.TestUtil;
/**
*
@ -41,8 +42,11 @@ public class TestCustomNorms extends LuceneTestCase {
public void testFloatNorms() throws IOException {
Directory dir = newDirectory();
MockAnalyzer analyzer = new MockAnalyzer(random());
analyzer.setMaxTokenLength(TestUtil.nextInt(random(), 1, IndexWriter.MAX_TERM_LENGTH));
IndexWriterConfig config = newIndexWriterConfig(TEST_VERSION_CURRENT,
new MockAnalyzer(random()));
analyzer);
Similarity provider = new MySimProvider();
config.setSimilarity(provider);
RandomIndexWriter writer = new RandomIndexWriter(random(), dir, config);

View File

@ -20,7 +20,6 @@ package org.apache.lucene.index;
import java.io.IOException;
import java.util.Random;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.codecs.Codec;
import org.apache.lucene.document.Document;
@ -63,9 +62,12 @@ public class TestDuelingCodecs extends LuceneTestCase {
long seed = random().nextLong();
// must use same seed because of random payloads, etc
Analyzer leftAnalyzer = new MockAnalyzer(new Random(seed));
Analyzer rightAnalyzer = new MockAnalyzer(new Random(seed));
int maxTermLength = TestUtil.nextInt(random(), 1, IndexWriter.MAX_TERM_LENGTH);
MockAnalyzer leftAnalyzer = new MockAnalyzer(new Random(seed));
leftAnalyzer.setMaxTokenLength(maxTermLength);
MockAnalyzer rightAnalyzer = new MockAnalyzer(new Random(seed));
rightAnalyzer.setMaxTokenLength(maxTermLength);
// but these can be different
// TODO: this turns this into a really big test of Multi*, is that what we want?
IndexWriterConfig leftConfig = newIndexWriterConfig(TEST_VERSION_CURRENT, leftAnalyzer);

View File

@ -29,6 +29,7 @@ import org.apache.lucene.store.Directory;
import org.apache.lucene.store.MockDirectoryWrapper;
import org.apache.lucene.util.LineFileDocs;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.TestUtil;
import org.junit.AfterClass;
import org.junit.BeforeClass;
@ -64,8 +65,11 @@ public class TestFlushByRamOrCountsPolicy extends LuceneTestCase {
AtomicInteger numDocs = new AtomicInteger(numDocumentsToIndex);
Directory dir = newDirectory();
MockDefaultFlushPolicy flushPolicy = new MockDefaultFlushPolicy();
MockAnalyzer analyzer = new MockAnalyzer(random());
analyzer.setMaxTokenLength(TestUtil.nextInt(random(), 1, IndexWriter.MAX_TERM_LENGTH));
IndexWriterConfig iwc = newIndexWriterConfig(TEST_VERSION_CURRENT,
new MockAnalyzer(random())).setFlushPolicy(flushPolicy);
analyzer).setFlushPolicy(flushPolicy);
final int numDWPT = 1 + atLeast(2);
DocumentsWriterPerThreadPool threadPool = new ThreadAffinityDocumentsWriterThreadPool(
numDWPT);

View File

@ -54,7 +54,10 @@ public class TestForceMergeForever extends LuceneTestCase {
public void test() throws Exception {
final Directory d = newDirectory();
final MyIndexWriter w = new MyIndexWriter(d, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random())));
MockAnalyzer analyzer = new MockAnalyzer(random());
analyzer.setMaxTokenLength(TestUtil.nextInt(random(), 1, IndexWriter.MAX_TERM_LENGTH));
final MyIndexWriter w = new MyIndexWriter(d, newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer));
// Try to make an index that requires merging:
w.getConfig().setMaxBufferedDocs(TestUtil.nextInt(random(), 2, 11));

View File

@ -51,7 +51,9 @@ public class TestIndexWriterOutOfFileDescriptors extends LuceneTestCase {
System.out.println("TEST: iter=" + iter);
}
try {
IndexWriterConfig iwc = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()));
MockAnalyzer analyzer = new MockAnalyzer(random());
analyzer.setMaxTokenLength(TestUtil.nextInt(random(), 1, IndexWriter.MAX_TERM_LENGTH));
IndexWriterConfig iwc = newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer);
if (VERBOSE) {
// Do this ourselves instead of relying on LTC so

View File

@ -548,7 +548,10 @@ public class TestIndexWriterWithThreads extends LuceneTestCase {
final int threadCount = TestUtil.nextInt(random(), 2, 6);
final AtomicReference<IndexWriter> writerRef = new AtomicReference<IndexWriter>();
writerRef.set(new IndexWriter(d, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()))));
MockAnalyzer analyzer = new MockAnalyzer(random());
analyzer.setMaxTokenLength(TestUtil.nextInt(random(), 1, IndexWriter.MAX_TERM_LENGTH));
writerRef.set(new IndexWriter(d, newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer)));
final LineFileDocs docs = new LineFileDocs(random());
final Thread[] threads = new Thread[threadCount];
final int iters = atLeast(100);

View File

@ -75,7 +75,10 @@ public class TestNorms extends LuceneTestCase {
// LUCENE-1260
public void testCustomEncoder() throws Exception {
Directory dir = newDirectory();
IndexWriterConfig config = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()));
MockAnalyzer analyzer = new MockAnalyzer(random());
analyzer.setMaxTokenLength(TestUtil.nextInt(random(), 1, IndexWriter.MAX_TERM_LENGTH));
IndexWriterConfig config = newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer);
config.setSimilarity(new CustomNormEncodingSimilarity());
RandomIndexWriter writer = new RandomIndexWriter(random(), dir, config);
Document doc = new Document();

View File

@ -44,7 +44,9 @@ public class TestTermsEnum extends LuceneTestCase {
Random random = new Random(random().nextLong());
final LineFileDocs docs = new LineFileDocs(random, true);
final Directory d = newDirectory();
final RandomIndexWriter w = new RandomIndexWriter(random(), d);
MockAnalyzer analyzer = new MockAnalyzer(random());
analyzer.setMaxTokenLength(TestUtil.nextInt(random(), 1, IndexWriter.MAX_TERM_LENGTH));
final RandomIndexWriter w = new RandomIndexWriter(random(), d, analyzer);
final int numDocs = atLeast(10);
for(int docCount=0;docCount<numDocs;docCount++) {
w.addDocument(docs.nextDoc());

View File

@ -24,8 +24,10 @@ import java.util.List;
import java.util.Map;
import java.util.concurrent.CountDownLatch;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.MultiFields;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.index.Term;
@ -36,13 +38,14 @@ import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.LineFileDocs;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.TestUtil;
import org.apache.lucene.util.TestUtil;
public class TestSameScoresWithThreads extends LuceneTestCase {
public void test() throws Exception {
final Directory dir = newDirectory();
final RandomIndexWriter w = new RandomIndexWriter(random(), dir);
MockAnalyzer analyzer = new MockAnalyzer(random());
analyzer.setMaxTokenLength(TestUtil.nextInt(random(), 1, IndexWriter.MAX_TERM_LENGTH));
final RandomIndexWriter w = new RandomIndexWriter(random(), dir, analyzer);
LineFileDocs docs = new LineFileDocs(random());
int charsToIndex = atLeast(100000);
int charsIndexed = 0;

View File

@ -38,14 +38,15 @@ import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.LineFileDocs;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.TestUtil;
import org.apache.lucene.util.TestUtil;
public class TestNRTCachingDirectory extends LuceneTestCase {
public void testNRTAndCommit() throws Exception {
Directory dir = newDirectory();
NRTCachingDirectory cachedDir = new NRTCachingDirectory(dir, 2.0, 25.0);
IndexWriterConfig conf = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()));
MockAnalyzer analyzer = new MockAnalyzer(random());
analyzer.setMaxTokenLength(TestUtil.nextInt(random(), 1, IndexWriter.MAX_TERM_LENGTH));
IndexWriterConfig conf = newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer);
RandomIndexWriter w = new RandomIndexWriter(random(), cachedDir, conf);
final LineFileDocs docs = new LineFileDocs(random(), true);
final int numDocs = TestUtil.nextInt(random(), 100, 400);

View File

@ -292,7 +292,10 @@ public class TestFSTs extends LuceneTestCase {
final LineFileDocs docs = new LineFileDocs(random(), true);
final int RUN_TIME_MSEC = atLeast(500);
final IndexWriterConfig conf = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random())).setMaxBufferedDocs(-1).setRAMBufferSizeMB(64);
MockAnalyzer analyzer = new MockAnalyzer(random());
analyzer.setMaxTokenLength(TestUtil.nextInt(random(), 1, IndexWriter.MAX_TERM_LENGTH));
final IndexWriterConfig conf = newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer).setMaxBufferedDocs(-1).setRAMBufferSizeMB(64);
final File tempDir = TestUtil.getTempDir("fstlines");
final Directory dir = newFSDirectory(tempDir);
final IndexWriter writer = new IndexWriter(dir, conf);

View File

@ -436,6 +436,7 @@ public class MemoryIndexTest extends BaseTokenStreamTestCase {
for (int i = 0; i < numDocs; i++) {
Directory dir = newDirectory();
MockAnalyzer mockAnalyzer = new MockAnalyzer(random());
mockAnalyzer.setMaxTokenLength(TestUtil.nextInt(random(), 1, IndexWriter.MAX_TERM_LENGTH));
IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(random(), TEST_VERSION_CURRENT, mockAnalyzer));
Document nextDoc = lineFileDocs.nextDoc();
Document doc = new Document();

View File

@ -17,19 +17,29 @@ package org.apache.lucene.queries;
* limitations under the License.
*/
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashSet;
import java.util.List;
import java.util.Random;
import java.util.Set;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.AtomicReader;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.index.SlowCompositeReaderWrapper;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermContext;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanClause.Occur;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
@ -45,19 +55,13 @@ import org.apache.lucene.util.PriorityQueue;
import org.apache.lucene.util.TestUtil;
import org.junit.Test;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashSet;
import java.util.List;
import java.util.Random;
import java.util.Set;
public class CommonTermsQueryTest extends LuceneTestCase {
public void testBasics() throws IOException {
Directory dir = newDirectory();
RandomIndexWriter w = new RandomIndexWriter(random(), dir);
MockAnalyzer analyzer = new MockAnalyzer(random());
analyzer.setMaxTokenLength(TestUtil.nextInt(random(), 1, IndexWriter.MAX_TERM_LENGTH));
RandomIndexWriter w = new RandomIndexWriter(random(), dir, analyzer);
String[] docs = new String[] {"this is the end of the world right",
"is this it or maybe not",
"this is the end of the universe as we know it",
@ -186,7 +190,9 @@ public class CommonTermsQueryTest extends LuceneTestCase {
public void testMinShouldMatch() throws IOException {
Directory dir = newDirectory();
RandomIndexWriter w = new RandomIndexWriter(random(), dir);
MockAnalyzer analyzer = new MockAnalyzer(random());
analyzer.setMaxTokenLength(TestUtil.nextInt(random(), 1, IndexWriter.MAX_TERM_LENGTH));
RandomIndexWriter w = new RandomIndexWriter(random(), dir, analyzer);
String[] docs = new String[] {"this is the end of the world right",
"is this it or maybe not",
"this is the end of the universe as we know it",
@ -344,7 +350,9 @@ public class CommonTermsQueryTest extends LuceneTestCase {
@Test
public void testExtend() throws IOException {
Directory dir = newDirectory();
RandomIndexWriter w = new RandomIndexWriter(random(), dir);
MockAnalyzer analyzer = new MockAnalyzer(random());
analyzer.setMaxTokenLength(TestUtil.nextInt(random(), 1, IndexWriter.MAX_TERM_LENGTH));
RandomIndexWriter w = new RandomIndexWriter(random(), dir, analyzer);
String[] docs = new String[] {"this is the end of the world right",
"is this it or maybe not",
"this is the end of the universe as we know it",
@ -397,7 +405,9 @@ public class CommonTermsQueryTest extends LuceneTestCase {
public void testRandomIndex() throws IOException {
Directory dir = newDirectory();
RandomIndexWriter w = new RandomIndexWriter(random(), dir);
MockAnalyzer analyzer = new MockAnalyzer(random());
analyzer.setMaxTokenLength(TestUtil.nextInt(random(), 1, IndexWriter.MAX_TERM_LENGTH));
RandomIndexWriter w = new RandomIndexWriter(random(), dir, analyzer);
createRandomIndex(atLeast(50), w, random().nextLong());
DirectoryReader reader = w.getReader();
AtomicReader wrapper = SlowCompositeReaderWrapper.wrap(reader);

View File

@ -1379,7 +1379,9 @@ public abstract class BasePostingsFormatTestCase extends LuceneTestCase {
// during flush/merge
public void testInvertedWrite() throws Exception {
Directory dir = newDirectory();
IndexWriterConfig iwc = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()));
MockAnalyzer analyzer = new MockAnalyzer(random());
analyzer.setMaxTokenLength(TestUtil.nextInt(random(), 1, IndexWriter.MAX_TERM_LENGTH));
IndexWriterConfig iwc = newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer);
// Must be concurrent because thread(s) can be merging
// while up to one thread flushes, and each of those

View File

@ -449,7 +449,9 @@ public abstract class ShardSearchingTestBase extends LuceneTestCase {
myNodeID = nodeID;
dir = newFSDirectory(TestUtil.getTempDir("ShardSearchingTestBase"));
// TODO: set warmer
IndexWriterConfig iwc = new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random));
MockAnalyzer analyzer = new MockAnalyzer(random());
analyzer.setMaxTokenLength(TestUtil.nextInt(random(), 1, IndexWriter.MAX_TERM_LENGTH));
IndexWriterConfig iwc = new IndexWriterConfig(TEST_VERSION_CURRENT, analyzer);
iwc.setOpenMode(IndexWriterConfig.OpenMode.CREATE);
if (VERBOSE) {
iwc.setInfoStream(new PrintStreamInfoStream(System.out));