pull field spammers out of TestIndexWriter, stop the OOMs in testThreadInterruptDeadlock

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1624178 13f79535-47bb-0310-9956-ffa450edef68
Robert Muir 2014-09-11 01:14:47 +00:00
parent 3b6c7f4172
commit f1fb6ce6d3
2 changed files with 164 additions and 129 deletions

lucene/core/src/test/org/apache/lucene/index/TestIndexWriter.java

@@ -272,37 +272,6 @@ public class TestIndexWriter extends LuceneTestCase {
     dir.close();
   }

-  public void testManyFields() throws IOException {
-    Directory dir = newDirectory();
-    IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(new MockAnalyzer(random()))
-                                               .setMaxBufferedDocs(10));
-    for(int j=0;j<100;j++) {
-      Document doc = new Document();
-      doc.add(newField("a"+j, "aaa" + j, storedTextType));
-      doc.add(newField("b"+j, "aaa" + j, storedTextType));
-      doc.add(newField("c"+j, "aaa" + j, storedTextType));
-      doc.add(newField("d"+j, "aaa", storedTextType));
-      doc.add(newField("e"+j, "aaa", storedTextType));
-      doc.add(newField("f"+j, "aaa", storedTextType));
-      writer.addDocument(doc);
-    }
-    writer.close();
-
-    IndexReader reader = DirectoryReader.open(dir);
-    assertEquals(100, reader.maxDoc());
-    assertEquals(100, reader.numDocs());
-    for(int j=0;j<100;j++) {
-      assertEquals(1, reader.docFreq(new Term("a"+j, "aaa"+j)));
-      assertEquals(1, reader.docFreq(new Term("b"+j, "aaa"+j)));
-      assertEquals(1, reader.docFreq(new Term("c"+j, "aaa"+j)));
-      assertEquals(1, reader.docFreq(new Term("d"+j, "aaa")));
-      assertEquals(1, reader.docFreq(new Term("e"+j, "aaa")));
-      assertEquals(1, reader.docFreq(new Term("f"+j, "aaa")));
-    }
-    reader.close();
-    dir.close();
-  }
-
   public void testSmallRAMBuffer() throws IOException {
     Directory dir = newDirectory();
     IndexWriter writer = new IndexWriter(
@@ -453,56 +422,6 @@ public class TestIndexWriter extends LuceneTestCase {
     dir.close();
   }

-  public void testDiverseDocs() throws IOException {
-    Directory dir = newDirectory();
-    IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(new MockAnalyzer(random()))
-                                               .setRAMBufferSizeMB(0.5));
-    int n = atLeast(1);
-
-    for(int i=0;i<n;i++) {
-      // First, docs where every term is unique (heavy on
-      // Posting instances)
-      for(int j=0;j<100;j++) {
-        Document doc = new Document();
-        for(int k=0;k<100;k++) {
-          doc.add(newField("field", Integer.toString(random().nextInt()), storedTextType));
-        }
-        writer.addDocument(doc);
-      }
-
-      // Next, many single term docs where only one term
-      // occurs (heavy on byte blocks)
-      for(int j=0;j<100;j++) {
-        Document doc = new Document();
-        doc.add(newField("field", "aaa aaa aaa aaa aaa aaa aaa aaa aaa aaa", storedTextType));
-        writer.addDocument(doc);
-      }
-
-      // Next, many single term docs where only one term
-      // occurs but the terms are very long (heavy on
-      // char[] arrays)
-      for(int j=0;j<100;j++) {
-        StringBuilder b = new StringBuilder();
-        String x = Integer.toString(j) + ".";
-        for(int k=0;k<1000;k++)
-          b.append(x);
-        String longTerm = b.toString();
-        Document doc = new Document();
-        doc.add(newField("field", longTerm, storedTextType));
-        writer.addDocument(doc);
-      }
-    }
-    writer.close();
-
-    IndexReader reader = DirectoryReader.open(dir);
-    IndexSearcher searcher = newSearcher(reader);
-    int totalHits = searcher.search(new TermQuery(new Term("field", "aaa")), null, 1).totalHits;
-    assertEquals(n*100, totalHits);
-    reader.close();
-    dir.close();
-  }
-
   public void testEnablingNorms() throws IOException {
     Directory dir = newDirectory();
     IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(new MockAnalyzer(random()))
@@ -1087,7 +1006,7 @@ public class TestIndexWriter extends LuceneTestCase {
     }
     IndexWriterConfig conf = newIndexWriterConfig(random,
                                                   new MockAnalyzer(random)).setMaxBufferedDocs(2);
-    conf.setInfoStream(log);
+    //conf.setInfoStream(log);
     w = new IndexWriter(dir, conf);

     Document doc = new Document();
@@ -1103,7 +1022,7 @@ public class TestIndexWriter extends LuceneTestCase {
     doc.add(sortedDVField);
     doc.add(sortedSetDVField);
     for(int i=0;i<100;i++) {
-      log.println("\nTEST: i=" + i);
+      //log.println("\nTEST: i=" + i);
       idField.setStringValue(Integer.toString(i));
       binaryDVField.setBytesValue(new BytesRef(idField.stringValue()));
       numericDVField.setLongValue(i);
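
Aside on the two hunks above: testThreadInterruptDeadlock collects its infoStream output in an in-memory buffer that is replayed only if the test fails, so per-iteration println calls grow the heap without bound under long interrupt loops; disabling them is presumably the OOM fix named in the commit message. A minimal sketch of the buffering pattern being muted (the exact setup is assumed here, not shown in this diff):

    ByteArrayOutputStream bytesLog = new ByteArrayOutputStream();     // grows with every log.println
    PrintStream log = new PrintStream(bytesLog, true, IOUtils.UTF_8); // contents dumped only on failure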
@@ -2085,52 +2004,6 @@ public class TestIndexWriter extends LuceneTestCase {
     dir.close();
   }

-  // LUCENE-4398
-  public void testRotatingFieldNames() throws Exception {
-    Directory dir = newFSDirectory(createTempDir("TestIndexWriter.testChangingFields"));
-    IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random()));
-    iwc.setRAMBufferSizeMB(0.2);
-    iwc.setMaxBufferedDocs(-1);
-    IndexWriter w = new IndexWriter(dir, iwc);
-    int upto = 0;
-    FieldType ft = new FieldType(TextField.TYPE_NOT_STORED);
-    ft.setOmitNorms(true);
-
-    int firstDocCount = -1;
-    for(int iter=0;iter<10;iter++) {
-      final int startFlushCount = w.getFlushCount();
-      int docCount = 0;
-      while(w.getFlushCount() == startFlushCount) {
-        Document doc = new Document();
-        for(int i=0;i<10;i++) {
-          doc.add(new Field("field" + (upto++), "content", ft));
-        }
-        w.addDocument(doc);
-        docCount++;
-      }
-
-      if (VERBOSE) {
-        System.out.println("TEST: iter=" + iter + " flushed after docCount=" + docCount);
-      }
-
-      if (iter == 0) {
-        firstDocCount = docCount;
-      }
-
-      assertTrue("flushed after too few docs: first segment flushed at docCount=" + firstDocCount + ", but current segment flushed after docCount=" + docCount + "; iter=" + iter, ((float) docCount) / firstDocCount > 0.9);
-
-      if (upto > 5000) {
-        // Start re-using field names after a while
-        // ... important because otherwise we can OOME due
-        // to too many FieldInfo instances.
-        upto = 0;
-      }
-    }
-    w.close();
-    dir.close();
-  }
-
   // LUCENE-4575
   public void testCommitWithUserDataOnly() throws Exception {
     Directory dir = newDirectory();

lucene/core/src/test/org/apache/lucene/index/TestManyFields.java (new file)

@@ -0,0 +1,162 @@
+package org.apache.lucene.index;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+
+import org.apache.lucene.analysis.MockAnalyzer;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Field;
+import org.apache.lucene.document.FieldType;
+import org.apache.lucene.document.TextField;
+import org.apache.lucene.search.IndexSearcher;
+import org.apache.lucene.search.TermQuery;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.util.LuceneTestCase;
+
+/** Test that creates way, way, way too many fields */
+public class TestManyFields extends LuceneTestCase {
+  private static final FieldType storedTextType = new FieldType(TextField.TYPE_NOT_STORED);
+
+  public void testManyFields() throws IOException {
+    Directory dir = newDirectory();
+    IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(new MockAnalyzer(random()))
+                                               .setMaxBufferedDocs(10));
+    for(int j=0;j<100;j++) {
+      Document doc = new Document();
+      doc.add(newField("a"+j, "aaa" + j, storedTextType));
+      doc.add(newField("b"+j, "aaa" + j, storedTextType));
+      doc.add(newField("c"+j, "aaa" + j, storedTextType));
+      doc.add(newField("d"+j, "aaa", storedTextType));
+      doc.add(newField("e"+j, "aaa", storedTextType));
+      doc.add(newField("f"+j, "aaa", storedTextType));
+      writer.addDocument(doc);
+    }
+    writer.close();
+
+    IndexReader reader = DirectoryReader.open(dir);
+    assertEquals(100, reader.maxDoc());
+    assertEquals(100, reader.numDocs());
+    for(int j=0;j<100;j++) {
+      assertEquals(1, reader.docFreq(new Term("a"+j, "aaa"+j)));
+      assertEquals(1, reader.docFreq(new Term("b"+j, "aaa"+j)));
+      assertEquals(1, reader.docFreq(new Term("c"+j, "aaa"+j)));
+      assertEquals(1, reader.docFreq(new Term("d"+j, "aaa")));
+      assertEquals(1, reader.docFreq(new Term("e"+j, "aaa")));
+      assertEquals(1, reader.docFreq(new Term("f"+j, "aaa")));
+    }
+    reader.close();
+    dir.close();
+  }
+
+  public void testDiverseDocs() throws IOException {
+    Directory dir = newDirectory();
+    IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(new MockAnalyzer(random()))
+                                               .setRAMBufferSizeMB(0.5));
+    int n = atLeast(1);
+
+    for(int i=0;i<n;i++) {
+      // First, docs where every term is unique (heavy on
+      // Posting instances)
+      for(int j=0;j<100;j++) {
+        Document doc = new Document();
+        for(int k=0;k<100;k++) {
+          doc.add(newField("field", Integer.toString(random().nextInt()), storedTextType));
+        }
+        writer.addDocument(doc);
+      }
+
+      // Next, many single term docs where only one term
+      // occurs (heavy on byte blocks)
+      for(int j=0;j<100;j++) {
+        Document doc = new Document();
+        doc.add(newField("field", "aaa aaa aaa aaa aaa aaa aaa aaa aaa aaa", storedTextType));
+        writer.addDocument(doc);
+      }
+
+      // Next, many single term docs where only one term
+      // occurs but the terms are very long (heavy on
+      // char[] arrays)
+      for(int j=0;j<100;j++) {
+        StringBuilder b = new StringBuilder();
+        String x = Integer.toString(j) + ".";
+        for(int k=0;k<1000;k++)
+          b.append(x);
+        String longTerm = b.toString();
+        Document doc = new Document();
+        doc.add(newField("field", longTerm, storedTextType));
+        writer.addDocument(doc);
+      }
+    }
+    writer.close();
+
+    IndexReader reader = DirectoryReader.open(dir);
+    IndexSearcher searcher = newSearcher(reader);
+    int totalHits = searcher.search(new TermQuery(new Term("field", "aaa")), null, 1).totalHits;
+    assertEquals(n*100, totalHits);
+    reader.close();
+    dir.close();
+  }
+
+  // LUCENE-4398
+  public void testRotatingFieldNames() throws Exception {
+    Directory dir = newFSDirectory(createTempDir("TestIndexWriter.testChangingFields"));
+    IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random()));
+    iwc.setRAMBufferSizeMB(0.2);
+    iwc.setMaxBufferedDocs(-1);
+    IndexWriter w = new IndexWriter(dir, iwc);
+    int upto = 0;
+    FieldType ft = new FieldType(TextField.TYPE_NOT_STORED);
+    ft.setOmitNorms(true);
+
+    int firstDocCount = -1;
+    for(int iter=0;iter<10;iter++) {
+      final int startFlushCount = w.getFlushCount();
+      int docCount = 0;
+      while(w.getFlushCount() == startFlushCount) {
+        Document doc = new Document();
+        for(int i=0;i<10;i++) {
+          doc.add(new Field("field" + (upto++), "content", ft));
+        }
+        w.addDocument(doc);
+        docCount++;
+      }
+
+      if (VERBOSE) {
+        System.out.println("TEST: iter=" + iter + " flushed after docCount=" + docCount);
+      }
+
+      if (iter == 0) {
+        firstDocCount = docCount;
+      }
+
+      assertTrue("flushed after too few docs: first segment flushed at docCount=" + firstDocCount + ", but current segment flushed after docCount=" + docCount + "; iter=" + iter, ((float) docCount) / firstDocCount > 0.9);
+
+      if (upto > 5000) {
+        // Start re-using field names after a while
+        // ... important because otherwise we can OOME due
+        // to too many FieldInfo instances.
+        upto = 0;
+      }
+    }
+    w.close();
+    dir.close();
+  }
+}
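
To run just the extracted tests in isolation (a sketch, assuming the stock Ant build of 2014-era trunk, run from the lucene/core directory):

    ant test -Dtestcase=TestManyFields -Dtests.verbose=true

The tests.verbose property turns on LuceneTestCase.VERBOSE, which testRotatingFieldNames uses to print the per-iteration flush counts.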