LUCENE-6486: make payloads optional in DocumentDictionary

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1680641 13f79535-47bb-0310-9956-ffa450edef68
2015-05-20 17:29:26 +00:00 · 2015-05-20 17:29:26 +00:00 · 3e646430e3
parent 1a93333d50
commit 3e646430e3
5 changed files with 171 additions and 124 deletions
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@ -178,6 +178,10 @@ Bug Fixes
 * LUCENE-6483: Ensure core closed listeners are called on the same cache key as
  the reader which has been used to register the listener. (Adrien Grand)

+* LUCENE-6486 DocumentDictionary iterator no longer skips
+  documents with no payloads and now returns an empty BytesRef instead
+  (Marius Grama via Michael McCandless)
+
 API Changes

 * LUCENE-6377: SearcherFactory#newSearcher now accepts the previous reader
--- a/lucene/suggest/src/java/org/apache/lucene/search/suggest/DocumentDictionary.java
+++ b/lucene/suggest/src/java/org/apache/lucene/search/suggest/DocumentDictionary.java
@ -18,6 +18,7 @@ package org.apache.lucene.search.suggest;
 */

 import java.io.IOException;
+import java.util.Collections;
 import java.util.HashSet;
 import java.util.Set;

@ -41,18 +42,15 @@ import org.apache.lucene.util.BytesRef;
 * <b>NOTE:</b> 
 *  <ul>
 *    <li>
- *      The term and (optionally) payload fields have to be
- *      stored
+ *      The term field has to be stored; if it is missing, the document is skipped.
+ *    </li>
+ *    <li>
+ *      The payload and contexts field are optional and are not required to be stored.
 *    </li>
 *    <li>
 *      The weight field can be stored or can be a {@link NumericDocValues}.
 *      If the weight field is not defined, the value of the weight is <code>0</code>
 *    </li>
- *    <li>
- *      if any of the term or (optionally) payload fields supplied
- *      do not have a value for a document, then the document is 
- *      skipped by the dictionary
- *    </li>
 *  </ul>
 */
 public class DocumentDictionary implements Dictionary {
@ -90,7 +88,7 @@ public class DocumentDictionary implements Dictionary {
   * Creates a new dictionary with the contents of the fields named <code>field</code>
   * for the terms, <code>weightField</code> for the weights that will be used for the 
   * the corresponding terms, <code>payloadField</code> for the corresponding payloads
-   * for the entry and <code>contextsFeild</code> for associated contexts.
+   * for the entry and <code>contextsField</code> for associated contexts.
   */
  public DocumentDictionary(IndexReader reader, String field, String weightField, String payloadField, String contextsField) {
    this.reader = reader;
@ -167,25 +165,26 @@ public class DocumentDictionary implements Dictionary {

        StoredDocument doc = reader.document(currentDocId, relevantFields);

-        Set<BytesRef> tempContexts = new HashSet<>();
-
-        BytesRef tempPayload;
+        BytesRef tempPayload = null;
        if (hasPayloads) {
          StorableField payload = doc.getField(payloadField);
-          if (payload == null) {
-            continue;
-          } else if (payload.binaryValue() != null) {
-            tempPayload =  payload.binaryValue();
-          } else if (payload.stringValue() != null) {
-            tempPayload = new BytesRef(payload.stringValue());
-          } else {
-            continue;
+          if (payload != null) {
+            if (payload.binaryValue() != null) {
+              tempPayload =  payload.binaryValue();
+            } else if (payload.stringValue() != null) {
+              tempPayload = new BytesRef(payload.stringValue());
+            }
+          }
+          // in case that the iterator has payloads configured, use empty values
+          // instead of null for payload
+          if (tempPayload == null) {
+            tempPayload = new BytesRef();
          }
-        } else {
-          tempPayload = null;
        }

+        Set<BytesRef> tempContexts;
        if (hasContexts) {
+          tempContexts = new HashSet<>();
          final StorableField[] contextFields = doc.getFields(contextsField);
          for (StorableField contextField : contextFields) {
            if (contextField.binaryValue() != null) {
@ -196,6 +195,8 @@ public class DocumentDictionary implements Dictionary {
              continue;
            }
          }
+        } else {
+          tempContexts = Collections.emptySet();
        }

        currentDocFields = doc.getFields(field);
--- a/lucene/suggest/src/java/org/apache/lucene/search/suggest/DocumentValueSourceDictionary.java
+++ b/lucene/suggest/src/java/org/apache/lucene/search/suggest/DocumentValueSourceDictionary.java
@ -31,7 +31,8 @@ import org.apache.lucene.queries.function.ValueSource;

 /**
 * <p>
- * Dictionary with terms and optionally payload information 
+ * Dictionary with terms and optionally payload and
+ * optionally contexts information
 * taken from stored fields in a Lucene index. Similar to 
 * {@link DocumentDictionary}, except it obtains the weight
 * of the terms in a document based on a {@link ValueSource}.
@ -39,13 +40,10 @@ import org.apache.lucene.queries.function.ValueSource;
 * <b>NOTE:</b> 
 *  <ul>
 *    <li>
- *      The term and (optionally) payload fields have to be
- *      stored
+ *      The term field has to be stored; if it is missing, the document is skipped.
 *    </li>
 *    <li>
- *      if the term or (optionally) payload fields supplied
- *      do not have a value for a document, then the document is 
- *      rejected by the dictionary
+ *      The payload and contexts field are optional and are not required to be stored.
 *    </li>
 *  </ul>
 *  <p>
--- a/lucene/suggest/src/test/org/apache/lucene/search/suggest/DocumentDictionaryTest.java
+++ b/lucene/suggest/src/test/org/apache/lucene/search/suggest/DocumentDictionaryTest.java
@ -55,63 +55,6 @@ public class DocumentDictionaryTest extends LuceneTestCase {
  static final String PAYLOAD_FIELD_NAME = "p1";
  static final String CONTEXT_FIELD_NAME = "c1";
  
-  /** Returns Pair(list of invalid document terms, Map of document term -&gt; document) */
-  private Map.Entry<List<String>, Map<String, Document>> generateIndexDocuments(int ndocs, boolean requiresPayload, boolean requiresContexts) {
-    Map<String, Document> docs = new HashMap<>();
-    List<String> invalidDocTerms = new ArrayList<>();
-    for(int i = 0; i < ndocs ; i++) {
-      Document doc = new Document();
-      boolean invalidDoc = false;
-      Field field = null;
-      // usually have valid term field in document
-      if (usually()) {
-        field = new TextField(FIELD_NAME, "field_" + i, Field.Store.YES);
-        doc.add(field);
-      } else {
-        invalidDoc = true;
-      }
-      
-      // even if payload is not required usually have it
-      if (requiresPayload || usually()) {
-        // usually have valid payload field in document
-        if (usually()) {
-          Field payload = new StoredField(PAYLOAD_FIELD_NAME, new BytesRef("payload_" + i));
-          doc.add(payload);
-        } else if (requiresPayload) {
-          invalidDoc = true;
-        }
-      }
-      
-      if (requiresContexts || usually()) {
-        if (usually()) {
-          for (int j = 0; j < atLeast(2); j++) {
-            doc.add(new StoredField(CONTEXT_FIELD_NAME, new BytesRef("context_" + i + "_"+ j)));
-          }
-        }
-        // we should allow entries without context
-      }
-      
-      // usually have valid weight field in document
-      if (usually()) {
-        Field weight = (rarely()) ? 
-            new StoredField(WEIGHT_FIELD_NAME, 100d + i) : 
-            new NumericDocValuesField(WEIGHT_FIELD_NAME, 100 + i);
-        doc.add(weight);
-      }
-      
-      String term = null;
-      if (invalidDoc) {
-        term = (field!=null) ? field.stringValue() : "invalid_" + i;
-        invalidDocTerms.add(term);
-      } else {
-        term = field.stringValue();
-      }
-      
-      docs.put(term, doc);
-    }
-    return new SimpleEntry<>(invalidDocTerms, docs);
-  }
-  
  @Test
  public void testEmptyReader() throws IOException {
    Directory dir = newDirectory();
@ -140,7 +83,7 @@ public class DocumentDictionaryTest extends LuceneTestCase {
    IndexWriterConfig iwc = newIndexWriterConfig(analyzer);
    iwc.setMergePolicy(newLogMergePolicy());
    RandomIndexWriter writer = new RandomIndexWriter(random(), dir, iwc);
-    Map.Entry<List<String>, Map<String, Document>> res = generateIndexDocuments(atLeast(1000), true, false);
+    Map.Entry<List<String>, Map<String, Document>> res = generateIndexDocuments(atLeast(1000), false);
    Map<String, Document> docs = res.getValue();
    List<String> invalidDocTerms = res.getKey();
    for(Document doc: docs.values()) {
@ -157,7 +100,9 @@ public class DocumentDictionaryTest extends LuceneTestCase {
      assertTrue(f.equals(new BytesRef(doc.get(FIELD_NAME))));
      Field weightField = doc.getField(WEIGHT_FIELD_NAME);
      assertEquals(inputIterator.weight(), (weightField != null) ? weightField.numericValue().longValue() : 0);
-      assertTrue(inputIterator.payload().equals(doc.getField(PAYLOAD_FIELD_NAME).binaryValue()));
+      Field payloadField = doc.getField(PAYLOAD_FIELD_NAME);
+      if (payloadField == null) assertTrue(inputIterator.payload().length == 0);
+      else assertEquals(inputIterator.payload(), payloadField.binaryValue());
    }
    
    for (String invalidTerm : invalidDocTerms) {
@ -167,6 +112,41 @@ public class DocumentDictionaryTest extends LuceneTestCase {
    
    IOUtils.close(ir, analyzer, dir);
  }
+
+  @Test
+  public void testWithOptionalPayload() throws IOException {
+    Directory dir = newDirectory();
+    Analyzer analyzer = new MockAnalyzer(random());
+    IndexWriterConfig iwc = newIndexWriterConfig(analyzer);
+    iwc.setMergePolicy(newLogMergePolicy());
+    RandomIndexWriter writer = new RandomIndexWriter(random(), dir, iwc);
+
+    // Create a document that is missing the payload field
+    Document doc = new Document();
+    Field field = new TextField(FIELD_NAME, "some field", Field.Store.YES);
+    doc.add(field);
+    // do not store the payload or the contexts
+    Field weight = new NumericDocValuesField(WEIGHT_FIELD_NAME, 100);
+    doc.add(weight);
+    writer.addDocument(doc);
+    writer.commit();
+    writer.close();
+    IndexReader ir = DirectoryReader.open(dir);
+
+    // Even though the payload field is missing, the dictionary iterator should not skip the document
+    // because the payload field is optional.
+    Dictionary dictionaryOptionalPayload =
+        new DocumentDictionary(ir, FIELD_NAME, WEIGHT_FIELD_NAME, PAYLOAD_FIELD_NAME);
+    InputIterator inputIterator = dictionaryOptionalPayload.getEntryIterator();
+    BytesRef f = inputIterator.next();
+    assertTrue(f.equals(new BytesRef(doc.get(FIELD_NAME))));
+    Field weightField = doc.getField(WEIGHT_FIELD_NAME);
+    assertEquals(inputIterator.weight(), weightField.numericValue().longValue());
+    Field payloadField = doc.getField(PAYLOAD_FIELD_NAME);
+    assertNull(payloadField);
+    assertTrue(inputIterator.payload().length == 0);
+    IOUtils.close(ir, analyzer, dir);
+  }
 
  @Test
  public void testWithoutPayload() throws IOException {
@ -175,7 +155,7 @@ public class DocumentDictionaryTest extends LuceneTestCase {
    IndexWriterConfig iwc = newIndexWriterConfig(analyzer);
    iwc.setMergePolicy(newLogMergePolicy());
    RandomIndexWriter writer = new RandomIndexWriter(random(), dir, iwc);
-    Map.Entry<List<String>, Map<String, Document>> res = generateIndexDocuments(atLeast(1000), false, false);
+    Map.Entry<List<String>, Map<String, Document>> res = generateIndexDocuments(atLeast(1000), false);
    Map<String, Document> docs = res.getValue();
    List<String> invalidDocTerms = res.getKey();
    for(Document doc: docs.values()) {
@ -192,7 +172,7 @@ public class DocumentDictionaryTest extends LuceneTestCase {
      assertTrue(f.equals(new BytesRef(doc.get(FIELD_NAME))));
      Field weightField = doc.getField(WEIGHT_FIELD_NAME);
      assertEquals(inputIterator.weight(), (weightField != null) ? weightField.numericValue().longValue() : 0);
-      assertEquals(inputIterator.payload(), null);
+      assertNull(inputIterator.payload());
    }
    
    for (String invalidTerm : invalidDocTerms) {
@ -211,7 +191,7 @@ public class DocumentDictionaryTest extends LuceneTestCase {
    IndexWriterConfig iwc = newIndexWriterConfig(analyzer);
    iwc.setMergePolicy(newLogMergePolicy());
    RandomIndexWriter writer = new RandomIndexWriter(random(), dir, iwc);
-    Map.Entry<List<String>, Map<String, Document>> res = generateIndexDocuments(atLeast(1000), true, true);
+    Map.Entry<List<String>, Map<String, Document>> res = generateIndexDocuments(atLeast(1000), true);
    Map<String, Document> docs = res.getValue();
    List<String> invalidDocTerms = res.getKey();
    for(Document doc: docs.values()) {
@ -228,7 +208,9 @@ public class DocumentDictionaryTest extends LuceneTestCase {
      assertTrue(f.equals(new BytesRef(doc.get(FIELD_NAME))));
      Field weightField = doc.getField(WEIGHT_FIELD_NAME);
      assertEquals(inputIterator.weight(), (weightField != null) ? weightField.numericValue().longValue() : 0);
-      assertTrue(inputIterator.payload().equals(doc.getField(PAYLOAD_FIELD_NAME).binaryValue()));
+      Field payloadField = doc.getField(PAYLOAD_FIELD_NAME);
+      if (payloadField == null) assertTrue(inputIterator.payload().length == 0);
+      else assertEquals(inputIterator.payload(), payloadField.binaryValue());
      Set<BytesRef> oriCtxs = new HashSet<>();
      Set<BytesRef> contextSet = inputIterator.contexts();
      for (StorableField ctxf : doc.getFields(CONTEXT_FIELD_NAME)) {
@ -252,7 +234,7 @@ public class DocumentDictionaryTest extends LuceneTestCase {
    IndexWriterConfig iwc = newIndexWriterConfig(analyzer);
    iwc.setMergePolicy(newLogMergePolicy());
    RandomIndexWriter writer = new RandomIndexWriter(random(), dir, iwc);
-    Map.Entry<List<String>, Map<String, Document>> res = generateIndexDocuments(atLeast(1000), false, false);
+    Map.Entry<List<String>, Map<String, Document>> res = generateIndexDocuments(atLeast(1000), false);
    Map<String, Document> docs = res.getValue();
    List<String> invalidDocTerms = res.getKey();
    Random rand = random();
@ -291,7 +273,7 @@ public class DocumentDictionaryTest extends LuceneTestCase {
      assertTrue(f.equals(new BytesRef(doc.get(FIELD_NAME))));
      Field weightField = doc.getField(WEIGHT_FIELD_NAME);
      assertEquals(inputIterator.weight(), (weightField != null) ? weightField.numericValue().longValue() : 0);
-      assertEquals(inputIterator.payload(), null);
+      assertNull(inputIterator.payload());
    }
    
    for (String invalidTerm : invalidDocTerms) {
@ -324,13 +306,65 @@ public class DocumentDictionaryTest extends LuceneTestCase {
      assertTrue(f.equals(nextSuggestion.term));
      long weight = nextSuggestion.weight;
      assertEquals(inputIterator.weight(), (weight != -1) ? weight : 0);
-      assertTrue(inputIterator.payload().equals(nextSuggestion.payload));
+      assertEquals(inputIterator.payload(), nextSuggestion.payload);
      assertTrue(inputIterator.contexts().equals(nextSuggestion.contexts));
    }
    assertFalse(suggestionsIter.hasNext());
    IOUtils.close(ir, analyzer, dir);
  }

+  /** Returns Pair(list of invalid document terms, Map of document term -&gt; document) */
+  private Map.Entry<List<String>, Map<String, Document>> generateIndexDocuments(int ndocs, boolean requiresContexts) {
+    Map<String, Document> docs = new HashMap<>();
+    List<String> invalidDocTerms = new ArrayList<>();
+    for(int i = 0; i < ndocs ; i++) {
+      Document doc = new Document();
+      boolean invalidDoc = false;
+      Field field = null;
+      // usually have valid term field in document
+      if (usually()) {
+        field = new TextField(FIELD_NAME, "field_" + i, Field.Store.YES);
+        doc.add(field);
+      } else {
+        invalidDoc = true;
+      }
+
+      // even if payload is not required usually have it
+      if (usually()) {
+        Field payload = new StoredField(PAYLOAD_FIELD_NAME, new BytesRef("payload_" + i));
+        doc.add(payload);
+      }
+
+      if (requiresContexts || usually()) {
+        if (usually()) {
+          for (int j = 0; j < atLeast(2); j++) {
+            doc.add(new StoredField(CONTEXT_FIELD_NAME, new BytesRef("context_" + i + "_"+ j)));
+          }
+        }
+        // we should allow entries without context
+      }
+
+      // usually have valid weight field in document
+      if (usually()) {
+        Field weight = (rarely()) ?
+                new StoredField(WEIGHT_FIELD_NAME, 100d + i) :
+                new NumericDocValuesField(WEIGHT_FIELD_NAME, 100 + i);
+        doc.add(weight);
+      }
+
+      String term = null;
+      if (invalidDoc) {
+        term = (field!=null) ? field.stringValue() : "invalid_" + i;
+        invalidDocTerms.add(term);
+      } else {
+        term = field.stringValue();
+      }
+
+      docs.put(term, doc);
+    }
+    return new SimpleEntry<>(invalidDocTerms, docs);
+  }
+
  private List<Suggestion> indexMultiValuedDocuments(int numDocs, RandomIndexWriter writer) throws IOException {
    List<Suggestion> suggestionList = new ArrayList<>(numDocs);

--- a/lucene/suggest/src/test/org/apache/lucene/search/suggest/DocumentValueSourceDictionaryTest.java
+++ b/lucene/suggest/src/test/org/apache/lucene/search/suggest/DocumentValueSourceDictionaryTest.java
@ -57,31 +57,6 @@ public class DocumentValueSourceDictionaryTest extends LuceneTestCase {
  static final String WEIGHT_FIELD_NAME_3 = "w3";
  static final String PAYLOAD_FIELD_NAME = "p1";
  static final String CONTEXTS_FIELD_NAME = "c1";
-
-  private Map<String, Document> generateIndexDocuments(int ndocs) {
-    Map<String, Document> docs = new HashMap<>();
-    for(int i = 0; i < ndocs ; i++) {
-      Field field = new TextField(FIELD_NAME, "field_" + i, Field.Store.YES);
-      Field payload = new StoredField(PAYLOAD_FIELD_NAME, new BytesRef("payload_" + i));
-      Field weight1 = new NumericDocValuesField(WEIGHT_FIELD_NAME_1, 10 + i);
-      Field weight2 = new NumericDocValuesField(WEIGHT_FIELD_NAME_2, 20 + i);
-      Field weight3 = new NumericDocValuesField(WEIGHT_FIELD_NAME_3, 30 + i);
-      Field contexts = new StoredField(CONTEXTS_FIELD_NAME, new BytesRef("ctx_"  + i + "_0"));
-      Document doc = new Document();
-      doc.add(field);
-      doc.add(payload);
-      doc.add(weight1);
-      doc.add(weight2);
-      doc.add(weight3);
-      doc.add(contexts);
-      for(int j = 1; j < atLeast(3); j++) {
-        contexts.setBytesValue(new BytesRef("ctx_" + i + "_" + j));
-        doc.add(contexts);
-      }
-      docs.put(field.stringValue(), doc);
-    }
-    return docs;
-  }
  
  @Test
  public void testEmptyReader() throws IOException {
@ -130,7 +105,9 @@ public class DocumentValueSourceDictionaryTest extends LuceneTestCase {
      long w3 = doc.getField(WEIGHT_FIELD_NAME_3).numericValue().longValue();
      assertTrue(f.equals(new BytesRef(doc.get(FIELD_NAME))));
      assertEquals(inputIterator.weight(), (w1 + w2 + w3));
-      assertTrue(inputIterator.payload().equals(doc.getField(PAYLOAD_FIELD_NAME).binaryValue()));
+      Field payloadField = doc.getField(PAYLOAD_FIELD_NAME);
+      if (payloadField == null) assertTrue(inputIterator.payload().length == 0);
+      else assertEquals(inputIterator.payload(), payloadField.binaryValue());
    }
    assertTrue(docs.isEmpty());
    IOUtils.close(ir, analyzer, dir);
@ -162,7 +139,9 @@ public class DocumentValueSourceDictionaryTest extends LuceneTestCase {
      long w3 = doc.getField(WEIGHT_FIELD_NAME_3).numericValue().longValue();
      assertTrue(f.equals(new BytesRef(doc.get(FIELD_NAME))));
      assertEquals(inputIterator.weight(), (w1 + w2 + w3));
-      assertTrue(inputIterator.payload().equals(doc.getField(PAYLOAD_FIELD_NAME).binaryValue()));
+      Field payloadField = doc.getField(PAYLOAD_FIELD_NAME);
+      if (payloadField == null) assertTrue(inputIterator.payload().length == 0);
+      else assertEquals(inputIterator.payload(), payloadField.binaryValue());
      Set<BytesRef> originalCtxs = new HashSet<>();
      for (Field ctxf: doc.getFields(CONTEXTS_FIELD_NAME)) {
        originalCtxs.add(ctxf.binaryValue());
@ -199,7 +178,7 @@ public class DocumentValueSourceDictionaryTest extends LuceneTestCase {
      long w3 = doc.getField(WEIGHT_FIELD_NAME_3).numericValue().longValue();
      assertTrue(f.equals(new BytesRef(doc.get(FIELD_NAME))));
      assertEquals(inputIterator.weight(), (w1 + w2 + w3));
-      assertEquals(inputIterator.payload(), null);
+      assertNull(inputIterator.payload());
    }
    assertTrue(docs.isEmpty());
    IOUtils.close(ir, analyzer, dir);
@ -252,7 +231,9 @@ public class DocumentValueSourceDictionaryTest extends LuceneTestCase {
      long w2 = doc.getField(WEIGHT_FIELD_NAME_2).numericValue().longValue();
      assertTrue(f.equals(new BytesRef(doc.get(FIELD_NAME))));
      assertEquals(inputIterator.weight(), w2+w1);
-      assertTrue(inputIterator.payload().equals(doc.getField(PAYLOAD_FIELD_NAME).binaryValue()));
+      Field payloadField = doc.getField(PAYLOAD_FIELD_NAME);
+      if (payloadField == null) assertTrue(inputIterator.payload().length == 0);
+      else assertEquals(inputIterator.payload(), payloadField.binaryValue());
    }
    assertTrue(docs.isEmpty());
    IOUtils.close(ir, analyzer, dir);
@ -280,10 +261,39 @@ public class DocumentValueSourceDictionaryTest extends LuceneTestCase {
      Document doc = docs.remove(f.utf8ToString());
      assertTrue(f.equals(new BytesRef(doc.get(FIELD_NAME))));
      assertEquals(inputIterator.weight(), 10);
-      assertTrue(inputIterator.payload().equals(doc.getField(PAYLOAD_FIELD_NAME).binaryValue()));
+      Field payloadField = doc.getField(PAYLOAD_FIELD_NAME);
+      if (payloadField == null) assertTrue(inputIterator.payload().length == 0);
+      else assertEquals(inputIterator.payload(), payloadField.binaryValue());
    }
    assertTrue(docs.isEmpty());
    IOUtils.close(ir, analyzer, dir);
  }
-  
+
+  private Map<String, Document> generateIndexDocuments(int ndocs) {
+    Map<String, Document> docs = new HashMap<>();
+    for(int i = 0; i < ndocs ; i++) {
+      Field field = new TextField(FIELD_NAME, "field_" + i, Field.Store.YES);
+      Field weight1 = new NumericDocValuesField(WEIGHT_FIELD_NAME_1, 10 + i);
+      Field weight2 = new NumericDocValuesField(WEIGHT_FIELD_NAME_2, 20 + i);
+      Field weight3 = new NumericDocValuesField(WEIGHT_FIELD_NAME_3, 30 + i);
+      Field contexts = new StoredField(CONTEXTS_FIELD_NAME, new BytesRef("ctx_"  + i + "_0"));
+      Document doc = new Document();
+      doc.add(field);
+      // even if payload is not required usually have it
+      if (usually()) {
+        Field payload = new StoredField(PAYLOAD_FIELD_NAME, new BytesRef("payload_" + i));
+        doc.add(payload);
+      }
+      doc.add(weight1);
+      doc.add(weight2);
+      doc.add(weight3);
+      doc.add(contexts);
+      for(int j = 1; j < atLeast(3); j++) {
+        contexts.setBytesValue(new BytesRef("ctx_" + i + "_" + j));
+        doc.add(contexts);
+      }
+      docs.put(field.stringValue(), doc);
+    }
+    return docs;
+  }
 }