Add a stored fields test that indexes LineFileDocs. (#12927)

Real-world data exhibits patterns that the compression logic takes advantage
of, but that are hard to reproduce with randomly generated data. This new test
therefore adds interesting coverage.

It takes one second to run on my machine, so I did not mark it `@Nightly`.
This commit is contained in:
Adrien Grand 2023-12-19 11:20:14 +01:00 committed by GitHub
parent bf45ab79ec
commit 5c084fcd6e
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
1 changed files with 31 additions and 0 deletions

View File

@ -72,6 +72,7 @@ import org.apache.lucene.store.MMapDirectory;
import org.apache.lucene.tests.analysis.MockAnalyzer;
import org.apache.lucene.tests.store.MockDirectoryWrapper;
import org.apache.lucene.tests.store.MockDirectoryWrapper.Throttling;
import org.apache.lucene.tests.util.LineFileDocs;
import org.apache.lucene.tests.util.TestUtil;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IOUtils;
@ -1006,4 +1007,34 @@ public abstract class BaseStoredFieldsFormatTestCase extends BaseIndexFileFormat
verifyStoreFields.run();
IOUtils.close(iw, dir);
}
/**
 * Indexes only the stored fields of {@link LineFileDocs} documents, force-merges to a single
 * segment and runs an index check. Realistic data typically compresses better than random data.
 */
public void testLineFileDocs() throws IOException {
  // A filesystem directory and a non-randomized IndexWriterConfig keep indexing from slowing down
  try (Directory directory = newFSDirectory(createTempDir())) {
    try (LineFileDocs lineDocs = new LineFileDocs(random());
        IndexWriter writer = new IndexWriter(directory, new IndexWriterConfig())) {
      int remaining = atLeast(10_000);
      while (remaining-- > 0) {
        Document source = lineDocs.nextDoc();
        // Copy over stored fields only; everything else is dropped
        Document storedOnly = new Document();
        for (IndexableField candidate : source.getFields()) {
          if (candidate.fieldType().stored() == false) {
            continue;
          }
          // String-valued fields are rewrapped as plain StoredField to disable indexing;
          // fields without a string value are added unchanged
          final String text = candidate.stringValue();
          storedOnly.add(text == null ? candidate : new StoredField(candidate.name(), text));
        }
        writer.addDocument(storedOnly);
      }
      writer.forceMerge(1);
    }
    // The writer is closed at this point while the directory is still open
    TestUtil.checkIndex(directory);
  }
}
}