From 5c084fcd6e9e94548303017be34c257e7a89227d Mon Sep 17 00:00:00 2001 From: Adrien Grand Date: Tue, 19 Dec 2023 11:20:14 +0100 Subject: [PATCH] Add a stored fields test that indexes LineFileDocs. (#12927) Real-world data exhibits patterns that are taken advantage of by the compression logic, but also hardly reproducible in a randomized way. This makes this new test introduce interesting coverage. It takes one second to run on my machine, so I did not mark it `@Nightly`. --- .../index/BaseStoredFieldsFormatTestCase.java | 31 +++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/lucene/test-framework/src/java/org/apache/lucene/tests/index/BaseStoredFieldsFormatTestCase.java b/lucene/test-framework/src/java/org/apache/lucene/tests/index/BaseStoredFieldsFormatTestCase.java index 2c7aadab81f..761cf9b7387 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/tests/index/BaseStoredFieldsFormatTestCase.java +++ b/lucene/test-framework/src/java/org/apache/lucene/tests/index/BaseStoredFieldsFormatTestCase.java @@ -72,6 +72,7 @@ import org.apache.lucene.store.MMapDirectory; import org.apache.lucene.tests.analysis.MockAnalyzer; import org.apache.lucene.tests.store.MockDirectoryWrapper; import org.apache.lucene.tests.store.MockDirectoryWrapper.Throttling; +import org.apache.lucene.tests.util.LineFileDocs; import org.apache.lucene.tests.util.TestUtil; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.IOUtils; @@ -1006,4 +1007,34 @@ public abstract class BaseStoredFieldsFormatTestCase extends BaseIndexFileFormat verifyStoreFields.run(); IOUtils.close(iw, dir); } + + /** Test realistic data, which typically compresses better than random data. */ + public void testLineFileDocs() throws IOException { + // Use a FS dir and a non-randomized IWC to not slow down indexing + try (Directory dir = newFSDirectory(createTempDir())) { + try (LineFileDocs docs = new LineFileDocs(random()); + IndexWriter w = new IndexWriter(dir, new IndexWriterConfig())) { + final int numDocs = atLeast(10_000); + for (int i = 0; i < numDocs; ++i) { + // Only keep stored fields + Document doc = docs.nextDoc(); + Document storedDoc = new Document(); + for (IndexableField field : doc.getFields()) { + if (field.fieldType().stored()) { + IndexableField storedField = field; + if (field.stringValue() != null) { + // Disable indexing + storedField = new StoredField(field.name(), field.stringValue()); + } + storedDoc.add(storedField); + } + } + + w.addDocument(storedDoc); + } + w.forceMerge(1); + } + TestUtil.checkIndex(dir); + } + } }