From 1529c57ca19385b35a153357785aed9d47eb5150 Mon Sep 17 00:00:00 2001 From: Michael McCandless Date: Sun, 1 Feb 2015 09:12:09 +0000 Subject: [PATCH] LUCENE-6212: remove per-doc analyzers git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1656272 13f79535-47bb-0310-9956-ffa450edef68 --- lucene/CHANGES.txt | 5 + .../analysis/uima/UIMABaseAnalyzerTest.java | 4 +- .../ClassificationTestBase.java | 51 +- .../utils/DataSplitterTest.java | 4 +- .../utils/DocToDoubleVectorUtilsTest.java | 4 +- .../org/apache/lucene/index/IndexWriter.java | 72 +- .../lucene/index/TrackingIndexWriter.java | 36 - .../lucene/analysis/TestMockAnalyzer.java | 4 +- .../index/TestIndexWriterExceptions.java | 26 +- .../org/apache/lucene/index/TestPayloads.java | 888 +++++++++--------- .../TestControlledRealTimeReopenThread.java | 4 +- .../search/highlight/HighlighterTest.java | 8 +- .../lucene/index/SortingMergePolicy.java | 2 +- .../lucene/index/RandomIndexWriter.java | 9 +- .../java/org/apache/solr/core/SolrCore.java | 2 +- .../solr/update/DefaultSolrCoreState.java | 2 +- .../solr/update/DirectUpdateHandler2.java | 11 +- .../apache/solr/update/SolrIndexConfig.java | 37 +- .../apache/solr/update/SolrIndexSplitter.java | 2 +- .../apache/solr/update/SolrIndexWriter.java | 11 +- .../test/org/apache/solr/core/TestConfig.java | 2 +- .../solr/core/TestInfoStreamLogging.java | 2 +- .../solr/core/TestMergePolicyConfig.java | 8 +- .../apache/solr/core/TestSolrIndexConfig.java | 2 +- .../solr/update/SolrIndexConfigTest.java | 25 +- 25 files changed, 574 insertions(+), 647 deletions(-) diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index e2d711cf4a7..e722c396f45 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -392,6 +392,11 @@ API Changes Weight.scoresDocsOutOfOrder and LeafCollector.acceptsDocsOutOfOrder have been removed and boolean queries now always score in order. +* LUCENE-6212: IndexWriter no longer accepts per-document Analyzer to + add/updateDocument. These methods were trappy as they made it + easy to accidentally index tokens that were not easily + searchable. 
(Mike McCandless) + Bug Fixes * LUCENE-5650: Enforce read-only access to any path outside the temporary diff --git a/lucene/analysis/uima/src/test/org/apache/lucene/analysis/uima/UIMABaseAnalyzerTest.java b/lucene/analysis/uima/src/test/org/apache/lucene/analysis/uima/UIMABaseAnalyzerTest.java index a2801417cd0..892b35a20df 100644 --- a/lucene/analysis/uima/src/test/org/apache/lucene/analysis/uima/UIMABaseAnalyzerTest.java +++ b/lucene/analysis/uima/src/test/org/apache/lucene/analysis/uima/UIMABaseAnalyzerTest.java @@ -75,7 +75,7 @@ public class UIMABaseAnalyzerTest extends BaseTokenStreamTestCase { doc.add(new TextField("title", dummyTitle, Field.Store.YES)); String dummyContent = "there is some content written here"; doc.add(new TextField("contents", dummyContent, Field.Store.YES)); - writer.addDocument(doc, analyzer); + writer.addDocument(doc); writer.commit(); // try the search over the first doc @@ -96,7 +96,7 @@ public class UIMABaseAnalyzerTest extends BaseTokenStreamTestCase { doc.add(new TextField("title", dogmasTitle, Field.Store.YES)); String dogmasContents = "white men can't jump"; doc.add(new TextField("contents", dogmasContents, Field.Store.YES)); - writer.addDocument(doc, analyzer); + writer.addDocument(doc); writer.commit(); directoryReader.close(); diff --git a/lucene/classification/src/test/org/apache/lucene/classification/ClassificationTestBase.java b/lucene/classification/src/test/org/apache/lucene/classification/ClassificationTestBase.java index 3ff1f80d566..b8334afb8f6 100644 --- a/lucene/classification/src/test/org/apache/lucene/classification/ClassificationTestBase.java +++ b/lucene/classification/src/test/org/apache/lucene/classification/ClassificationTestBase.java @@ -16,11 +16,15 @@ */ package org.apache.lucene.classification; +import java.io.IOException; +import java.util.Random; + import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; import org.apache.lucene.document.FieldType; import org.apache.lucene.document.TextField; +import org.apache.lucene.index.IndexWriterConfig; import org.apache.lucene.index.LeafReader; import org.apache.lucene.index.RandomIndexWriter; import org.apache.lucene.index.SlowCompositeReaderWrapper; @@ -32,9 +36,6 @@ import org.apache.lucene.util.TestUtil; import org.junit.After; import org.junit.Before; -import java.io.IOException; -import java.util.Random; - /** * Base class for testing {@link Classifier}s */ @@ -113,7 +114,7 @@ public abstract class ClassificationTestBase extends LuceneTestCase { assertEquals("got an assigned class of " + classificationResult.getAssignedClass(), expectedResult, classificationResult.getAssignedClass()); double score = classificationResult.getScore(); assertTrue("score should be between 0 and 1, got: " + score, score <= 1 && score >= 0); - updateSampleIndex(analyzer); + updateSampleIndex(); ClassificationResult secondClassificationResult = classifier.assignClass(inputDoc); assertEquals(classificationResult.getAssignedClass(), secondClassificationResult.getAssignedClass()); assertEquals(Double.valueOf(score), Double.valueOf(secondClassificationResult.getScore())); @@ -125,7 +126,8 @@ public abstract class ClassificationTestBase extends LuceneTestCase { } private void populateSampleIndex(Analyzer analyzer) throws IOException { - indexWriter.deleteAll(); + indexWriter.close(); + indexWriter = new RandomIndexWriter(random(), dir, newIndexWriterConfig(analyzer).setOpenMode(IndexWriterConfig.OpenMode.CREATE)); indexWriter.commit(); 
String text; @@ -138,7 +140,7 @@ public abstract class ClassificationTestBase extends LuceneTestCase { doc.add(new Field(categoryFieldName, "politics", ft)); doc.add(new Field(booleanFieldName, "true", ft)); - indexWriter.addDocument(doc, analyzer); + indexWriter.addDocument(doc); doc = new Document(); text = "Mitt Romney seeks to assure Israel and Iran, as well as Jewish voters in the United" + @@ -146,7 +148,7 @@ public abstract class ClassificationTestBase extends LuceneTestCase { doc.add(new Field(textFieldName, text, ft)); doc.add(new Field(categoryFieldName, "politics", ft)); doc.add(new Field(booleanFieldName, "true", ft)); - indexWriter.addDocument(doc, analyzer); + indexWriter.addDocument(doc); doc = new Document(); text = "And there's a threshold question that he has to answer for the American people and " + @@ -155,7 +157,7 @@ public abstract class ClassificationTestBase extends LuceneTestCase { doc.add(new Field(textFieldName, text, ft)); doc.add(new Field(categoryFieldName, "politics", ft)); doc.add(new Field(booleanFieldName, "true", ft)); - indexWriter.addDocument(doc, analyzer); + indexWriter.addDocument(doc); doc = new Document(); text = "Still, when it comes to gun policy, many congressional Democrats have \"decided to " + @@ -164,7 +166,7 @@ public abstract class ClassificationTestBase extends LuceneTestCase { doc.add(new Field(textFieldName, text, ft)); doc.add(new Field(categoryFieldName, "politics", ft)); doc.add(new Field(booleanFieldName, "true", ft)); - indexWriter.addDocument(doc, analyzer); + indexWriter.addDocument(doc); doc = new Document(); text = "Standing amongst the thousands of people at the state Capitol, Jorstad, director of " + @@ -173,7 +175,7 @@ public abstract class ClassificationTestBase extends LuceneTestCase { doc.add(new Field(textFieldName, text, ft)); doc.add(new Field(categoryFieldName, "technology", ft)); doc.add(new Field(booleanFieldName, "false", ft)); - indexWriter.addDocument(doc, analyzer); + indexWriter.addDocument(doc); doc = new Document(); text = "So, about all those experts and analysts who've spent the past year or so saying " + @@ -181,7 +183,7 @@ public abstract class ClassificationTestBase extends LuceneTestCase { doc.add(new Field(textFieldName, text, ft)); doc.add(new Field(categoryFieldName, "technology", ft)); doc.add(new Field(booleanFieldName, "false", ft)); - indexWriter.addDocument(doc, analyzer); + indexWriter.addDocument(doc); doc = new Document(); text = "More than 400 million people trust Google with their e-mail, and 50 million store files" + @@ -190,12 +192,12 @@ public abstract class ClassificationTestBase extends LuceneTestCase { doc.add(new Field(textFieldName, text, ft)); doc.add(new Field(categoryFieldName, "technology", ft)); doc.add(new Field(booleanFieldName, "false", ft)); - indexWriter.addDocument(doc, analyzer); + indexWriter.addDocument(doc); doc = new Document(); text = "unlabeled doc"; doc.add(new Field(textFieldName, text, ft)); - indexWriter.addDocument(doc, analyzer); + indexWriter.addDocument(doc); indexWriter.commit(); } @@ -217,7 +219,8 @@ public abstract class ClassificationTestBase extends LuceneTestCase { } private void populatePerformanceIndex(Analyzer analyzer) throws IOException { - indexWriter.deleteAll(); + indexWriter.close(); + indexWriter = new RandomIndexWriter(random(), dir, newIndexWriterConfig(analyzer).setOpenMode(IndexWriterConfig.OpenMode.CREATE)); indexWriter.commit(); FieldType ft = new FieldType(TextField.TYPE_STORED); @@ -232,7 +235,7 @@ public abstract class 
ClassificationTestBase extends LuceneTestCase { doc.add(new Field(textFieldName, createRandomString(random), ft)); doc.add(new Field(categoryFieldName, b ? "technology" : "politics", ft)); doc.add(new Field(booleanFieldName, String.valueOf(b), ft)); - indexWriter.addDocument(doc, analyzer); + indexWriter.addDocument(doc); } indexWriter.commit(); } @@ -246,7 +249,7 @@ public abstract class ClassificationTestBase extends LuceneTestCase { return builder.toString(); } - private void updateSampleIndex(Analyzer analyzer) throws Exception { + private void updateSampleIndex() throws Exception { String text; @@ -256,54 +259,54 @@ public abstract class ClassificationTestBase extends LuceneTestCase { doc.add(new Field(categoryFieldName, "politics", ft)); doc.add(new Field(booleanFieldName, "true", ft)); - indexWriter.addDocument(doc, analyzer); + indexWriter.addDocument(doc); doc = new Document(); text = "Julian Zelizer says Bill Clinton is still trying to shape his party, years after the White House, while George W. Bush opts for a much more passive role."; doc.add(new Field(textFieldName, text, ft)); doc.add(new Field(categoryFieldName, "politics", ft)); doc.add(new Field(booleanFieldName, "true", ft)); - indexWriter.addDocument(doc, analyzer); + indexWriter.addDocument(doc); doc = new Document(); text = "Crossfire: Sen. Tim Scott passes on Sen. Lindsey Graham endorsement"; doc.add(new Field(textFieldName, text, ft)); doc.add(new Field(categoryFieldName, "politics", ft)); doc.add(new Field(booleanFieldName, "true", ft)); - indexWriter.addDocument(doc, analyzer); + indexWriter.addDocument(doc); doc = new Document(); text = "Illinois becomes 16th state to allow same-sex marriage."; doc.add(new Field(textFieldName, text, ft)); doc.add(new Field(categoryFieldName, "politics", ft)); doc.add(new Field(booleanFieldName, "true", ft)); - indexWriter.addDocument(doc, analyzer); + indexWriter.addDocument(doc); doc = new Document(); text = "Apple is developing iPhones with curved-glass screens and enhanced sensors that detect different levels of pressure, according to a new report."; doc.add(new Field(textFieldName, text, ft)); doc.add(new Field(categoryFieldName, "technology", ft)); doc.add(new Field(booleanFieldName, "false", ft)); - indexWriter.addDocument(doc, analyzer); + indexWriter.addDocument(doc); doc = new Document(); text = "The Xbox One is Microsoft's first new gaming console in eight years. 
It's a quality piece of hardware but it's also noteworthy because Microsoft is using it to make a statement."; doc.add(new Field(textFieldName, text, ft)); doc.add(new Field(categoryFieldName, "technology", ft)); doc.add(new Field(booleanFieldName, "false", ft)); - indexWriter.addDocument(doc, analyzer); + indexWriter.addDocument(doc); doc = new Document(); text = "Google says it will replace a Google Maps image after a California father complained it shows the body of his teen-age son, who was shot to death in 2009."; doc.add(new Field(textFieldName, text, ft)); doc.add(new Field(categoryFieldName, "technology", ft)); doc.add(new Field(booleanFieldName, "false", ft)); - indexWriter.addDocument(doc, analyzer); + indexWriter.addDocument(doc); doc = new Document(); text = "second unlabeled doc"; doc.add(new Field(textFieldName, text, ft)); - indexWriter.addDocument(doc, analyzer); + indexWriter.addDocument(doc); indexWriter.commit(); } diff --git a/lucene/classification/src/test/org/apache/lucene/classification/utils/DataSplitterTest.java b/lucene/classification/src/test/org/apache/lucene/classification/utils/DataSplitterTest.java index e7493455ec4..9b64c848e22 100644 --- a/lucene/classification/src/test/org/apache/lucene/classification/utils/DataSplitterTest.java +++ b/lucene/classification/src/test/org/apache/lucene/classification/utils/DataSplitterTest.java @@ -64,8 +64,6 @@ public class DataSplitterTest extends LuceneTestCase { ft.setStoreTermVectorOffsets(true); ft.setStoreTermVectorPositions(true); - Analyzer analyzer = new MockAnalyzer(random()); - Document doc; Random rnd = random(); for (int i = 0; i < 100; i++) { @@ -73,7 +71,7 @@ public class DataSplitterTest extends LuceneTestCase { doc.add(new Field(idFieldName, Integer.toString(i), ft)); doc.add(new Field(textFieldName, TestUtil.randomUnicodeString(rnd, 1024), ft)); doc.add(new Field(classFieldName, TestUtil.randomUnicodeString(rnd, 10), ft)); - indexWriter.addDocument(doc, analyzer); + indexWriter.addDocument(doc); } indexWriter.commit(); diff --git a/lucene/classification/src/test/org/apache/lucene/classification/utils/DocToDoubleVectorUtilsTest.java b/lucene/classification/src/test/org/apache/lucene/classification/utils/DocToDoubleVectorUtilsTest.java index 24de791e947..41b09c6132b 100644 --- a/lucene/classification/src/test/org/apache/lucene/classification/utils/DocToDoubleVectorUtilsTest.java +++ b/lucene/classification/src/test/org/apache/lucene/classification/utils/DocToDoubleVectorUtilsTest.java @@ -55,14 +55,12 @@ public class DocToDoubleVectorUtilsTest extends LuceneTestCase { ft.setStoreTermVectorOffsets(true); ft.setStoreTermVectorPositions(true); - Analyzer analyzer = new MockAnalyzer(random()); - Document doc; for (int i = 0; i < 10; i++) { doc = new Document(); doc.add(new Field("id", Integer.toString(i), ft)); doc.add(new Field("text", random().nextInt(10) + " " + random().nextInt(10) + " " + random().nextInt(10), ft)); - indexWriter.addDocument(doc, analyzer); + indexWriter.addDocument(doc); } indexWriter.commit(); diff --git a/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java b/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java index 63733171e35..7da70f0e5f6 100644 --- a/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java +++ b/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java @@ -1127,22 +1127,7 @@ public class IndexWriter implements Closeable, TwoPhaseCommit, Accountable { * @throws IOException if there is a low-level IO error */ public void 
addDocument(IndexDocument doc) throws IOException { - addDocument(doc, analyzer); - } - - /** - * Adds a document to this index, using the provided analyzer instead of the - * value of {@link #getAnalyzer()}. - * - *

<p>See {@link #addDocument(IndexDocument)} for details on - * index and IndexWriter state after an Exception, and - * flushing/merging temporary free space requirements.</p>

- * - * @throws CorruptIndexException if the index is corrupt - * @throws IOException if there is a low-level IO error - */ - public void addDocument(IndexDocument doc, Analyzer analyzer) throws IOException { - updateDocument(null, doc, analyzer); + updateDocument(null, doc); } /** @@ -1183,22 +1168,7 @@ public class IndexWriter implements Closeable, TwoPhaseCommit, Accountable { * @lucene.experimental */ public void addDocuments(Iterable docs) throws IOException { - addDocuments(docs, analyzer); - } - - /** - * Atomically adds a block of documents, analyzed using the - * provided analyzer, with sequentially assigned document - * IDs, such that an external reader will see all or none - * of the documents. - * - * @throws CorruptIndexException if the index is corrupt - * @throws IOException if there is a low-level IO error - * - * @lucene.experimental - */ - public void addDocuments(Iterable docs, Analyzer analyzer) throws IOException { - updateDocuments(null, docs, analyzer); + updateDocuments(null, docs); } /** @@ -1215,24 +1185,6 @@ public class IndexWriter implements Closeable, TwoPhaseCommit, Accountable { * @lucene.experimental */ public void updateDocuments(Term delTerm, Iterable docs) throws IOException { - updateDocuments(delTerm, docs, analyzer); - } - - /** - * Atomically deletes documents matching the provided - * delTerm and adds a block of documents, analyzed using - * the provided analyzer, with sequentially - * assigned document IDs, such that an external reader - * will see all or none of the documents. - * - * See {@link #addDocuments(Iterable)}. - * - * @throws CorruptIndexException if the index is corrupt - * @throws IOException if there is a low-level IO error - * - * @lucene.experimental - */ - public void updateDocuments(Term delTerm, Iterable docs, Analyzer analyzer) throws IOException { ensureOpen(); try { boolean success = false; @@ -1384,26 +1336,6 @@ public class IndexWriter implements Closeable, TwoPhaseCommit, Accountable { */ public void updateDocument(Term term, IndexDocument doc) throws IOException { ensureOpen(); - updateDocument(term, doc, analyzer); - } - - /** - * Updates a document by first deleting the document(s) - * containing term and then adding the new - * document. The delete and then add are atomic as seen - * by a reader on the same index (flush may happen only after - * the add). - * - * @param term the term to identify the document(s) to be - * deleted - * @param doc the document to be added - * @param analyzer the analyzer to use when analyzing the document - * @throws CorruptIndexException if the index is corrupt - * @throws IOException if there is a low-level IO error - */ - public void updateDocument(Term term, IndexDocument doc, Analyzer analyzer) - throws IOException { - ensureOpen(); try { boolean success = false; try { diff --git a/lucene/core/src/java/org/apache/lucene/index/TrackingIndexWriter.java b/lucene/core/src/java/org/apache/lucene/index/TrackingIndexWriter.java index 21e5dfb16bd..d1972cd51eb 100644 --- a/lucene/core/src/java/org/apache/lucene/index/TrackingIndexWriter.java +++ b/lucene/core/src/java/org/apache/lucene/index/TrackingIndexWriter.java @@ -47,15 +47,6 @@ public class TrackingIndexWriter { this.writer = writer; } - /** Calls {@link - * IndexWriter#updateDocument(Term,IndexDocument,Analyzer)} - * and returns the generation that reflects this change. 
*/ - public long updateDocument(Term t, IndexDocument d, Analyzer a) throws IOException { - writer.updateDocument(t, d, a); - // Return gen as of when indexing finished: - return indexingGen.get(); - } - /** Calls {@link * IndexWriter#updateDocument(Term,IndexDocument)} and * returns the generation that reflects this change. */ @@ -65,15 +56,6 @@ public class TrackingIndexWriter { return indexingGen.get(); } - /** Calls {@link - * IndexWriter#updateDocuments(Term,Iterable,Analyzer)} - * and returns the generation that reflects this change. */ - public long updateDocuments(Term t, Iterable docs, Analyzer a) throws IOException { - writer.updateDocuments(t, docs, a); - // Return gen as of when indexing finished: - return indexingGen.get(); - } - /** Calls {@link * IndexWriter#updateDocuments(Term,Iterable)} and returns * the generation that reflects this change. */ @@ -123,24 +105,6 @@ public class TrackingIndexWriter { return indexingGen.get(); } - /** Calls {@link - * IndexWriter#addDocument(IndexDocument,Analyzer)} and - * returns the generation that reflects this change. */ - public long addDocument(IndexDocument d, Analyzer a) throws IOException { - writer.addDocument(d, a); - // Return gen as of when indexing finished: - return indexingGen.get(); - } - - /** Calls {@link - * IndexWriter#addDocuments(Iterable,Analyzer)} and - * returns the generation that reflects this change. */ - public long addDocuments(Iterable docs, Analyzer a) throws IOException { - writer.addDocuments(docs, a); - // Return gen as of when indexing finished: - return indexingGen.get(); - } - /** Calls {@link IndexWriter#addDocument(IndexDocument)} * and returns the generation that reflects this change. */ public long addDocument(IndexDocument d) throws IOException { diff --git a/lucene/core/src/test/org/apache/lucene/analysis/TestMockAnalyzer.java b/lucene/core/src/test/org/apache/lucene/analysis/TestMockAnalyzer.java index 485ea3f667b..0a67974e2ee 100644 --- a/lucene/core/src/test/org/apache/lucene/analysis/TestMockAnalyzer.java +++ b/lucene/core/src/test/org/apache/lucene/analysis/TestMockAnalyzer.java @@ -305,7 +305,7 @@ public class TestMockAnalyzer extends BaseTokenStreamTestCase { } }; - final RandomIndexWriter writer = new RandomIndexWriter(random(), newDirectory()); + final RandomIndexWriter writer = new RandomIndexWriter(random(), newDirectory(), a); final Document doc = new Document(); final FieldType ft = new FieldType(); ft.setIndexOptions(IndexOptions.DOCS); @@ -315,7 +315,7 @@ public class TestMockAnalyzer extends BaseTokenStreamTestCase { ft.setStoreTermVectorOffsets(true); doc.add(new Field("f", "a", ft)); doc.add(new Field("f", "a", ft)); - writer.addDocument(doc, a); + writer.addDocument(doc); final LeafReader reader = getOnlySegmentReader(writer.getReader()); final Fields fields = reader.getTermVectors(0); final Terms terms = fields.terms("f"); diff --git a/lucene/core/src/test/org/apache/lucene/index/TestIndexWriterExceptions.java b/lucene/core/src/test/org/apache/lucene/index/TestIndexWriterExceptions.java index 1ba214e4279..77ee7e52a4d 100644 --- a/lucene/core/src/test/org/apache/lucene/index/TestIndexWriterExceptions.java +++ b/lucene/core/src/test/org/apache/lucene/index/TestIndexWriterExceptions.java @@ -392,27 +392,35 @@ public class TestIndexWriterExceptions extends LuceneTestCase { // LUCENE-1208 public void testExceptionJustBeforeFlush() throws IOException { Directory dir = newDirectory(); - IndexWriter w = RandomIndexWriter.mockIndexWriter(dir, - newIndexWriterConfig(new 
MockAnalyzer(random())) - .setMaxBufferedDocs(2), - new TestPoint1()); - Document doc = new Document(); - doc.add(newTextField("field", "a field", Field.Store.YES)); - w.addDocument(doc); + + final AtomicBoolean doCrash = new AtomicBoolean(); Analyzer analyzer = new Analyzer(Analyzer.PER_FIELD_REUSE_STRATEGY) { @Override public TokenStreamComponents createComponents(String fieldName) { MockTokenizer tokenizer = new MockTokenizer(MockTokenizer.WHITESPACE, false); tokenizer.setEnableChecks(false); // disable workflow checking as we forcefully close() in exceptional cases. - return new TokenStreamComponents(tokenizer, new CrashingFilter(fieldName, tokenizer)); + TokenStream stream = tokenizer; + if (doCrash.get()) { + stream = new CrashingFilter(fieldName, stream); + } + return new TokenStreamComponents(tokenizer, stream); } }; + IndexWriter w = RandomIndexWriter.mockIndexWriter(dir, + newIndexWriterConfig(analyzer) + .setMaxBufferedDocs(2), + new TestPoint1()); + Document doc = new Document(); + doc.add(newTextField("field", "a field", Field.Store.YES)); + w.addDocument(doc); + Document crashDoc = new Document(); crashDoc.add(newTextField("crash", "do it on token 4", Field.Store.YES)); + doCrash.set(true); try { - w.addDocument(crashDoc, analyzer); + w.addDocument(crashDoc); fail("did not hit expected exception"); } catch (IOException ioe) { // expected diff --git a/lucene/core/src/test/org/apache/lucene/index/TestPayloads.java b/lucene/core/src/test/org/apache/lucene/index/TestPayloads.java index a9e068c13cf..fb0d142032a 100644 --- a/lucene/core/src/test/org/apache/lucene/index/TestPayloads.java +++ b/lucene/core/src/test/org/apache/lucene/index/TestPayloads.java @@ -42,521 +42,520 @@ import org.apache.lucene.util.TestUtil; public class TestPayloads extends LuceneTestCase { - // Simple tests to test the Payload class - public void testPayload() throws Exception { - BytesRef payload = new BytesRef("This is a test!"); - assertEquals("Wrong payload length.", "This is a test!".length(), payload.length); + // Simple tests to test the Payload class + public void testPayload() throws Exception { + BytesRef payload = new BytesRef("This is a test!"); + assertEquals("Wrong payload length.", "This is a test!".length(), payload.length); - BytesRef clone = payload.clone(); - assertEquals(payload.length, clone.length); - for (int i = 0; i < payload.length; i++) { - assertEquals(payload.bytes[i + payload.offset], clone.bytes[i + clone.offset]); - } + BytesRef clone = payload.clone(); + assertEquals(payload.length, clone.length); + for (int i = 0; i < payload.length; i++) { + assertEquals(payload.bytes[i + payload.offset], clone.bytes[i + clone.offset]); + } - } + } - // Tests whether the DocumentWriter and SegmentMerger correctly enable the - // payload bit in the FieldInfo - public void testPayloadFieldBit() throws Exception { - Directory ram = newDirectory(); - PayloadAnalyzer analyzer = new PayloadAnalyzer(); - IndexWriter writer = new IndexWriter(ram, newIndexWriterConfig(analyzer)); - Document d = new Document(); - // this field won't have any payloads - d.add(newTextField("f1", "This field has no payloads", Field.Store.NO)); - // this field will have payloads in all docs, however not for all term positions, - // so this field is used to check if the DocumentWriter correctly enables the payloads bit - // even if only some term positions have payloads - d.add(newTextField("f2", "This field has payloads in all docs", Field.Store.NO)); - d.add(newTextField("f2", "This field has payloads in all docs NO 
PAYLOAD", Field.Store.NO)); - // this field is used to verify if the SegmentMerger enables payloads for a field if it has payloads - // enabled in only some documents - d.add(newTextField("f3", "This field has payloads in some docs", Field.Store.NO)); - // only add payload data for field f2 - analyzer.setPayloadData("f2", "somedata".getBytes(StandardCharsets.UTF_8), 0, 1); - writer.addDocument(d); - // flush - writer.close(); + // Tests whether the DocumentWriter and SegmentMerger correctly enable the + // payload bit in the FieldInfo + public void testPayloadFieldBit() throws Exception { + Directory ram = newDirectory(); + PayloadAnalyzer analyzer = new PayloadAnalyzer(); + IndexWriter writer = new IndexWriter(ram, newIndexWriterConfig(analyzer)); + Document d = new Document(); + // this field won't have any payloads + d.add(newTextField("f1", "This field has no payloads", Field.Store.NO)); + // this field will have payloads in all docs, however not for all term positions, + // so this field is used to check if the DocumentWriter correctly enables the payloads bit + // even if only some term positions have payloads + d.add(newTextField("f2", "This field has payloads in all docs", Field.Store.NO)); + d.add(newTextField("f2", "This field has payloads in all docs NO PAYLOAD", Field.Store.NO)); + // this field is used to verify if the SegmentMerger enables payloads for a field if it has payloads + // enabled in only some documents + d.add(newTextField("f3", "This field has payloads in some docs", Field.Store.NO)); + // only add payload data for field f2 + analyzer.setPayloadData("f2", "somedata".getBytes(StandardCharsets.UTF_8), 0, 1); + writer.addDocument(d); + // flush + writer.close(); - SegmentReader reader = getOnlySegmentReader(DirectoryReader.open(ram)); - FieldInfos fi = reader.getFieldInfos(); - assertFalse("Payload field bit should not be set.", fi.fieldInfo("f1").hasPayloads()); - assertTrue("Payload field bit should be set.", fi.fieldInfo("f2").hasPayloads()); - assertFalse("Payload field bit should not be set.", fi.fieldInfo("f3").hasPayloads()); - reader.close(); + SegmentReader reader = getOnlySegmentReader(DirectoryReader.open(ram)); + FieldInfos fi = reader.getFieldInfos(); + assertFalse("Payload field bit should not be set.", fi.fieldInfo("f1").hasPayloads()); + assertTrue("Payload field bit should be set.", fi.fieldInfo("f2").hasPayloads()); + assertFalse("Payload field bit should not be set.", fi.fieldInfo("f3").hasPayloads()); + reader.close(); - // now we add another document which has payloads for field f3 and verify if the SegmentMerger - // enabled payloads for that field - analyzer = new PayloadAnalyzer(); // Clear payload state for each field - writer = new IndexWriter(ram, newIndexWriterConfig(analyzer) - .setOpenMode(OpenMode.CREATE)); - d = new Document(); - d.add(newTextField("f1", "This field has no payloads", Field.Store.NO)); - d.add(newTextField("f2", "This field has payloads in all docs", Field.Store.NO)); - d.add(newTextField("f2", "This field has payloads in all docs", Field.Store.NO)); - d.add(newTextField("f3", "This field has payloads in some docs", Field.Store.NO)); - // add payload data for field f2 and f3 - analyzer.setPayloadData("f2", "somedata".getBytes(StandardCharsets.UTF_8), 0, 1); - analyzer.setPayloadData("f3", "somedata".getBytes(StandardCharsets.UTF_8), 0, 3); - writer.addDocument(d); + // now we add another document which has payloads for field f3 and verify if the SegmentMerger + // enabled payloads for that field + analyzer = new 
PayloadAnalyzer(); // Clear payload state for each field + writer = new IndexWriter(ram, newIndexWriterConfig(analyzer) + .setOpenMode(OpenMode.CREATE)); + d = new Document(); + d.add(newTextField("f1", "This field has no payloads", Field.Store.NO)); + d.add(newTextField("f2", "This field has payloads in all docs", Field.Store.NO)); + d.add(newTextField("f2", "This field has payloads in all docs", Field.Store.NO)); + d.add(newTextField("f3", "This field has payloads in some docs", Field.Store.NO)); + // add payload data for field f2 and f3 + analyzer.setPayloadData("f2", "somedata".getBytes(StandardCharsets.UTF_8), 0, 1); + analyzer.setPayloadData("f3", "somedata".getBytes(StandardCharsets.UTF_8), 0, 3); + writer.addDocument(d); - // force merge - writer.forceMerge(1); - // flush - writer.close(); + // force merge + writer.forceMerge(1); + // flush + writer.close(); - reader = getOnlySegmentReader(DirectoryReader.open(ram)); - fi = reader.getFieldInfos(); - assertFalse("Payload field bit should not be set.", fi.fieldInfo("f1").hasPayloads()); - assertTrue("Payload field bit should be set.", fi.fieldInfo("f2").hasPayloads()); - assertTrue("Payload field bit should be set.", fi.fieldInfo("f3").hasPayloads()); - reader.close(); - ram.close(); - } + reader = getOnlySegmentReader(DirectoryReader.open(ram)); + fi = reader.getFieldInfos(); + assertFalse("Payload field bit should not be set.", fi.fieldInfo("f1").hasPayloads()); + assertTrue("Payload field bit should be set.", fi.fieldInfo("f2").hasPayloads()); + assertTrue("Payload field bit should be set.", fi.fieldInfo("f3").hasPayloads()); + reader.close(); + ram.close(); + } - // Tests if payloads are correctly stored and loaded using both RamDirectory and FSDirectory - public void testPayloadsEncoding() throws Exception { - Directory dir = newDirectory(); - performTest(dir); - dir.close(); - } + // Tests if payloads are correctly stored and loaded using both RamDirectory and FSDirectory + public void testPayloadsEncoding() throws Exception { + Directory dir = newDirectory(); + performTest(dir); + dir.close(); + } - // builds an index with payloads in the given Directory and performs - // different tests to verify the payload encoding - private void performTest(Directory dir) throws Exception { - PayloadAnalyzer analyzer = new PayloadAnalyzer(); - IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(analyzer) - .setOpenMode(OpenMode.CREATE) - .setMergePolicy(newLogMergePolicy())); + // builds an index with payloads in the given Directory and performs + // different tests to verify the payload encoding + private void performTest(Directory dir) throws Exception { + PayloadAnalyzer analyzer = new PayloadAnalyzer(); + IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(analyzer) + .setOpenMode(OpenMode.CREATE) + .setMergePolicy(newLogMergePolicy())); - // should be in sync with value in TermInfosWriter - final int skipInterval = 16; + // should be in sync with value in TermInfosWriter + final int skipInterval = 16; - final int numTerms = 5; - final String fieldName = "f1"; + final int numTerms = 5; + final String fieldName = "f1"; - int numDocs = skipInterval + 1; - // create content for the test documents with just a few terms - Term[] terms = generateTerms(fieldName, numTerms); - StringBuilder sb = new StringBuilder(); - for (int i = 0; i < terms.length; i++) { - sb.append(terms[i].text()); - sb.append(" "); - } - String content = sb.toString(); + int numDocs = skipInterval + 1; + // create content for the test documents 
with just a few terms + Term[] terms = generateTerms(fieldName, numTerms); + StringBuilder sb = new StringBuilder(); + for (int i = 0; i < terms.length; i++) { + sb.append(terms[i].text()); + sb.append(" "); + } + String content = sb.toString(); - int payloadDataLength = numTerms * numDocs * 2 + numTerms * numDocs * (numDocs - 1) / 2; - byte[] payloadData = generateRandomData(payloadDataLength); + int payloadDataLength = numTerms * numDocs * 2 + numTerms * numDocs * (numDocs - 1) / 2; + byte[] payloadData = generateRandomData(payloadDataLength); - Document d = new Document(); - d.add(newTextField(fieldName, content, Field.Store.NO)); - // add the same document multiple times to have the same payload lengths for all - // occurrences within two consecutive skip intervals - int offset = 0; - for (int i = 0; i < 2 * numDocs; i++) { - analyzer = new PayloadAnalyzer(fieldName, payloadData, offset, 1); - offset += numTerms; - writer.addDocument(d, analyzer); - } + Document d = new Document(); + d.add(newTextField(fieldName, content, Field.Store.NO)); + // add the same document multiple times to have the same payload lengths for all + // occurrences within two consecutive skip intervals + int offset = 0; + for (int i = 0; i < 2 * numDocs; i++) { + analyzer.setPayloadData(fieldName, payloadData, offset, 1); + offset += numTerms; + writer.addDocument(d); + } - // make sure we create more than one segment to test merging - writer.commit(); + // make sure we create more than one segment to test merging + writer.commit(); - // now we make sure to have different payload lengths next at the next skip point - for (int i = 0; i < numDocs; i++) { - analyzer = new PayloadAnalyzer(fieldName, payloadData, offset, i); - offset += i * numTerms; - writer.addDocument(d, analyzer); - } + // now we make sure to have different payload lengths next at the next skip point + for (int i = 0; i < numDocs; i++) { + analyzer.setPayloadData(fieldName, payloadData, offset, i); + offset += i * numTerms; + writer.addDocument(d); + } - writer.forceMerge(1); - // flush - writer.close(); + writer.forceMerge(1); + // flush + writer.close(); - /* - * Verify the index - * first we test if all payloads are stored correctly - */ - IndexReader reader = DirectoryReader.open(dir); + /* + * Verify the index + * first we test if all payloads are stored correctly + */ + IndexReader reader = DirectoryReader.open(dir); - byte[] verifyPayloadData = new byte[payloadDataLength]; - offset = 0; - DocsAndPositionsEnum[] tps = new DocsAndPositionsEnum[numTerms]; - for (int i = 0; i < numTerms; i++) { - tps[i] = MultiFields.getTermPositionsEnum(reader, - MultiFields.getLiveDocs(reader), - terms[i].field(), - new BytesRef(terms[i].text())); - } + byte[] verifyPayloadData = new byte[payloadDataLength]; + offset = 0; + DocsAndPositionsEnum[] tps = new DocsAndPositionsEnum[numTerms]; + for (int i = 0; i < numTerms; i++) { + tps[i] = MultiFields.getTermPositionsEnum(reader, + MultiFields.getLiveDocs(reader), + terms[i].field(), + new BytesRef(terms[i].text())); + } - while (tps[0].nextDoc() != DocIdSetIterator.NO_MORE_DOCS) { - for (int i = 1; i < numTerms; i++) { - tps[i].nextDoc(); - } - int freq = tps[0].freq(); + while (tps[0].nextDoc() != DocIdSetIterator.NO_MORE_DOCS) { + for (int i = 1; i < numTerms; i++) { + tps[i].nextDoc(); + } + int freq = tps[0].freq(); - for (int i = 0; i < freq; i++) { - for (int j = 0; j < numTerms; j++) { - tps[j].nextPosition(); - BytesRef br = tps[j].getPayload(); - if (br != null) { - System.arraycopy(br.bytes, 
br.offset, verifyPayloadData, offset, br.length); - offset += br.length; - } - } - } + for (int i = 0; i < freq; i++) { + for (int j = 0; j < numTerms; j++) { + tps[j].nextPosition(); + BytesRef br = tps[j].getPayload(); + if (br != null) { + System.arraycopy(br.bytes, br.offset, verifyPayloadData, offset, br.length); + offset += br.length; + } } + } + } - assertByteArrayEquals(payloadData, verifyPayloadData); + assertByteArrayEquals(payloadData, verifyPayloadData); - /* - * test lazy skipping - */ - DocsAndPositionsEnum tp = MultiFields.getTermPositionsEnum(reader, - MultiFields.getLiveDocs(reader), - terms[0].field(), - new BytesRef(terms[0].text())); - tp.nextDoc(); - tp.nextPosition(); - // NOTE: prior rev of this test was failing to first - // call next here: - tp.nextDoc(); - // now we don't read this payload - tp.nextPosition(); - BytesRef payload = tp.getPayload(); - assertEquals("Wrong payload length.", 1, payload.length); - assertEquals(payload.bytes[payload.offset], payloadData[numTerms]); - tp.nextDoc(); - tp.nextPosition(); + /* + * test lazy skipping + */ + DocsAndPositionsEnum tp = MultiFields.getTermPositionsEnum(reader, + MultiFields.getLiveDocs(reader), + terms[0].field(), + new BytesRef(terms[0].text())); + tp.nextDoc(); + tp.nextPosition(); + // NOTE: prior rev of this test was failing to first + // call next here: + tp.nextDoc(); + // now we don't read this payload + tp.nextPosition(); + BytesRef payload = tp.getPayload(); + assertEquals("Wrong payload length.", 1, payload.length); + assertEquals(payload.bytes[payload.offset], payloadData[numTerms]); + tp.nextDoc(); + tp.nextPosition(); - // we don't read this payload and skip to a different document - tp.advance(5); - tp.nextPosition(); - payload = tp.getPayload(); - assertEquals("Wrong payload length.", 1, payload.length); - assertEquals(payload.bytes[payload.offset], payloadData[5 * numTerms]); + // we don't read this payload and skip to a different document + tp.advance(5); + tp.nextPosition(); + payload = tp.getPayload(); + assertEquals("Wrong payload length.", 1, payload.length); + assertEquals(payload.bytes[payload.offset], payloadData[5 * numTerms]); - /* - * Test different lengths at skip points - */ - tp = MultiFields.getTermPositionsEnum(reader, - MultiFields.getLiveDocs(reader), - terms[1].field(), - new BytesRef(terms[1].text())); - tp.nextDoc(); - tp.nextPosition(); - assertEquals("Wrong payload length.", 1, tp.getPayload().length); - tp.advance(skipInterval - 1); - tp.nextPosition(); - assertEquals("Wrong payload length.", 1, tp.getPayload().length); - tp.advance(2 * skipInterval - 1); - tp.nextPosition(); - assertEquals("Wrong payload length.", 1, tp.getPayload().length); - tp.advance(3 * skipInterval - 1); - tp.nextPosition(); - assertEquals("Wrong payload length.", 3 * skipInterval - 2 * numDocs - 1, tp.getPayload().length); + /* + * Test different lengths at skip points + */ + tp = MultiFields.getTermPositionsEnum(reader, + MultiFields.getLiveDocs(reader), + terms[1].field(), + new BytesRef(terms[1].text())); + tp.nextDoc(); + tp.nextPosition(); + assertEquals("Wrong payload length.", 1, tp.getPayload().length); + tp.advance(skipInterval - 1); + tp.nextPosition(); + assertEquals("Wrong payload length.", 1, tp.getPayload().length); + tp.advance(2 * skipInterval - 1); + tp.nextPosition(); + assertEquals("Wrong payload length.", 1, tp.getPayload().length); + tp.advance(3 * skipInterval - 1); + tp.nextPosition(); + assertEquals("Wrong payload length.", 3 * skipInterval - 2 * numDocs - 1, 
tp.getPayload().length); - reader.close(); + reader.close(); - // test long payload - analyzer = new PayloadAnalyzer(); - writer = new IndexWriter(dir, newIndexWriterConfig(analyzer) - .setOpenMode(OpenMode.CREATE)); - String singleTerm = "lucene"; + // test long payload + analyzer = new PayloadAnalyzer(); + writer = new IndexWriter(dir, newIndexWriterConfig(analyzer) + .setOpenMode(OpenMode.CREATE)); + String singleTerm = "lucene"; - d = new Document(); - d.add(newTextField(fieldName, singleTerm, Field.Store.NO)); - // add a payload whose length is greater than the buffer size of BufferedIndexOutput - payloadData = generateRandomData(2000); - analyzer.setPayloadData(fieldName, payloadData, 100, 1500); - writer.addDocument(d); + d = new Document(); + d.add(newTextField(fieldName, singleTerm, Field.Store.NO)); + // add a payload whose length is greater than the buffer size of BufferedIndexOutput + payloadData = generateRandomData(2000); + analyzer.setPayloadData(fieldName, payloadData, 100, 1500); + writer.addDocument(d); - writer.forceMerge(1); - // flush - writer.close(); + writer.forceMerge(1); + // flush + writer.close(); - reader = DirectoryReader.open(dir); - tp = MultiFields.getTermPositionsEnum(reader, - MultiFields.getLiveDocs(reader), - fieldName, - new BytesRef(singleTerm)); - tp.nextDoc(); - tp.nextPosition(); + reader = DirectoryReader.open(dir); + tp = MultiFields.getTermPositionsEnum(reader, + MultiFields.getLiveDocs(reader), + fieldName, + new BytesRef(singleTerm)); + tp.nextDoc(); + tp.nextPosition(); - BytesRef br = tp.getPayload(); - verifyPayloadData = new byte[br.length]; - byte[] portion = new byte[1500]; - System.arraycopy(payloadData, 100, portion, 0, 1500); + BytesRef br = tp.getPayload(); + verifyPayloadData = new byte[br.length]; + byte[] portion = new byte[1500]; + System.arraycopy(payloadData, 100, portion, 0, 1500); - assertByteArrayEquals(portion, br.bytes, br.offset, br.length); - reader.close(); + assertByteArrayEquals(portion, br.bytes, br.offset, br.length); + reader.close(); - } + } - static final Charset utf8 = StandardCharsets.UTF_8; + static final Charset utf8 = StandardCharsets.UTF_8; - private void generateRandomData(byte[] data) { - // this test needs the random data to be valid unicode - String s = TestUtil.randomFixedByteLengthUnicodeString(random(), data.length); - byte b[] = s.getBytes(utf8); - assert b.length == data.length; - System.arraycopy(b, 0, data, 0, b.length); - } + private void generateRandomData(byte[] data) { + // this test needs the random data to be valid unicode + String s = TestUtil.randomFixedByteLengthUnicodeString(random(), data.length); + byte b[] = s.getBytes(utf8); + assert b.length == data.length; + System.arraycopy(b, 0, data, 0, b.length); + } - private byte[] generateRandomData(int n) { - byte[] data = new byte[n]; - generateRandomData(data); - return data; - } + private byte[] generateRandomData(int n) { + byte[] data = new byte[n]; + generateRandomData(data); + return data; + } - private Term[] generateTerms(String fieldName, int n) { - int maxDigits = (int) (Math.log(n) / Math.log(10)); - Term[] terms = new Term[n]; - StringBuilder sb = new StringBuilder(); - for (int i = 0; i < n; i++) { - sb.setLength(0); - sb.append("t"); - int zeros = maxDigits - (int) (Math.log(i) / Math.log(10)); - for (int j = 0; j < zeros; j++) { - sb.append("0"); - } - sb.append(i); - terms[i] = new Term(fieldName, sb.toString()); - } - return terms; + private Term[] generateTerms(String fieldName, int n) { + int maxDigits = (int) 
(Math.log(n) / Math.log(10)); + Term[] terms = new Term[n]; + StringBuilder sb = new StringBuilder(); + for (int i = 0; i < n; i++) { + sb.setLength(0); + sb.append("t"); + int zeros = maxDigits - (int) (Math.log(i) / Math.log(10)); + for (int j = 0; j < zeros; j++) { + sb.append("0"); + } + sb.append(i); + terms[i] = new Term(fieldName, sb.toString()); } + return terms; + } - void assertByteArrayEquals(byte[] b1, byte[] b2) { - if (b1.length != b2.length) { - fail("Byte arrays have different lengths: " + b1.length + ", " + b2.length); - } + void assertByteArrayEquals(byte[] b1, byte[] b2) { + if (b1.length != b2.length) { + fail("Byte arrays have different lengths: " + b1.length + ", " + b2.length); + } - for (int i = 0; i < b1.length; i++) { - if (b1[i] != b2[i]) { - fail("Byte arrays different at index " + i + ": " + b1[i] + ", " + b2[i]); - } - } - } + for (int i = 0; i < b1.length; i++) { + if (b1[i] != b2[i]) { + fail("Byte arrays different at index " + i + ": " + b1[i] + ", " + b2[i]); + } + } + } void assertByteArrayEquals(byte[] b1, byte[] b2, int b2offset, int b2length) { - if (b1.length != b2length) { - fail("Byte arrays have different lengths: " + b1.length + ", " + b2length); - } + if (b1.length != b2length) { + fail("Byte arrays have different lengths: " + b1.length + ", " + b2length); + } - for (int i = 0; i < b1.length; i++) { - if (b1[i] != b2[b2offset+i]) { - fail("Byte arrays different at index " + i + ": " + b1[i] + ", " + b2[b2offset+i]); - } - } - } + for (int i = 0; i < b1.length; i++) { + if (b1[i] != b2[b2offset+i]) { + fail("Byte arrays different at index " + i + ": " + b1[i] + ", " + b2[b2offset+i]); + } + } + } - /** - * This Analyzer uses an WhitespaceTokenizer and PayloadFilter. - */ - private static class PayloadAnalyzer extends Analyzer { - Map fieldToData = new HashMap<>(); + static class PayloadData { + byte[] data; + int offset; + int length; - public PayloadAnalyzer() { - super(PER_FIELD_REUSE_STRATEGY); - } - - public PayloadAnalyzer(String field, byte[] data, int offset, int length) { - super(PER_FIELD_REUSE_STRATEGY); - setPayloadData(field, data, offset, length); - } + PayloadData(byte[] data, int offset, int length) { + this.data = data; + this.offset = offset; + this.length = length; + } + } - void setPayloadData(String field, byte[] data, int offset, int length) { - fieldToData.put(field, new PayloadData(data, offset, length)); - } - - @Override - public TokenStreamComponents createComponents(String fieldName) { - PayloadData payload = fieldToData.get(fieldName); - Tokenizer ts = new MockTokenizer(MockTokenizer.WHITESPACE, false); - TokenStream tokenStream = (payload != null) ? - new PayloadFilter(ts, payload.data, payload.offset, payload.length) : ts; - return new TokenStreamComponents(ts, tokenStream); - } - - private static class PayloadData { - byte[] data; - int offset; - int length; + /** + * This Analyzer uses an MockTokenizer and PayloadFilter. 
+ */ + private static class PayloadAnalyzer extends Analyzer { + Map fieldToData = new HashMap<>(); - PayloadData(byte[] data, int offset, int length) { - this.data = data; - this.offset = offset; - this.length = length; - } - } + public PayloadAnalyzer() { + super(PER_FIELD_REUSE_STRATEGY); + } + + public PayloadAnalyzer(String field, byte[] data, int offset, int length) { + super(PER_FIELD_REUSE_STRATEGY); + setPayloadData(field, data, offset, length); } + void setPayloadData(String field, byte[] data, int offset, int length) { + fieldToData.put(field, new PayloadData(data, offset, length)); + } + + @Override + public TokenStreamComponents createComponents(String fieldName) { + PayloadData payload = fieldToData.get(fieldName); + Tokenizer ts = new MockTokenizer(MockTokenizer.WHITESPACE, false); + TokenStream tokenStream = (payload != null) ? + new PayloadFilter(ts, fieldName, fieldToData) : ts; + return new TokenStreamComponents(ts, tokenStream); + } + } + - /** - * This Filter adds payloads to the tokens. - */ - private static class PayloadFilter extends TokenFilter { - private byte[] data; - private int length; - private int offset; - private int startOffset; - PayloadAttribute payloadAtt; - CharTermAttribute termAttribute; + /** + * This Filter adds payloads to the tokens. + */ + private static class PayloadFilter extends TokenFilter { + PayloadAttribute payloadAtt; + CharTermAttribute termAttribute; + private Map fieldToData; + private String fieldName; + private PayloadData payloadData; + private int offset; - public PayloadFilter(TokenStream in, byte[] data, int offset, int length) { - super(in); - this.data = data; - this.length = length; - this.offset = offset; - this.startOffset = offset; - payloadAtt = addAttribute(PayloadAttribute.class); - termAttribute = addAttribute(CharTermAttribute.class); - } + public PayloadFilter(TokenStream in, String fieldName, Map fieldToData) { + super(in); + this.fieldToData = fieldToData; + this.fieldName = fieldName; + payloadAtt = addAttribute(PayloadAttribute.class); + termAttribute = addAttribute(CharTermAttribute.class); + } - @Override - public boolean incrementToken() throws IOException { - boolean hasNext = input.incrementToken(); - if (!hasNext) { - return false; - } + @Override + public boolean incrementToken() throws IOException { + boolean hasNext = input.incrementToken(); + if (!hasNext) { + return false; + } - // Some values of the same field are to have payloads and others not - if (offset + length <= data.length && !termAttribute.toString().endsWith("NO PAYLOAD")) { - BytesRef p = new BytesRef(data, offset, length); - payloadAtt.setPayload(p); - offset += length; - } else { - payloadAtt.setPayload(null); - } + // Some values of the same field are to have payloads and others not + if (offset + payloadData.length <= payloadData.data.length && !termAttribute.toString().endsWith("NO PAYLOAD")) { + BytesRef p = new BytesRef(payloadData.data, offset, payloadData.length); + payloadAtt.setPayload(p); + offset += payloadData.length; + } else { + payloadAtt.setPayload(null); + } - return true; + return true; + } + + @Override + public void reset() throws IOException { + super.reset(); + this.payloadData = fieldToData.get(fieldName); + this.offset = payloadData.offset; + } + } + + public void testThreadSafety() throws Exception { + final int numThreads = 5; + final int numDocs = atLeast(50); + final ByteArrayPool pool = new ByteArrayPool(numThreads, 5); + + Directory dir = newDirectory(); + final IndexWriter writer = new IndexWriter(dir, 
newIndexWriterConfig(new MockAnalyzer(random()))); + final String field = "test"; + + Thread[] ingesters = new Thread[numThreads]; + for (int i = 0; i < numThreads; i++) { + ingesters[i] = new Thread() { + @Override + public void run() { + try { + for (int j = 0; j < numDocs; j++) { + Document d = new Document(); + d.add(new TextField(field, new PoolingPayloadTokenStream(pool))); + writer.addDocument(d); + } + } catch (Exception e) { + e.printStackTrace(); + fail(e.toString()); + } + } + }; + ingesters[i].start(); + } + + for (int i = 0; i < numThreads; i++) { + ingesters[i].join(); + } + writer.close(); + IndexReader reader = DirectoryReader.open(dir); + TermsEnum terms = MultiFields.getFields(reader).terms(field).iterator(null); + Bits liveDocs = MultiFields.getLiveDocs(reader); + DocsAndPositionsEnum tp = null; + while (terms.next() != null) { + String termText = terms.term().utf8ToString(); + tp = terms.docsAndPositions(liveDocs, tp); + while(tp.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) { + int freq = tp.freq(); + for (int i = 0; i < freq; i++) { + tp.nextPosition(); + final BytesRef payload = tp.getPayload(); + assertEquals(termText, payload.utf8ToString()); } + } + } + reader.close(); + dir.close(); + assertEquals(pool.size(), numThreads); + } + + private class PoolingPayloadTokenStream extends TokenStream { + private byte[] payload; + private boolean first; + private ByteArrayPool pool; + private String term; - @Override - public void reset() throws IOException { - super.reset(); - this.offset = startOffset; + CharTermAttribute termAtt; + PayloadAttribute payloadAtt; + + PoolingPayloadTokenStream(ByteArrayPool pool) { + this.pool = pool; + payload = pool.get(); + generateRandomData(payload); + term = new String(payload, 0, payload.length, utf8); + first = true; + payloadAtt = addAttribute(PayloadAttribute.class); + termAtt = addAttribute(CharTermAttribute.class); + } + + @Override + public boolean incrementToken() throws IOException { + if (!first) return false; + first = false; + clearAttributes(); + termAtt.append(term); + payloadAtt.setPayload(new BytesRef(payload)); + return true; + } + + @Override + public void close() throws IOException { + pool.release(payload); + } + + } + + private static class ByteArrayPool { + private List pool; + + ByteArrayPool(int capacity, int size) { + pool = new ArrayList<>(); + for (int i = 0; i < capacity; i++) { + pool.add(new byte[size]); } } - public void testThreadSafety() throws Exception { - final int numThreads = 5; - final int numDocs = atLeast(50); - final ByteArrayPool pool = new ByteArrayPool(numThreads, 5); - - Directory dir = newDirectory(); - final IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(new MockAnalyzer(random()))); - final String field = "test"; - - Thread[] ingesters = new Thread[numThreads]; - for (int i = 0; i < numThreads; i++) { - ingesters[i] = new Thread() { - @Override - public void run() { - try { - for (int j = 0; j < numDocs; j++) { - Document d = new Document(); - d.add(new TextField(field, new PoolingPayloadTokenStream(pool))); - writer.addDocument(d); - } - } catch (Exception e) { - e.printStackTrace(); - fail(e.toString()); - } - } - }; - ingesters[i].start(); - } - - for (int i = 0; i < numThreads; i++) { - ingesters[i].join(); - } - writer.close(); - IndexReader reader = DirectoryReader.open(dir); - TermsEnum terms = MultiFields.getFields(reader).terms(field).iterator(null); - Bits liveDocs = MultiFields.getLiveDocs(reader); - DocsAndPositionsEnum tp = null; - while (terms.next() != null) { 
- String termText = terms.term().utf8ToString(); - tp = terms.docsAndPositions(liveDocs, tp); - while(tp.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) { - int freq = tp.freq(); - for (int i = 0; i < freq; i++) { - tp.nextPosition(); - final BytesRef payload = tp.getPayload(); - assertEquals(termText, payload.utf8ToString()); - } - } - } - reader.close(); - dir.close(); - assertEquals(pool.size(), numThreads); + synchronized byte[] get() { + return pool.remove(0); } - - private class PoolingPayloadTokenStream extends TokenStream { - private byte[] payload; - private boolean first; - private ByteArrayPool pool; - private String term; - - CharTermAttribute termAtt; - PayloadAttribute payloadAtt; - - PoolingPayloadTokenStream(ByteArrayPool pool) { - this.pool = pool; - payload = pool.get(); - generateRandomData(payload); - term = new String(payload, 0, payload.length, utf8); - first = true; - payloadAtt = addAttribute(PayloadAttribute.class); - termAtt = addAttribute(CharTermAttribute.class); - } - - @Override - public boolean incrementToken() throws IOException { - if (!first) return false; - first = false; - clearAttributes(); - termAtt.append(term); - payloadAtt.setPayload(new BytesRef(payload)); - return true; - } - - @Override - public void close() throws IOException { - pool.release(payload); - } + synchronized void release(byte[] b) { + pool.add(b); } - - private static class ByteArrayPool { - private List pool; - ByteArrayPool(int capacity, int size) { - pool = new ArrayList<>(); - for (int i = 0; i < capacity; i++) { - pool.add(new byte[size]); - } - } - - synchronized byte[] get() { - return pool.remove(0); - } - - synchronized void release(byte[] b) { - pool.add(b); - } - - synchronized int size() { - return pool.size(); - } + synchronized int size() { + return pool.size(); } + } public void testAcrossFields() throws Exception { Directory dir = newDirectory(); @@ -646,5 +645,4 @@ public class TestPayloads extends LuceneTestCase { reader.close(); dir.close(); } - } diff --git a/lucene/core/src/test/org/apache/lucene/search/TestControlledRealTimeReopenThread.java b/lucene/core/src/test/org/apache/lucene/search/TestControlledRealTimeReopenThread.java index 7cead0e64ab..37bc44b3590 100644 --- a/lucene/core/src/test/org/apache/lucene/search/TestControlledRealTimeReopenThread.java +++ b/lucene/core/src/test/org/apache/lucene/search/TestControlledRealTimeReopenThread.java @@ -390,9 +390,9 @@ public class TestControlledRealTimeReopenThread extends ThreadedIndexingAndSearc @Override public void updateDocument(Term term, - IndexDocument doc, Analyzer analyzer) + IndexDocument doc) throws IOException { - super.updateDocument(term, doc, analyzer); + super.updateDocument(term, doc); try { if (waitAfterUpdate) { signal.countDown(); diff --git a/lucene/highlighter/src/test/org/apache/lucene/search/highlight/HighlighterTest.java b/lucene/highlighter/src/test/org/apache/lucene/search/highlight/HighlighterTest.java index 7072d93cb46..bbc79d4d180 100644 --- a/lucene/highlighter/src/test/org/apache/lucene/search/highlight/HighlighterTest.java +++ b/lucene/highlighter/src/test/org/apache/lucene/search/highlight/HighlighterTest.java @@ -2040,22 +2040,22 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatte Document doc = new Document(); doc.add(new IntField(NUMERIC_FIELD_NAME, 1, Field.Store.NO)); doc.add(new StoredField(NUMERIC_FIELD_NAME, 1)); - writer.addDocument(doc, analyzer); + writer.addDocument(doc); doc = new Document(); doc.add(new IntField(NUMERIC_FIELD_NAME, 3, 
Field.Store.NO)); doc.add(new StoredField(NUMERIC_FIELD_NAME, 3)); - writer.addDocument(doc, analyzer); + writer.addDocument(doc); doc = new Document(); doc.add(new IntField(NUMERIC_FIELD_NAME, 5, Field.Store.NO)); doc.add(new StoredField(NUMERIC_FIELD_NAME, 5)); - writer.addDocument(doc, analyzer); + writer.addDocument(doc); doc = new Document(); doc.add(new IntField(NUMERIC_FIELD_NAME, 7, Field.Store.NO)); doc.add(new StoredField(NUMERIC_FIELD_NAME, 7)); - writer.addDocument(doc, analyzer); + writer.addDocument(doc); Document childDoc = doc(FIELD_NAME, "child document"); Document parentDoc = doc(FIELD_NAME, "parent document"); diff --git a/lucene/misc/src/java/org/apache/lucene/index/SortingMergePolicy.java b/lucene/misc/src/java/org/apache/lucene/index/SortingMergePolicy.java index 716f3e8004e..9e7b9a71c90 100644 --- a/lucene/misc/src/java/org/apache/lucene/index/SortingMergePolicy.java +++ b/lucene/misc/src/java/org/apache/lucene/index/SortingMergePolicy.java @@ -47,7 +47,7 @@ import org.apache.lucene.util.packed.PackedLongValues; * will be sorted while segments resulting from a flush will be in the order * in which documents have been added. *
NOTE: Never use this policy if you rely on - * {@link IndexWriter#addDocuments(Iterable, Analyzer) IndexWriter.addDocuments} + * {@link IndexWriter#addDocuments(Iterable) IndexWriter.addDocuments} * to have sequentially-assigned doc IDs, this policy will scatter doc IDs. *
NOTE: This policy should only be used with idempotent {@code Sort}s * so that the order of segments is predictable. For example, using diff --git a/lucene/test-framework/src/java/org/apache/lucene/index/RandomIndexWriter.java b/lucene/test-framework/src/java/org/apache/lucene/index/RandomIndexWriter.java index 5bf5498b61a..19c3fc4996a 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/index/RandomIndexWriter.java +++ b/lucene/test-framework/src/java/org/apache/lucene/index/RandomIndexWriter.java @@ -103,11 +103,6 @@ public class RandomIndexWriter implements Closeable { * @see IndexWriter#addDocument(org.apache.lucene.index.IndexDocument) */ public void addDocument(final IndexDocument doc) throws IOException { - LuceneTestCase.maybeChangeLiveIndexWriterConfig(r, w.getConfig()); - addDocument(doc, w.getAnalyzer()); - } - - public void addDocument(final IndexDocument doc, Analyzer a) throws IOException { LuceneTestCase.maybeChangeLiveIndexWriterConfig(r, w.getConfig()); if (r.nextInt(5) == 3) { // TODO: maybe, we should simply buffer up added docs @@ -141,9 +136,9 @@ public class RandomIndexWriter implements Closeable { } }; } - }, a); + }); } else { - w.addDocument(doc, a); + w.addDocument(doc); } maybeCommit(); diff --git a/solr/core/src/java/org/apache/solr/core/SolrCore.java b/solr/core/src/java/org/apache/solr/core/SolrCore.java index 812314106fb..6a21c502f8a 100644 --- a/solr/core/src/java/org/apache/solr/core/SolrCore.java +++ b/solr/core/src/java/org/apache/solr/core/SolrCore.java @@ -536,7 +536,7 @@ public final class SolrCore implements SolrInfoMBean, Closeable { log.warn(logid+"Solr index directory '" + new File(indexDir) + "' doesn't exist." + " Creating new index..."); - SolrIndexWriter writer = SolrIndexWriter.create("SolrCore.initIndex", indexDir, getDirectoryFactory(), true, + SolrIndexWriter writer = SolrIndexWriter.create(this, "SolrCore.initIndex", indexDir, getDirectoryFactory(), true, getLatestSchema(), solrConfig.indexConfig, solrDelPolicy, codec); writer.close(); } diff --git a/solr/core/src/java/org/apache/solr/update/DefaultSolrCoreState.java b/solr/core/src/java/org/apache/solr/update/DefaultSolrCoreState.java index 1ff0e19fe18..8aa2178c72f 100644 --- a/solr/core/src/java/org/apache/solr/update/DefaultSolrCoreState.java +++ b/solr/core/src/java/org/apache/solr/update/DefaultSolrCoreState.java @@ -264,7 +264,7 @@ public final class DefaultSolrCoreState extends SolrCoreState implements Recover } protected SolrIndexWriter createMainIndexWriter(SolrCore core, String name) throws IOException { - return SolrIndexWriter.create(name, core.getNewIndexDir(), + return SolrIndexWriter.create(core, name, core.getNewIndexDir(), core.getDirectoryFactory(), false, core.getLatestSchema(), core.getSolrConfig().indexConfig, core.getDeletionPolicy(), core.getCodec()); } diff --git a/solr/core/src/java/org/apache/solr/update/DirectUpdateHandler2.java b/solr/core/src/java/org/apache/solr/update/DirectUpdateHandler2.java index 8ea28c8db21..814d7a4e5c8 100644 --- a/solr/core/src/java/org/apache/solr/update/DirectUpdateHandler2.java +++ b/solr/core/src/java/org/apache/solr/update/DirectUpdateHandler2.java @@ -235,11 +235,11 @@ public class DirectUpdateHandler2 extends UpdateHandler implements SolrCoreState } if (cmd.isBlock()) { - writer.updateDocuments(updateTerm, cmd, schema.getIndexAnalyzer()); + writer.updateDocuments(updateTerm, cmd); } else { Document luceneDocument = cmd.getLuceneDocument(); // SolrCore.verbose("updateDocument",updateTerm,luceneDocument,writer); - 
writer.updateDocument(updateTerm, luceneDocument, schema.getIndexAnalyzer()); + writer.updateDocument(updateTerm, luceneDocument); } // SolrCore.verbose("updateDocument",updateTerm,"DONE"); @@ -264,9 +264,9 @@ public class DirectUpdateHandler2 extends UpdateHandler implements SolrCoreState } else { // allow duplicates if (cmd.isBlock()) { - writer.addDocuments(cmd, schema.getIndexAnalyzer()); + writer.addDocuments(cmd); } else { - writer.addDocument(cmd.getLuceneDocument(), schema.getIndexAnalyzer()); + writer.addDocument(cmd.getLuceneDocument()); } if (ulog != null) ulog.add(cmd); @@ -442,8 +442,7 @@ public class DirectUpdateHandler2 extends UpdateHandler implements SolrCoreState RefCounted iw = solrCoreState.getIndexWriter(core); try { IndexWriter writer = iw.get(); - writer.updateDocument(idTerm, luceneDocument, cmd.getReq().getSchema() - .getIndexAnalyzer()); + writer.updateDocument(idTerm, luceneDocument); for (Query q : dbqList) { writer.deleteDocuments(new DeleteByQueryWrapper(q, core.getLatestSchema())); diff --git a/solr/core/src/java/org/apache/solr/update/SolrIndexConfig.java b/solr/core/src/java/org/apache/solr/update/SolrIndexConfig.java index e8c1f33afb5..793490ea81c 100644 --- a/solr/core/src/java/org/apache/solr/update/SolrIndexConfig.java +++ b/solr/core/src/java/org/apache/solr/update/SolrIndexConfig.java @@ -17,6 +17,14 @@ package org.apache.solr.update; +import java.io.File; +import java.io.FileOutputStream; +import java.io.PrintStream; +import java.util.List; +import java.util.Map; + +import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.analysis.DelegatingAnalyzerWrapper; import org.apache.lucene.index.*; import org.apache.lucene.index.IndexWriter.IndexReaderWarmer; import org.apache.lucene.util.InfoStream; @@ -24,16 +32,14 @@ import org.apache.lucene.util.Version; import org.apache.solr.common.cloud.ZkNodeProps; import org.apache.solr.common.util.NamedList; import org.apache.solr.core.MapSerializable; -import org.apache.solr.core.SolrConfig; import org.apache.solr.core.PluginInfo; +import org.apache.solr.core.SolrConfig; +import org.apache.solr.core.SolrCore; import org.apache.solr.schema.IndexSchema; import org.apache.solr.util.SolrPluginUtils; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import java.util.List; -import java.util.Map; - import static org.apache.solr.core.Config.assertWarnOrFail; /** @@ -180,12 +186,23 @@ public class SolrIndexConfig implements MapSerializable { return l.isEmpty() ? 
def : l.get(0); } - public IndexWriterConfig toIndexWriterConfig(IndexSchema schema) { - // so that we can update the analyzer on core reload, we pass null - // for the default analyzer, and explicitly pass an analyzer on - // appropriate calls to IndexWriter - - IndexWriterConfig iwc = new IndexWriterConfig(null); + private static class DelayedSchemaAnalyzer extends DelegatingAnalyzerWrapper { + private final SolrCore core; + + public DelayedSchemaAnalyzer(SolrCore core) { + super(PER_FIELD_REUSE_STRATEGY); + this.core = core; + } + + @Override + protected Analyzer getWrappedAnalyzer(String fieldName) { + return core.getLatestSchema().getIndexAnalyzer(); + } + } + + public IndexWriterConfig toIndexWriterConfig(SolrCore core) { + IndexSchema schema = core.getLatestSchema(); + IndexWriterConfig iwc = new IndexWriterConfig(new DelayedSchemaAnalyzer(core)); if (maxBufferedDocs != -1) iwc.setMaxBufferedDocs(maxBufferedDocs); diff --git a/solr/core/src/java/org/apache/solr/update/SolrIndexSplitter.java b/solr/core/src/java/org/apache/solr/update/SolrIndexSplitter.java index f1109433d3e..8b552817b39 100644 --- a/solr/core/src/java/org/apache/solr/update/SolrIndexSplitter.java +++ b/solr/core/src/java/org/apache/solr/update/SolrIndexSplitter.java @@ -121,7 +121,7 @@ public class SolrIndexSplitter { } else { SolrCore core = searcher.getCore(); String path = paths.get(partitionNumber); - iw = SolrIndexWriter.create("SplittingIndexWriter"+partitionNumber + (ranges != null ? " " + ranges.get(partitionNumber) : ""), path, + iw = SolrIndexWriter.create(core, "SplittingIndexWriter"+partitionNumber + (ranges != null ? " " + ranges.get(partitionNumber) : ""), path, core.getDirectoryFactory(), true, core.getLatestSchema(), core.getSolrConfig().indexConfig, core.getDeletionPolicy(), core.getCodec()); } diff --git a/solr/core/src/java/org/apache/solr/update/SolrIndexWriter.java b/solr/core/src/java/org/apache/solr/update/SolrIndexWriter.java index ea9e1b8c154..036d0ad8bd2 100644 --- a/solr/core/src/java/org/apache/solr/update/SolrIndexWriter.java +++ b/solr/core/src/java/org/apache/solr/update/SolrIndexWriter.java @@ -27,8 +27,9 @@ import org.apache.lucene.index.IndexWriterConfig; import org.apache.lucene.store.Directory; import org.apache.lucene.util.InfoStream; import org.apache.solr.common.util.IOUtils; -import org.apache.solr.core.DirectoryFactory; import org.apache.solr.core.DirectoryFactory.DirContext; +import org.apache.solr.core.DirectoryFactory; +import org.apache.solr.core.SolrCore; import org.apache.solr.schema.IndexSchema; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -56,12 +57,12 @@ public class SolrIndexWriter extends IndexWriter { private InfoStream infoStream; private Directory directory; - public static SolrIndexWriter create(String name, String path, DirectoryFactory directoryFactory, boolean create, IndexSchema schema, SolrIndexConfig config, IndexDeletionPolicy delPolicy, Codec codec) throws IOException { + public static SolrIndexWriter create(SolrCore core, String name, String path, DirectoryFactory directoryFactory, boolean create, IndexSchema schema, SolrIndexConfig config, IndexDeletionPolicy delPolicy, Codec codec) throws IOException { SolrIndexWriter w = null; final Directory d = directoryFactory.get(path, DirContext.DEFAULT, config.lockType); try { - w = new SolrIndexWriter(name, path, d, create, schema, + w = new SolrIndexWriter(core, name, path, d, create, schema, config, delPolicy, codec); w.setDirectoryFactory(directoryFactory); return w; @@ -73,9 +74,9 @@ public 
class SolrIndexWriter extends IndexWriter { } } - private SolrIndexWriter(String name, String path, Directory directory, boolean create, IndexSchema schema, SolrIndexConfig config, IndexDeletionPolicy delPolicy, Codec codec) throws IOException { + private SolrIndexWriter(SolrCore core, String name, String path, Directory directory, boolean create, IndexSchema schema, SolrIndexConfig config, IndexDeletionPolicy delPolicy, Codec codec) throws IOException { super(directory, - config.toIndexWriterConfig(schema). + config.toIndexWriterConfig(core). setOpenMode(create ? IndexWriterConfig.OpenMode.CREATE : IndexWriterConfig.OpenMode.APPEND). setIndexDeletionPolicy(delPolicy).setCodec(codec) ); diff --git a/solr/core/src/test/org/apache/solr/core/TestConfig.java b/solr/core/src/test/org/apache/solr/core/TestConfig.java index 41d83d31b93..4a3bb7b2e0a 100644 --- a/solr/core/src/test/org/apache/solr/core/TestConfig.java +++ b/solr/core/src/test/org/apache/solr/core/TestConfig.java @@ -116,7 +116,7 @@ public class TestConfig extends SolrTestCaseJ4 { assertEquals("default useCompoundFile", false, sic.getUseCompoundFile()); IndexSchema indexSchema = IndexSchemaFactory.buildIndexSchema("schema.xml", solrConfig); - IndexWriterConfig iwc = sic.toIndexWriterConfig(indexSchema); + IndexWriterConfig iwc = sic.toIndexWriterConfig(h.getCore()); assertNotNull("null mp", iwc.getMergePolicy()); assertTrue("mp is not TMP", iwc.getMergePolicy() instanceof TieredMergePolicy); diff --git a/solr/core/src/test/org/apache/solr/core/TestInfoStreamLogging.java b/solr/core/src/test/org/apache/solr/core/TestInfoStreamLogging.java index fa6bc3fd852..74b3f4be543 100644 --- a/solr/core/src/test/org/apache/solr/core/TestInfoStreamLogging.java +++ b/solr/core/src/test/org/apache/solr/core/TestInfoStreamLogging.java @@ -31,7 +31,7 @@ public class TestInfoStreamLogging extends SolrTestCaseJ4 { } public void testIndexConfig() throws Exception { - IndexWriterConfig iwc = solrConfig.indexConfig.toIndexWriterConfig(h.getCore().getLatestSchema()); + IndexWriterConfig iwc = solrConfig.indexConfig.toIndexWriterConfig(h.getCore()); assertTrue(iwc.getInfoStream() instanceof LoggingInfoStream); } diff --git a/solr/core/src/test/org/apache/solr/core/TestMergePolicyConfig.java b/solr/core/src/test/org/apache/solr/core/TestMergePolicyConfig.java index 7e1ebecd58f..1b9c7cbaa68 100644 --- a/solr/core/src/test/org/apache/solr/core/TestMergePolicyConfig.java +++ b/solr/core/src/test/org/apache/solr/core/TestMergePolicyConfig.java @@ -45,7 +45,7 @@ public class TestMergePolicyConfig extends SolrTestCaseJ4 { public void testDefaultMergePolicyConfig() throws Exception { initCore("solrconfig-mergepolicy-defaults.xml","schema-minimal.xml"); - IndexWriterConfig iwc = solrConfig.indexConfig.toIndexWriterConfig(h.getCore().getLatestSchema()); + IndexWriterConfig iwc = solrConfig.indexConfig.toIndexWriterConfig(h.getCore()); assertEquals(false, iwc.getUseCompoundFile()); TieredMergePolicy tieredMP = assertAndCast(TieredMergePolicy.class, @@ -61,7 +61,7 @@ public class TestMergePolicyConfig extends SolrTestCaseJ4 { = Boolean.parseBoolean(System.getProperty("useCompoundFile")); initCore("solrconfig-mergepolicy-legacy.xml","schema-minimal.xml"); - IndexWriterConfig iwc = solrConfig.indexConfig.toIndexWriterConfig(h.getCore().getLatestSchema()); + IndexWriterConfig iwc = solrConfig.indexConfig.toIndexWriterConfig(h.getCore()); assertEquals(expectCFS, iwc.getUseCompoundFile()); @@ -81,7 +81,7 @@ public class TestMergePolicyConfig extends SolrTestCaseJ4 { = 
Boolean.parseBoolean(System.getProperty("useCompoundFile")); initCore("solrconfig-tieredmergepolicy.xml","schema-minimal.xml"); - IndexWriterConfig iwc = solrConfig.indexConfig.toIndexWriterConfig(h.getCore().getLatestSchema()); + IndexWriterConfig iwc = solrConfig.indexConfig.toIndexWriterConfig(h.getCore()); assertEquals(expectCFS, iwc.getUseCompoundFile()); @@ -122,7 +122,7 @@ public class TestMergePolicyConfig extends SolrTestCaseJ4 { System.setProperty("solr.test.log.merge.policy", mpClass.getName()); initCore("solrconfig-logmergepolicy.xml","schema-minimal.xml"); - IndexWriterConfig iwc = solrConfig.indexConfig.toIndexWriterConfig(h.getCore().getLatestSchema()); + IndexWriterConfig iwc = solrConfig.indexConfig.toIndexWriterConfig(h.getCore()); // verify some props set to -1 get lucene internal defaults assertEquals(-1, solrConfig.indexConfig.maxBufferedDocs); diff --git a/solr/core/src/test/org/apache/solr/core/TestSolrIndexConfig.java b/solr/core/src/test/org/apache/solr/core/TestSolrIndexConfig.java index 543b6485bcc..56663acc7bc 100644 --- a/solr/core/src/test/org/apache/solr/core/TestSolrIndexConfig.java +++ b/solr/core/src/test/org/apache/solr/core/TestSolrIndexConfig.java @@ -47,7 +47,7 @@ public class TestSolrIndexConfig extends SolrTestCaseJ4 { public void testIndexConfigParsing() throws Exception { - IndexWriterConfig iwc = solrConfig.indexConfig.toIndexWriterConfig(h.getCore().getLatestSchema()); + IndexWriterConfig iwc = solrConfig.indexConfig.toIndexWriterConfig(h.getCore()); try { checkIndexWriterConfig(iwc); } finally { diff --git a/solr/core/src/test/org/apache/solr/update/SolrIndexConfigTest.java b/solr/core/src/test/org/apache/solr/update/SolrIndexConfigTest.java index a29cf373a12..d5019f71f29 100644 --- a/solr/core/src/test/org/apache/solr/update/SolrIndexConfigTest.java +++ b/solr/core/src/test/org/apache/solr/update/SolrIndexConfigTest.java @@ -17,6 +17,10 @@ package org.apache.solr.update; * limitations under the License. 
*/ +import java.io.File; +import java.io.IOException; +import javax.xml.parsers.ParserConfigurationException; + import org.apache.lucene.index.ConcurrentMergeScheduler; import org.apache.lucene.index.IndexWriterConfig; import org.apache.lucene.index.SimpleMergedSegmentWarmer; @@ -26,13 +30,10 @@ import org.apache.solr.core.SolrConfig; import org.apache.solr.core.TestMergePolicyConfig; import org.apache.solr.schema.IndexSchema; import org.apache.solr.schema.IndexSchemaFactory; +import org.junit.BeforeClass; import org.junit.Test; import org.xml.sax.SAXException; -import javax.xml.parsers.ParserConfigurationException; -import java.io.File; -import java.io.IOException; - /** * Testcase for {@link SolrIndexConfig} * @@ -40,13 +41,19 @@ import java.io.IOException; */ public class SolrIndexConfigTest extends SolrTestCaseJ4 { + @BeforeClass + public static void beforeClass() throws Exception { + initCore("solrconfig.xml","schema.xml"); + } + @Test public void testFailingSolrIndexConfigCreation() { try { SolrConfig solrConfig = new SolrConfig("bad-mp-solrconfig.xml"); SolrIndexConfig solrIndexConfig = new SolrIndexConfig(solrConfig, null, null); IndexSchema indexSchema = IndexSchemaFactory.buildIndexSchema("schema.xml", solrConfig); - solrIndexConfig.toIndexWriterConfig(indexSchema); + h.getCore().setLatestSchema(indexSchema); + solrIndexConfig.toIndexWriterConfig(h.getCore()); fail("a mergePolicy should have an empty constructor in order to be instantiated in Solr thus this should fail "); } catch (Exception e) { // it failed as expected @@ -61,8 +68,9 @@ public class SolrIndexConfigTest extends SolrTestCaseJ4 { null); assertNotNull(solrIndexConfig); IndexSchema indexSchema = IndexSchemaFactory.buildIndexSchema("schema.xml", solrConfig); - - IndexWriterConfig iwc = solrIndexConfig.toIndexWriterConfig(indexSchema); + + h.getCore().setLatestSchema(indexSchema); + IndexWriterConfig iwc = solrIndexConfig.toIndexWriterConfig(h.getCore()); assertNotNull("null mp", iwc.getMergePolicy()); assertTrue("mp is not TMP", iwc.getMergePolicy() instanceof TieredMergePolicy); @@ -87,7 +95,8 @@ public class SolrIndexConfigTest extends SolrTestCaseJ4 { assertEquals(SimpleMergedSegmentWarmer.class.getName(), solrIndexConfig.mergedSegmentWarmerInfo.className); IndexSchema indexSchema = IndexSchemaFactory.buildIndexSchema("schema.xml", solrConfig); - IndexWriterConfig iwc = solrIndexConfig.toIndexWriterConfig(indexSchema); + h.getCore().setLatestSchema(indexSchema); + IndexWriterConfig iwc = solrIndexConfig.toIndexWriterConfig(h.getCore()); assertEquals(SimpleMergedSegmentWarmer.class, iwc.getMergedSegmentWarmer().getClass()); }
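
A minimal, self-contained sketch (not part of the patch) of the calling pattern this change requires. With the per-document Analyzer overloads removed from IndexWriter, the analyzer is supplied once on IndexWriterConfig; per-field or reload-aware behavior is expressed through a DelegatingAnalyzerWrapper, the same pattern the patch introduces in SolrIndexConfig as DelayedSchemaAnalyzer. The class name SwitchingAnalyzer and the field names "id" and "contents" below are illustrative only, not names taken from the patch.

import java.io.IOException;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.DelegatingAnalyzerWrapper;
import org.apache.lucene.analysis.core.KeywordAnalyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.RAMDirectory;

public class SwitchingAnalyzerExample {

  /** Picks the analyzer per field at tokenization time, as DelayedSchemaAnalyzer does with the latest schema. */
  static class SwitchingAnalyzer extends DelegatingAnalyzerWrapper {
    private final Analyzer idAnalyzer = new KeywordAnalyzer();
    private final Analyzer defaultAnalyzer = new StandardAnalyzer();

    SwitchingAnalyzer() {
      super(PER_FIELD_REUSE_STRATEGY);
    }

    @Override
    protected Analyzer getWrappedAnalyzer(String fieldName) {
      // "id" is an illustrative field name; a real schema lookup would go here.
      return "id".equals(fieldName) ? idAnalyzer : defaultAnalyzer;
    }
  }

  public static void main(String[] args) throws IOException {
    Directory dir = new RAMDirectory();
    // The analyzer is fixed once at config time; addDocument no longer accepts one.
    IndexWriterConfig iwc = new IndexWriterConfig(new SwitchingAnalyzer());
    try (IndexWriter writer = new IndexWriter(dir, iwc)) {
      Document doc = new Document();
      doc.add(new TextField("id", "doc-1", Field.Store.YES));
      doc.add(new TextField("contents", "some analyzed text", Field.Store.NO));
      writer.addDocument(doc);   // was: writer.addDocument(doc, analyzer)
      writer.commit();
    }
    dir.close();
  }
}

Because the wrapper is constructed with PER_FIELD_REUSE_STRATEGY, token stream components are still reused per field, so routing every lookup through getWrappedAnalyzer adds no per-document allocation.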