mirror of https://github.com/apache/lucene.git
LUCENE-6212: remove per-doc analyzers
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1656272 13f79535-47bb-0310-9956-ffa450edef68
parent b5db48c783
commit 1529c57ca1
@@ -392,6 +392,11 @@ API Changes
   Weight.scoresDocsOutOfOrder and LeafCollector.acceptsDocsOutOfOrder have been
   removed and boolean queries now always score in order.
 
+* LUCENE-6212: IndexWriter no longer accepts per-document Analyzer to
+  add/updateDocument. These methods were trappy as they made it
+  easy to accidentally index tokens that were not easily
+  searchable. (Mike McCandless)
+
 Bug Fixes
 
 * LUCENE-5650: Enforce read-only access to any path outside the temporary
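The entry above removes the Analyzer argument from every indexing call; the hunks below update the call sites accordingly. For external code, a minimal migration sketch is shown here, assuming a caller that used to hand a special analyzer to addDocument for one field; the use of PerFieldAnalyzerWrapper is my assumption about the usual substitute, not something stated in this commit:

    import java.nio.file.Paths;
    import java.util.HashMap;
    import java.util.Map;

    import org.apache.lucene.analysis.Analyzer;
    import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
    import org.apache.lucene.analysis.miscellaneous.PerFieldAnalyzerWrapper;
    import org.apache.lucene.analysis.standard.StandardAnalyzer;
    import org.apache.lucene.document.Document;
    import org.apache.lucene.document.Field;
    import org.apache.lucene.document.TextField;
    import org.apache.lucene.index.IndexWriter;
    import org.apache.lucene.index.IndexWriterConfig;
    import org.apache.lucene.store.Directory;
    import org.apache.lucene.store.FSDirectory;

    public class PerDocAnalyzerMigration {
      public static void main(String[] args) throws Exception {
        Directory dir = FSDirectory.open(Paths.get("index"));

        // Before LUCENE-6212 a caller could write:
        //   writer.addDocument(doc, someOtherAnalyzer);
        // That overload is gone. Analysis is now fixed by the IndexWriterConfig;
        // per-field differences can be expressed up front, for example with
        // PerFieldAnalyzerWrapper (an assumption, not part of this commit).
        Map<String, Analyzer> perField = new HashMap<>();
        perField.put("contents", new WhitespaceAnalyzer());
        Analyzer analyzer = new PerFieldAnalyzerWrapper(new StandardAnalyzer(), perField);

        IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(analyzer));
        Document doc = new Document();
        doc.add(new TextField("contents", "there is some content written here", Field.Store.YES));
        writer.addDocument(doc);  // no per-document Analyzer argument anymore
        writer.close();
        dir.close();
      }
    }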
@@ -75,7 +75,7 @@ public class UIMABaseAnalyzerTest extends BaseTokenStreamTestCase {
     doc.add(new TextField("title", dummyTitle, Field.Store.YES));
     String dummyContent = "there is some content written here";
     doc.add(new TextField("contents", dummyContent, Field.Store.YES));
-    writer.addDocument(doc, analyzer);
+    writer.addDocument(doc);
     writer.commit();
 
     // try the search over the first doc
@@ -96,7 +96,7 @@ public class UIMABaseAnalyzerTest extends BaseTokenStreamTestCase {
     doc.add(new TextField("title", dogmasTitle, Field.Store.YES));
     String dogmasContents = "white men can't jump";
     doc.add(new TextField("contents", dogmasContents, Field.Store.YES));
-    writer.addDocument(doc, analyzer);
+    writer.addDocument(doc);
     writer.commit();
 
     directoryReader.close();
@@ -16,11 +16,15 @@
  */
 package org.apache.lucene.classification;
 
+import java.io.IOException;
+import java.util.Random;
+
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field;
 import org.apache.lucene.document.FieldType;
 import org.apache.lucene.document.TextField;
+import org.apache.lucene.index.IndexWriterConfig;
 import org.apache.lucene.index.LeafReader;
 import org.apache.lucene.index.RandomIndexWriter;
 import org.apache.lucene.index.SlowCompositeReaderWrapper;
@@ -32,9 +36,6 @@ import org.apache.lucene.util.TestUtil;
 import org.junit.After;
 import org.junit.Before;
 
-import java.io.IOException;
-import java.util.Random;
-
 /**
  * Base class for testing {@link Classifier}s
  */
@@ -113,7 +114,7 @@ public abstract class ClassificationTestBase<T> extends LuceneTestCase {
     assertEquals("got an assigned class of " + classificationResult.getAssignedClass(), expectedResult, classificationResult.getAssignedClass());
     double score = classificationResult.getScore();
     assertTrue("score should be between 0 and 1, got: " + score, score <= 1 && score >= 0);
-    updateSampleIndex(analyzer);
+    updateSampleIndex();
     ClassificationResult<T> secondClassificationResult = classifier.assignClass(inputDoc);
     assertEquals(classificationResult.getAssignedClass(), secondClassificationResult.getAssignedClass());
     assertEquals(Double.valueOf(score), Double.valueOf(secondClassificationResult.getScore()));
@@ -125,7 +126,8 @@ public abstract class ClassificationTestBase<T> extends LuceneTestCase {
   }
 
   private void populateSampleIndex(Analyzer analyzer) throws IOException {
-    indexWriter.deleteAll();
+    indexWriter.close();
+    indexWriter = new RandomIndexWriter(random(), dir, newIndexWriterConfig(analyzer).setOpenMode(IndexWriterConfig.OpenMode.CREATE));
     indexWriter.commit();
 
     String text;
@@ -138,7 +140,7 @@ public abstract class ClassificationTestBase<T> extends LuceneTestCase {
     doc.add(new Field(categoryFieldName, "politics", ft));
     doc.add(new Field(booleanFieldName, "true", ft));
 
-    indexWriter.addDocument(doc, analyzer);
+    indexWriter.addDocument(doc);
 
     doc = new Document();
     text = "Mitt Romney seeks to assure Israel and Iran, as well as Jewish voters in the United" +
@@ -146,7 +148,7 @@ public abstract class ClassificationTestBase<T> extends LuceneTestCase {
     doc.add(new Field(textFieldName, text, ft));
     doc.add(new Field(categoryFieldName, "politics", ft));
     doc.add(new Field(booleanFieldName, "true", ft));
-    indexWriter.addDocument(doc, analyzer);
+    indexWriter.addDocument(doc);
 
     doc = new Document();
     text = "And there's a threshold question that he has to answer for the American people and " +
@@ -155,7 +157,7 @@ public abstract class ClassificationTestBase<T> extends LuceneTestCase {
     doc.add(new Field(textFieldName, text, ft));
     doc.add(new Field(categoryFieldName, "politics", ft));
     doc.add(new Field(booleanFieldName, "true", ft));
-    indexWriter.addDocument(doc, analyzer);
+    indexWriter.addDocument(doc);
 
     doc = new Document();
     text = "Still, when it comes to gun policy, many congressional Democrats have \"decided to " +
@@ -164,7 +166,7 @@ public abstract class ClassificationTestBase<T> extends LuceneTestCase {
     doc.add(new Field(textFieldName, text, ft));
     doc.add(new Field(categoryFieldName, "politics", ft));
     doc.add(new Field(booleanFieldName, "true", ft));
-    indexWriter.addDocument(doc, analyzer);
+    indexWriter.addDocument(doc);
 
     doc = new Document();
     text = "Standing amongst the thousands of people at the state Capitol, Jorstad, director of " +
@@ -173,7 +175,7 @@ public abstract class ClassificationTestBase<T> extends LuceneTestCase {
     doc.add(new Field(textFieldName, text, ft));
     doc.add(new Field(categoryFieldName, "technology", ft));
     doc.add(new Field(booleanFieldName, "false", ft));
-    indexWriter.addDocument(doc, analyzer);
+    indexWriter.addDocument(doc);
 
     doc = new Document();
     text = "So, about all those experts and analysts who've spent the past year or so saying " +
@@ -181,7 +183,7 @@ public abstract class ClassificationTestBase<T> extends LuceneTestCase {
     doc.add(new Field(textFieldName, text, ft));
     doc.add(new Field(categoryFieldName, "technology", ft));
     doc.add(new Field(booleanFieldName, "false", ft));
-    indexWriter.addDocument(doc, analyzer);
+    indexWriter.addDocument(doc);
 
     doc = new Document();
     text = "More than 400 million people trust Google with their e-mail, and 50 million store files" +
@@ -190,12 +192,12 @@ public abstract class ClassificationTestBase<T> extends LuceneTestCase {
     doc.add(new Field(textFieldName, text, ft));
     doc.add(new Field(categoryFieldName, "technology", ft));
     doc.add(new Field(booleanFieldName, "false", ft));
-    indexWriter.addDocument(doc, analyzer);
+    indexWriter.addDocument(doc);
 
     doc = new Document();
     text = "unlabeled doc";
     doc.add(new Field(textFieldName, text, ft));
-    indexWriter.addDocument(doc, analyzer);
+    indexWriter.addDocument(doc);
 
     indexWriter.commit();
   }
@@ -217,7 +219,8 @@ public abstract class ClassificationTestBase<T> extends LuceneTestCase {
   }
 
   private void populatePerformanceIndex(Analyzer analyzer) throws IOException {
-    indexWriter.deleteAll();
+    indexWriter.close();
+    indexWriter = new RandomIndexWriter(random(), dir, newIndexWriterConfig(analyzer).setOpenMode(IndexWriterConfig.OpenMode.CREATE));
     indexWriter.commit();
 
     FieldType ft = new FieldType(TextField.TYPE_STORED);
@@ -232,7 +235,7 @@ public abstract class ClassificationTestBase<T> extends LuceneTestCase {
       doc.add(new Field(textFieldName, createRandomString(random), ft));
       doc.add(new Field(categoryFieldName, b ? "technology" : "politics", ft));
       doc.add(new Field(booleanFieldName, String.valueOf(b), ft));
-      indexWriter.addDocument(doc, analyzer);
+      indexWriter.addDocument(doc);
     }
     indexWriter.commit();
   }
@@ -246,7 +249,7 @@ public abstract class ClassificationTestBase<T> extends LuceneTestCase {
     return builder.toString();
   }
 
-  private void updateSampleIndex(Analyzer analyzer) throws Exception {
+  private void updateSampleIndex() throws Exception {
 
     String text;
 
@@ -256,54 +259,54 @@ public abstract class ClassificationTestBase<T> extends LuceneTestCase {
     doc.add(new Field(categoryFieldName, "politics", ft));
     doc.add(new Field(booleanFieldName, "true", ft));
 
-    indexWriter.addDocument(doc, analyzer);
+    indexWriter.addDocument(doc);
 
     doc = new Document();
     text = "Julian Zelizer says Bill Clinton is still trying to shape his party, years after the White House, while George W. Bush opts for a much more passive role.";
     doc.add(new Field(textFieldName, text, ft));
     doc.add(new Field(categoryFieldName, "politics", ft));
     doc.add(new Field(booleanFieldName, "true", ft));
-    indexWriter.addDocument(doc, analyzer);
+    indexWriter.addDocument(doc);
 
     doc = new Document();
     text = "Crossfire: Sen. Tim Scott passes on Sen. Lindsey Graham endorsement";
     doc.add(new Field(textFieldName, text, ft));
     doc.add(new Field(categoryFieldName, "politics", ft));
     doc.add(new Field(booleanFieldName, "true", ft));
-    indexWriter.addDocument(doc, analyzer);
+    indexWriter.addDocument(doc);
 
     doc = new Document();
     text = "Illinois becomes 16th state to allow same-sex marriage.";
     doc.add(new Field(textFieldName, text, ft));
     doc.add(new Field(categoryFieldName, "politics", ft));
     doc.add(new Field(booleanFieldName, "true", ft));
-    indexWriter.addDocument(doc, analyzer);
+    indexWriter.addDocument(doc);
 
     doc = new Document();
     text = "Apple is developing iPhones with curved-glass screens and enhanced sensors that detect different levels of pressure, according to a new report.";
     doc.add(new Field(textFieldName, text, ft));
     doc.add(new Field(categoryFieldName, "technology", ft));
     doc.add(new Field(booleanFieldName, "false", ft));
-    indexWriter.addDocument(doc, analyzer);
+    indexWriter.addDocument(doc);
 
     doc = new Document();
     text = "The Xbox One is Microsoft's first new gaming console in eight years. It's a quality piece of hardware but it's also noteworthy because Microsoft is using it to make a statement.";
     doc.add(new Field(textFieldName, text, ft));
     doc.add(new Field(categoryFieldName, "technology", ft));
     doc.add(new Field(booleanFieldName, "false", ft));
-    indexWriter.addDocument(doc, analyzer);
+    indexWriter.addDocument(doc);
 
     doc = new Document();
     text = "Google says it will replace a Google Maps image after a California father complained it shows the body of his teen-age son, who was shot to death in 2009.";
     doc.add(new Field(textFieldName, text, ft));
     doc.add(new Field(categoryFieldName, "technology", ft));
     doc.add(new Field(booleanFieldName, "false", ft));
-    indexWriter.addDocument(doc, analyzer);
+    indexWriter.addDocument(doc);
 
     doc = new Document();
     text = "second unlabeled doc";
     doc.add(new Field(textFieldName, text, ft));
-    indexWriter.addDocument(doc, analyzer);
+    indexWriter.addDocument(doc);
 
     indexWriter.commit();
   }
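The test changes above drop indexWriter.deleteAll() and per-document analyzers; instead the writer is closed and reopened with the analyzer carried by its config, and OpenMode.CREATE starts the index from scratch. A minimal sketch of the same pattern outside the test framework (class and method names here are illustrative, not taken from the test):

    import java.io.IOException;

    import org.apache.lucene.analysis.Analyzer;
    import org.apache.lucene.index.IndexWriter;
    import org.apache.lucene.index.IndexWriterConfig;
    import org.apache.lucene.store.Directory;

    class IndexRebuild {
      // Recreate the writer so the given analyzer applies to everything indexed
      // afterwards; OpenMode.CREATE discards the previous index contents, which
      // is the role deleteAll() used to play in the old version of the test.
      static IndexWriter recreate(IndexWriter old, Directory dir, Analyzer analyzer) throws IOException {
        old.close();
        IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(analyzer)
            .setOpenMode(IndexWriterConfig.OpenMode.CREATE));
        writer.commit();
        return writer;
      }
    }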
@@ -64,8 +64,6 @@ public class DataSplitterTest extends LuceneTestCase {
     ft.setStoreTermVectorOffsets(true);
     ft.setStoreTermVectorPositions(true);
 
-    Analyzer analyzer = new MockAnalyzer(random());
-
     Document doc;
     Random rnd = random();
     for (int i = 0; i < 100; i++) {
@@ -73,7 +71,7 @@ public class DataSplitterTest extends LuceneTestCase {
       doc.add(new Field(idFieldName, Integer.toString(i), ft));
       doc.add(new Field(textFieldName, TestUtil.randomUnicodeString(rnd, 1024), ft));
       doc.add(new Field(classFieldName, TestUtil.randomUnicodeString(rnd, 10), ft));
-      indexWriter.addDocument(doc, analyzer);
+      indexWriter.addDocument(doc);
     }
 
     indexWriter.commit();
@@ -55,14 +55,12 @@ public class DocToDoubleVectorUtilsTest extends LuceneTestCase {
     ft.setStoreTermVectorOffsets(true);
     ft.setStoreTermVectorPositions(true);
 
-    Analyzer analyzer = new MockAnalyzer(random());
-
     Document doc;
     for (int i = 0; i < 10; i++) {
       doc = new Document();
       doc.add(new Field("id", Integer.toString(i), ft));
       doc.add(new Field("text", random().nextInt(10) + " " + random().nextInt(10) + " " + random().nextInt(10), ft));
-      indexWriter.addDocument(doc, analyzer);
+      indexWriter.addDocument(doc);
     }
 
     indexWriter.commit();
@@ -1127,22 +1127,7 @@ public class IndexWriter implements Closeable, TwoPhaseCommit, Accountable {
    * @throws IOException if there is a low-level IO error
    */
   public void addDocument(IndexDocument doc) throws IOException {
-    addDocument(doc, analyzer);
-  }
-
-  /**
-   * Adds a document to this index, using the provided analyzer instead of the
-   * value of {@link #getAnalyzer()}.
-   *
-   * <p>See {@link #addDocument(IndexDocument)} for details on
-   * index and IndexWriter state after an Exception, and
-   * flushing/merging temporary free space requirements.</p>
-   *
-   * @throws CorruptIndexException if the index is corrupt
-   * @throws IOException if there is a low-level IO error
-   */
-  public void addDocument(IndexDocument doc, Analyzer analyzer) throws IOException {
-    updateDocument(null, doc, analyzer);
+    updateDocument(null, doc);
   }
 
   /**
@@ -1183,22 +1168,7 @@ public class IndexWriter implements Closeable, TwoPhaseCommit, Accountable {
    * @lucene.experimental
    */
   public void addDocuments(Iterable<? extends IndexDocument> docs) throws IOException {
-    addDocuments(docs, analyzer);
-  }
-
-  /**
-   * Atomically adds a block of documents, analyzed using the
-   * provided analyzer, with sequentially assigned document
-   * IDs, such that an external reader will see all or none
-   * of the documents.
-   *
-   * @throws CorruptIndexException if the index is corrupt
-   * @throws IOException if there is a low-level IO error
-   *
-   * @lucene.experimental
-   */
-  public void addDocuments(Iterable<? extends IndexDocument> docs, Analyzer analyzer) throws IOException {
-    updateDocuments(null, docs, analyzer);
+    updateDocuments(null, docs);
   }
 
   /**
@@ -1215,24 +1185,6 @@ public class IndexWriter implements Closeable, TwoPhaseCommit, Accountable {
    * @lucene.experimental
    */
   public void updateDocuments(Term delTerm, Iterable<? extends IndexDocument> docs) throws IOException {
-    updateDocuments(delTerm, docs, analyzer);
-  }
-
-  /**
-   * Atomically deletes documents matching the provided
-   * delTerm and adds a block of documents, analyzed using
-   * the provided analyzer, with sequentially
-   * assigned document IDs, such that an external reader
-   * will see all or none of the documents.
-   *
-   * See {@link #addDocuments(Iterable)}.
-   *
-   * @throws CorruptIndexException if the index is corrupt
-   * @throws IOException if there is a low-level IO error
-   *
-   * @lucene.experimental
-   */
-  public void updateDocuments(Term delTerm, Iterable<? extends IndexDocument> docs, Analyzer analyzer) throws IOException {
     ensureOpen();
     try {
       boolean success = false;
@@ -1384,26 +1336,6 @@ public class IndexWriter implements Closeable, TwoPhaseCommit, Accountable {
    */
   public void updateDocument(Term term, IndexDocument doc) throws IOException {
     ensureOpen();
-    updateDocument(term, doc, analyzer);
-  }
-
-  /**
-   * Updates a document by first deleting the document(s)
-   * containing <code>term</code> and then adding the new
-   * document. The delete and then add are atomic as seen
-   * by a reader on the same index (flush may happen only after
-   * the add).
-   *
-   * @param term the term to identify the document(s) to be
-   *   deleted
-   * @param doc the document to be added
-   * @param analyzer the analyzer to use when analyzing the document
-   * @throws CorruptIndexException if the index is corrupt
-   * @throws IOException if there is a low-level IO error
-   */
-  public void updateDocument(Term term, IndexDocument doc, Analyzer analyzer)
-      throws IOException {
-    ensureOpen();
     try {
       boolean success = false;
       try {
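All of the removed overloads above funnel into the surviving updateDocument(Term, IndexDocument): the delete-then-add semantics described in the deleted javadoc are unchanged, only the analyzer now always comes from the writer's config. A small usage sketch (index path and field names are illustrative):

    import java.nio.file.Paths;

    import org.apache.lucene.analysis.standard.StandardAnalyzer;
    import org.apache.lucene.document.Document;
    import org.apache.lucene.document.Field;
    import org.apache.lucene.document.StringField;
    import org.apache.lucene.document.TextField;
    import org.apache.lucene.index.IndexWriter;
    import org.apache.lucene.index.IndexWriterConfig;
    import org.apache.lucene.index.Term;
    import org.apache.lucene.store.Directory;
    import org.apache.lucene.store.FSDirectory;

    public class UpdateDocumentExample {
      public static void main(String[] args) throws Exception {
        Directory dir = FSDirectory.open(Paths.get("index"));
        // The analyzer is chosen once, on the config; every add/update uses it.
        IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(new StandardAnalyzer()));

        Document doc = new Document();
        doc.add(new StringField("id", "42", Field.Store.YES));
        doc.add(new TextField("contents", "updated text for document 42", Field.Store.NO));

        // Deletes any document(s) matching id:42 and then adds the new one; the
        // delete+add pair is atomic from the point of view of an open reader.
        writer.updateDocument(new Term("id", "42"), doc);

        writer.close();
        dir.close();
      }
    }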
@@ -47,15 +47,6 @@ public class TrackingIndexWriter {
     this.writer = writer;
   }
 
-  /** Calls {@link
-   *  IndexWriter#updateDocument(Term,IndexDocument,Analyzer)}
-   *  and returns the generation that reflects this change. */
-  public long updateDocument(Term t, IndexDocument d, Analyzer a) throws IOException {
-    writer.updateDocument(t, d, a);
-    // Return gen as of when indexing finished:
-    return indexingGen.get();
-  }
-
   /** Calls {@link
    *  IndexWriter#updateDocument(Term,IndexDocument)} and
    *  returns the generation that reflects this change. */
@@ -65,15 +56,6 @@ public class TrackingIndexWriter {
     return indexingGen.get();
   }
 
-  /** Calls {@link
-   *  IndexWriter#updateDocuments(Term,Iterable,Analyzer)}
-   *  and returns the generation that reflects this change. */
-  public long updateDocuments(Term t, Iterable<? extends IndexDocument> docs, Analyzer a) throws IOException {
-    writer.updateDocuments(t, docs, a);
-    // Return gen as of when indexing finished:
-    return indexingGen.get();
-  }
-
   /** Calls {@link
    *  IndexWriter#updateDocuments(Term,Iterable)} and returns
    *  the generation that reflects this change. */
@@ -123,24 +105,6 @@ public class TrackingIndexWriter {
     return indexingGen.get();
   }
 
-  /** Calls {@link
-   *  IndexWriter#addDocument(IndexDocument,Analyzer)} and
-   *  returns the generation that reflects this change. */
-  public long addDocument(IndexDocument d, Analyzer a) throws IOException {
-    writer.addDocument(d, a);
-    // Return gen as of when indexing finished:
-    return indexingGen.get();
-  }
-
-  /** Calls {@link
-   *  IndexWriter#addDocuments(Iterable,Analyzer)} and
-   *  returns the generation that reflects this change. */
-  public long addDocuments(Iterable<? extends IndexDocument> docs, Analyzer a) throws IOException {
-    writer.addDocuments(docs, a);
-    // Return gen as of when indexing finished:
-    return indexingGen.get();
-  }
-
   /** Calls {@link IndexWriter#addDocument(IndexDocument)}
    *  and returns the generation that reflects this change. */
   public long addDocument(IndexDocument d) throws IOException {
@@ -305,7 +305,7 @@ public class TestMockAnalyzer extends BaseTokenStreamTestCase {
       }
     };
 
-    final RandomIndexWriter writer = new RandomIndexWriter(random(), newDirectory());
+    final RandomIndexWriter writer = new RandomIndexWriter(random(), newDirectory(), a);
     final Document doc = new Document();
     final FieldType ft = new FieldType();
     ft.setIndexOptions(IndexOptions.DOCS);
@@ -315,7 +315,7 @@ public class TestMockAnalyzer extends BaseTokenStreamTestCase {
     ft.setStoreTermVectorOffsets(true);
     doc.add(new Field("f", "a", ft));
     doc.add(new Field("f", "a", ft));
-    writer.addDocument(doc, a);
+    writer.addDocument(doc);
     final LeafReader reader = getOnlySegmentReader(writer.getReader());
     final Fields fields = reader.getTermVectors(0);
     final Terms terms = fields.terms("f");
@@ -392,27 +392,35 @@ public class TestIndexWriterExceptions extends LuceneTestCase {
   // LUCENE-1208
   public void testExceptionJustBeforeFlush() throws IOException {
     Directory dir = newDirectory();
-    IndexWriter w = RandomIndexWriter.mockIndexWriter(dir,
-                                                      newIndexWriterConfig(new MockAnalyzer(random()))
-                                                        .setMaxBufferedDocs(2),
-                                                      new TestPoint1());
-    Document doc = new Document();
-    doc.add(newTextField("field", "a field", Field.Store.YES));
-    w.addDocument(doc);
+
+    final AtomicBoolean doCrash = new AtomicBoolean();
 
     Analyzer analyzer = new Analyzer(Analyzer.PER_FIELD_REUSE_STRATEGY) {
       @Override
       public TokenStreamComponents createComponents(String fieldName) {
         MockTokenizer tokenizer = new MockTokenizer(MockTokenizer.WHITESPACE, false);
         tokenizer.setEnableChecks(false); // disable workflow checking as we forcefully close() in exceptional cases.
-        return new TokenStreamComponents(tokenizer, new CrashingFilter(fieldName, tokenizer));
+        TokenStream stream = tokenizer;
+        if (doCrash.get()) {
+          stream = new CrashingFilter(fieldName, stream);
+        }
+        return new TokenStreamComponents(tokenizer, stream);
       }
     };
 
+    IndexWriter w = RandomIndexWriter.mockIndexWriter(dir,
+                                                      newIndexWriterConfig(analyzer)
+                                                        .setMaxBufferedDocs(2),
+                                                      new TestPoint1());
+    Document doc = new Document();
+    doc.add(newTextField("field", "a field", Field.Store.YES));
+    w.addDocument(doc);
+
     Document crashDoc = new Document();
     crashDoc.add(newTextField("crash", "do it on token 4", Field.Store.YES));
+    doCrash.set(true);
     try {
-      w.addDocument(crashDoc, analyzer);
+      w.addDocument(crashDoc);
       fail("did not hit expected exception");
     } catch (IOException ioe) {
       // expected
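The rewritten test shows the idiom that replaces "analyze this one document differently": a single configured Analyzer whose token stream is assembled from shared mutable state (an AtomicBoolean here) instead of a second Analyzer handed to addDocument. A standalone sketch of the same idiom built only from core analysis classes; LowerCaseFilter merely stands in for the test's CrashingFilter:

    import java.util.concurrent.atomic.AtomicBoolean;

    import org.apache.lucene.analysis.Analyzer;
    import org.apache.lucene.analysis.TokenStream;
    import org.apache.lucene.analysis.Tokenizer;
    import org.apache.lucene.analysis.core.LowerCaseFilter;
    import org.apache.lucene.analysis.core.WhitespaceTokenizer;

    class ToggleableAnalyzer {
      static Analyzer create(final AtomicBoolean extraFilterEnabled) {
        return new Analyzer(Analyzer.PER_FIELD_REUSE_STRATEGY) {
          @Override
          protected TokenStreamComponents createComponents(String fieldName) {
            Tokenizer tokenizer = new WhitespaceTokenizer();
            TokenStream stream = tokenizer;
            // The flag is consulted when components are first built for a field;
            // with a reuse strategy, already-built components are not rebuilt,
            // which is why the test flips the flag before indexing a new field.
            if (extraFilterEnabled.get()) {
              stream = new LowerCaseFilter(stream);  // stand-in for the test's CrashingFilter
            }
            return new TokenStreamComponents(tokenizer, stream);
          }
        };
      }
    }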
@ -42,521 +42,520 @@ import org.apache.lucene.util.TestUtil;
|
||||||
|
|
||||||
public class TestPayloads extends LuceneTestCase {
|
public class TestPayloads extends LuceneTestCase {
|
||||||
|
|
||||||
// Simple tests to test the Payload class
|
// Simple tests to test the Payload class
|
||||||
public void testPayload() throws Exception {
|
public void testPayload() throws Exception {
|
||||||
BytesRef payload = new BytesRef("This is a test!");
|
BytesRef payload = new BytesRef("This is a test!");
|
||||||
assertEquals("Wrong payload length.", "This is a test!".length(), payload.length);
|
assertEquals("Wrong payload length.", "This is a test!".length(), payload.length);
|
||||||
|
|
||||||
BytesRef clone = payload.clone();
|
BytesRef clone = payload.clone();
|
||||||
assertEquals(payload.length, clone.length);
|
assertEquals(payload.length, clone.length);
|
||||||
for (int i = 0; i < payload.length; i++) {
|
for (int i = 0; i < payload.length; i++) {
|
||||||
assertEquals(payload.bytes[i + payload.offset], clone.bytes[i + clone.offset]);
|
assertEquals(payload.bytes[i + payload.offset], clone.bytes[i + clone.offset]);
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Tests whether the DocumentWriter and SegmentMerger correctly enable the
|
// Tests whether the DocumentWriter and SegmentMerger correctly enable the
|
||||||
// payload bit in the FieldInfo
|
// payload bit in the FieldInfo
|
||||||
public void testPayloadFieldBit() throws Exception {
|
public void testPayloadFieldBit() throws Exception {
|
||||||
Directory ram = newDirectory();
|
Directory ram = newDirectory();
|
||||||
PayloadAnalyzer analyzer = new PayloadAnalyzer();
|
PayloadAnalyzer analyzer = new PayloadAnalyzer();
|
||||||
IndexWriter writer = new IndexWriter(ram, newIndexWriterConfig(analyzer));
|
IndexWriter writer = new IndexWriter(ram, newIndexWriterConfig(analyzer));
|
||||||
Document d = new Document();
|
Document d = new Document();
|
||||||
// this field won't have any payloads
|
// this field won't have any payloads
|
||||||
d.add(newTextField("f1", "This field has no payloads", Field.Store.NO));
|
d.add(newTextField("f1", "This field has no payloads", Field.Store.NO));
|
||||||
// this field will have payloads in all docs, however not for all term positions,
|
// this field will have payloads in all docs, however not for all term positions,
|
||||||
// so this field is used to check if the DocumentWriter correctly enables the payloads bit
|
// so this field is used to check if the DocumentWriter correctly enables the payloads bit
|
||||||
// even if only some term positions have payloads
|
// even if only some term positions have payloads
|
||||||
d.add(newTextField("f2", "This field has payloads in all docs", Field.Store.NO));
|
d.add(newTextField("f2", "This field has payloads in all docs", Field.Store.NO));
|
||||||
d.add(newTextField("f2", "This field has payloads in all docs NO PAYLOAD", Field.Store.NO));
|
d.add(newTextField("f2", "This field has payloads in all docs NO PAYLOAD", Field.Store.NO));
|
||||||
// this field is used to verify if the SegmentMerger enables payloads for a field if it has payloads
|
// this field is used to verify if the SegmentMerger enables payloads for a field if it has payloads
|
||||||
// enabled in only some documents
|
// enabled in only some documents
|
||||||
d.add(newTextField("f3", "This field has payloads in some docs", Field.Store.NO));
|
d.add(newTextField("f3", "This field has payloads in some docs", Field.Store.NO));
|
||||||
// only add payload data for field f2
|
// only add payload data for field f2
|
||||||
analyzer.setPayloadData("f2", "somedata".getBytes(StandardCharsets.UTF_8), 0, 1);
|
analyzer.setPayloadData("f2", "somedata".getBytes(StandardCharsets.UTF_8), 0, 1);
|
||||||
writer.addDocument(d);
|
writer.addDocument(d);
|
||||||
// flush
|
// flush
|
||||||
writer.close();
|
writer.close();
|
||||||
|
|
||||||
SegmentReader reader = getOnlySegmentReader(DirectoryReader.open(ram));
|
SegmentReader reader = getOnlySegmentReader(DirectoryReader.open(ram));
|
||||||
FieldInfos fi = reader.getFieldInfos();
|
FieldInfos fi = reader.getFieldInfos();
|
||||||
assertFalse("Payload field bit should not be set.", fi.fieldInfo("f1").hasPayloads());
|
assertFalse("Payload field bit should not be set.", fi.fieldInfo("f1").hasPayloads());
|
||||||
assertTrue("Payload field bit should be set.", fi.fieldInfo("f2").hasPayloads());
|
assertTrue("Payload field bit should be set.", fi.fieldInfo("f2").hasPayloads());
|
||||||
assertFalse("Payload field bit should not be set.", fi.fieldInfo("f3").hasPayloads());
|
assertFalse("Payload field bit should not be set.", fi.fieldInfo("f3").hasPayloads());
|
||||||
reader.close();
|
reader.close();
|
||||||
|
|
||||||
// now we add another document which has payloads for field f3 and verify if the SegmentMerger
|
// now we add another document which has payloads for field f3 and verify if the SegmentMerger
|
||||||
// enabled payloads for that field
|
// enabled payloads for that field
|
||||||
analyzer = new PayloadAnalyzer(); // Clear payload state for each field
|
analyzer = new PayloadAnalyzer(); // Clear payload state for each field
|
||||||
writer = new IndexWriter(ram, newIndexWriterConfig(analyzer)
|
writer = new IndexWriter(ram, newIndexWriterConfig(analyzer)
|
||||||
.setOpenMode(OpenMode.CREATE));
|
.setOpenMode(OpenMode.CREATE));
|
||||||
d = new Document();
|
d = new Document();
|
||||||
d.add(newTextField("f1", "This field has no payloads", Field.Store.NO));
|
d.add(newTextField("f1", "This field has no payloads", Field.Store.NO));
|
||||||
d.add(newTextField("f2", "This field has payloads in all docs", Field.Store.NO));
|
d.add(newTextField("f2", "This field has payloads in all docs", Field.Store.NO));
|
||||||
d.add(newTextField("f2", "This field has payloads in all docs", Field.Store.NO));
|
d.add(newTextField("f2", "This field has payloads in all docs", Field.Store.NO));
|
||||||
d.add(newTextField("f3", "This field has payloads in some docs", Field.Store.NO));
|
d.add(newTextField("f3", "This field has payloads in some docs", Field.Store.NO));
|
||||||
// add payload data for field f2 and f3
|
// add payload data for field f2 and f3
|
||||||
analyzer.setPayloadData("f2", "somedata".getBytes(StandardCharsets.UTF_8), 0, 1);
|
analyzer.setPayloadData("f2", "somedata".getBytes(StandardCharsets.UTF_8), 0, 1);
|
||||||
analyzer.setPayloadData("f3", "somedata".getBytes(StandardCharsets.UTF_8), 0, 3);
|
analyzer.setPayloadData("f3", "somedata".getBytes(StandardCharsets.UTF_8), 0, 3);
|
||||||
writer.addDocument(d);
|
writer.addDocument(d);
|
||||||
|
|
||||||
// force merge
|
// force merge
|
||||||
writer.forceMerge(1);
|
writer.forceMerge(1);
|
||||||
// flush
|
// flush
|
||||||
writer.close();
|
writer.close();
|
||||||
|
|
||||||
reader = getOnlySegmentReader(DirectoryReader.open(ram));
|
reader = getOnlySegmentReader(DirectoryReader.open(ram));
|
||||||
fi = reader.getFieldInfos();
|
fi = reader.getFieldInfos();
|
||||||
assertFalse("Payload field bit should not be set.", fi.fieldInfo("f1").hasPayloads());
|
assertFalse("Payload field bit should not be set.", fi.fieldInfo("f1").hasPayloads());
|
||||||
assertTrue("Payload field bit should be set.", fi.fieldInfo("f2").hasPayloads());
|
assertTrue("Payload field bit should be set.", fi.fieldInfo("f2").hasPayloads());
|
||||||
assertTrue("Payload field bit should be set.", fi.fieldInfo("f3").hasPayloads());
|
assertTrue("Payload field bit should be set.", fi.fieldInfo("f3").hasPayloads());
|
||||||
reader.close();
|
reader.close();
|
||||||
ram.close();
|
ram.close();
|
||||||
}
|
}
|
||||||
|
|
||||||
// Tests if payloads are correctly stored and loaded using both RamDirectory and FSDirectory
|
// Tests if payloads are correctly stored and loaded using both RamDirectory and FSDirectory
|
||||||
public void testPayloadsEncoding() throws Exception {
|
public void testPayloadsEncoding() throws Exception {
|
||||||
Directory dir = newDirectory();
|
Directory dir = newDirectory();
|
||||||
performTest(dir);
|
performTest(dir);
|
||||||
dir.close();
|
dir.close();
|
||||||
}
|
}
|
||||||
|
|
||||||
// builds an index with payloads in the given Directory and performs
|
// builds an index with payloads in the given Directory and performs
|
||||||
// different tests to verify the payload encoding
|
// different tests to verify the payload encoding
|
||||||
private void performTest(Directory dir) throws Exception {
|
private void performTest(Directory dir) throws Exception {
|
||||||
PayloadAnalyzer analyzer = new PayloadAnalyzer();
|
PayloadAnalyzer analyzer = new PayloadAnalyzer();
|
||||||
IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(analyzer)
|
IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(analyzer)
|
||||||
.setOpenMode(OpenMode.CREATE)
|
.setOpenMode(OpenMode.CREATE)
|
||||||
.setMergePolicy(newLogMergePolicy()));
|
.setMergePolicy(newLogMergePolicy()));
|
||||||
|
|
||||||
// should be in sync with value in TermInfosWriter
|
// should be in sync with value in TermInfosWriter
|
||||||
final int skipInterval = 16;
|
final int skipInterval = 16;
|
||||||
|
|
||||||
final int numTerms = 5;
|
final int numTerms = 5;
|
||||||
final String fieldName = "f1";
|
final String fieldName = "f1";
|
||||||
|
|
||||||
int numDocs = skipInterval + 1;
|
int numDocs = skipInterval + 1;
|
||||||
// create content for the test documents with just a few terms
|
// create content for the test documents with just a few terms
|
||||||
Term[] terms = generateTerms(fieldName, numTerms);
|
Term[] terms = generateTerms(fieldName, numTerms);
|
||||||
StringBuilder sb = new StringBuilder();
|
StringBuilder sb = new StringBuilder();
|
||||||
for (int i = 0; i < terms.length; i++) {
|
for (int i = 0; i < terms.length; i++) {
|
||||||
sb.append(terms[i].text());
|
sb.append(terms[i].text());
|
||||||
sb.append(" ");
|
sb.append(" ");
|
||||||
}
|
}
|
||||||
String content = sb.toString();
|
String content = sb.toString();
|
||||||
|
|
||||||
|
|
||||||
int payloadDataLength = numTerms * numDocs * 2 + numTerms * numDocs * (numDocs - 1) / 2;
|
int payloadDataLength = numTerms * numDocs * 2 + numTerms * numDocs * (numDocs - 1) / 2;
|
||||||
byte[] payloadData = generateRandomData(payloadDataLength);
|
byte[] payloadData = generateRandomData(payloadDataLength);
|
||||||
|
|
||||||
Document d = new Document();
|
Document d = new Document();
|
||||||
d.add(newTextField(fieldName, content, Field.Store.NO));
|
d.add(newTextField(fieldName, content, Field.Store.NO));
|
||||||
// add the same document multiple times to have the same payload lengths for all
|
// add the same document multiple times to have the same payload lengths for all
|
||||||
// occurrences within two consecutive skip intervals
|
// occurrences within two consecutive skip intervals
|
||||||
int offset = 0;
|
int offset = 0;
|
||||||
for (int i = 0; i < 2 * numDocs; i++) {
|
for (int i = 0; i < 2 * numDocs; i++) {
|
||||||
-      analyzer = new PayloadAnalyzer(fieldName, payloadData, offset, 1);
+      analyzer.setPayloadData(fieldName, payloadData, offset, 1);
       offset += numTerms;
-      writer.addDocument(d, analyzer);
+      writer.addDocument(d);
}
|
}
|
||||||
|
|
||||||
// make sure we create more than one segment to test merging
|
// make sure we create more than one segment to test merging
|
||||||
writer.commit();
|
writer.commit();
|
||||||
|
|
||||||
// now we make sure to have different payload lengths next at the next skip point
|
// now we make sure to have different payload lengths next at the next skip point
|
||||||
for (int i = 0; i < numDocs; i++) {
|
for (int i = 0; i < numDocs; i++) {
|
||||||
-      analyzer = new PayloadAnalyzer(fieldName, payloadData, offset, i);
+      analyzer.setPayloadData(fieldName, payloadData, offset, i);
       offset += i * numTerms;
-      writer.addDocument(d, analyzer);
+      writer.addDocument(d);
}
|
}
|
||||||
|
|
||||||
writer.forceMerge(1);
|
writer.forceMerge(1);
|
||||||
// flush
|
// flush
|
||||||
writer.close();
|
writer.close();
|
||||||
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Verify the index
|
* Verify the index
|
||||||
* first we test if all payloads are stored correctly
|
* first we test if all payloads are stored correctly
|
||||||
*/
|
*/
|
||||||
IndexReader reader = DirectoryReader.open(dir);
|
IndexReader reader = DirectoryReader.open(dir);
|
||||||
|
|
||||||
byte[] verifyPayloadData = new byte[payloadDataLength];
|
byte[] verifyPayloadData = new byte[payloadDataLength];
|
||||||
offset = 0;
|
offset = 0;
|
||||||
DocsAndPositionsEnum[] tps = new DocsAndPositionsEnum[numTerms];
|
DocsAndPositionsEnum[] tps = new DocsAndPositionsEnum[numTerms];
|
||||||
for (int i = 0; i < numTerms; i++) {
|
for (int i = 0; i < numTerms; i++) {
|
||||||
tps[i] = MultiFields.getTermPositionsEnum(reader,
|
tps[i] = MultiFields.getTermPositionsEnum(reader,
|
||||||
MultiFields.getLiveDocs(reader),
|
MultiFields.getLiveDocs(reader),
|
||||||
terms[i].field(),
|
terms[i].field(),
|
||||||
new BytesRef(terms[i].text()));
|
new BytesRef(terms[i].text()));
|
||||||
}
|
}
|
||||||
|
|
||||||
while (tps[0].nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
|
while (tps[0].nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
|
||||||
for (int i = 1; i < numTerms; i++) {
|
for (int i = 1; i < numTerms; i++) {
|
||||||
tps[i].nextDoc();
|
tps[i].nextDoc();
|
||||||
}
|
}
|
||||||
int freq = tps[0].freq();
|
int freq = tps[0].freq();
|
||||||
|
|
||||||
for (int i = 0; i < freq; i++) {
|
for (int i = 0; i < freq; i++) {
|
||||||
for (int j = 0; j < numTerms; j++) {
|
for (int j = 0; j < numTerms; j++) {
|
||||||
tps[j].nextPosition();
|
tps[j].nextPosition();
|
||||||
BytesRef br = tps[j].getPayload();
|
BytesRef br = tps[j].getPayload();
|
||||||
if (br != null) {
|
if (br != null) {
|
||||||
System.arraycopy(br.bytes, br.offset, verifyPayloadData, offset, br.length);
|
System.arraycopy(br.bytes, br.offset, verifyPayloadData, offset, br.length);
|
||||||
offset += br.length;
|
offset += br.length;
|
||||||
}
|
}
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
assertByteArrayEquals(payloadData, verifyPayloadData);
|
assertByteArrayEquals(payloadData, verifyPayloadData);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* test lazy skipping
|
* test lazy skipping
|
||||||
*/
|
*/
|
||||||
DocsAndPositionsEnum tp = MultiFields.getTermPositionsEnum(reader,
|
DocsAndPositionsEnum tp = MultiFields.getTermPositionsEnum(reader,
|
||||||
MultiFields.getLiveDocs(reader),
|
MultiFields.getLiveDocs(reader),
|
||||||
terms[0].field(),
|
terms[0].field(),
|
||||||
new BytesRef(terms[0].text()));
|
new BytesRef(terms[0].text()));
|
||||||
tp.nextDoc();
|
tp.nextDoc();
|
||||||
tp.nextPosition();
|
tp.nextPosition();
|
||||||
// NOTE: prior rev of this test was failing to first
|
// NOTE: prior rev of this test was failing to first
|
||||||
// call next here:
|
// call next here:
|
||||||
tp.nextDoc();
|
tp.nextDoc();
|
||||||
// now we don't read this payload
|
// now we don't read this payload
|
||||||
tp.nextPosition();
|
tp.nextPosition();
|
||||||
BytesRef payload = tp.getPayload();
|
BytesRef payload = tp.getPayload();
|
||||||
assertEquals("Wrong payload length.", 1, payload.length);
|
assertEquals("Wrong payload length.", 1, payload.length);
|
||||||
assertEquals(payload.bytes[payload.offset], payloadData[numTerms]);
|
assertEquals(payload.bytes[payload.offset], payloadData[numTerms]);
|
||||||
tp.nextDoc();
|
tp.nextDoc();
|
||||||
tp.nextPosition();
|
tp.nextPosition();
|
||||||
|
|
||||||
// we don't read this payload and skip to a different document
|
// we don't read this payload and skip to a different document
|
||||||
tp.advance(5);
|
tp.advance(5);
|
||||||
tp.nextPosition();
|
tp.nextPosition();
|
||||||
payload = tp.getPayload();
|
payload = tp.getPayload();
|
||||||
assertEquals("Wrong payload length.", 1, payload.length);
|
assertEquals("Wrong payload length.", 1, payload.length);
|
||||||
assertEquals(payload.bytes[payload.offset], payloadData[5 * numTerms]);
|
assertEquals(payload.bytes[payload.offset], payloadData[5 * numTerms]);
|
||||||
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Test different lengths at skip points
|
* Test different lengths at skip points
|
||||||
*/
|
*/
|
||||||
tp = MultiFields.getTermPositionsEnum(reader,
|
tp = MultiFields.getTermPositionsEnum(reader,
|
||||||
MultiFields.getLiveDocs(reader),
|
MultiFields.getLiveDocs(reader),
|
||||||
terms[1].field(),
|
terms[1].field(),
|
||||||
new BytesRef(terms[1].text()));
|
new BytesRef(terms[1].text()));
|
||||||
tp.nextDoc();
|
tp.nextDoc();
|
||||||
tp.nextPosition();
|
tp.nextPosition();
|
||||||
assertEquals("Wrong payload length.", 1, tp.getPayload().length);
|
assertEquals("Wrong payload length.", 1, tp.getPayload().length);
|
||||||
tp.advance(skipInterval - 1);
|
tp.advance(skipInterval - 1);
|
||||||
tp.nextPosition();
|
tp.nextPosition();
|
||||||
assertEquals("Wrong payload length.", 1, tp.getPayload().length);
|
assertEquals("Wrong payload length.", 1, tp.getPayload().length);
|
||||||
tp.advance(2 * skipInterval - 1);
|
tp.advance(2 * skipInterval - 1);
|
||||||
tp.nextPosition();
|
tp.nextPosition();
|
||||||
assertEquals("Wrong payload length.", 1, tp.getPayload().length);
|
assertEquals("Wrong payload length.", 1, tp.getPayload().length);
|
||||||
tp.advance(3 * skipInterval - 1);
|
tp.advance(3 * skipInterval - 1);
|
||||||
tp.nextPosition();
|
tp.nextPosition();
|
||||||
assertEquals("Wrong payload length.", 3 * skipInterval - 2 * numDocs - 1, tp.getPayload().length);
|
assertEquals("Wrong payload length.", 3 * skipInterval - 2 * numDocs - 1, tp.getPayload().length);
|
||||||
|
|
||||||
reader.close();
|
reader.close();
|
||||||
|
|
||||||
// test long payload
|
// test long payload
|
||||||
analyzer = new PayloadAnalyzer();
|
analyzer = new PayloadAnalyzer();
|
||||||
writer = new IndexWriter(dir, newIndexWriterConfig(analyzer)
|
writer = new IndexWriter(dir, newIndexWriterConfig(analyzer)
|
||||||
.setOpenMode(OpenMode.CREATE));
|
.setOpenMode(OpenMode.CREATE));
|
||||||
String singleTerm = "lucene";
|
String singleTerm = "lucene";
|
||||||
|
|
||||||
d = new Document();
|
d = new Document();
|
||||||
d.add(newTextField(fieldName, singleTerm, Field.Store.NO));
|
d.add(newTextField(fieldName, singleTerm, Field.Store.NO));
|
||||||
// add a payload whose length is greater than the buffer size of BufferedIndexOutput
|
// add a payload whose length is greater than the buffer size of BufferedIndexOutput
|
||||||
payloadData = generateRandomData(2000);
|
payloadData = generateRandomData(2000);
|
||||||
analyzer.setPayloadData(fieldName, payloadData, 100, 1500);
|
analyzer.setPayloadData(fieldName, payloadData, 100, 1500);
|
||||||
writer.addDocument(d);
|
writer.addDocument(d);
|
||||||
|
|
||||||
|
|
||||||
writer.forceMerge(1);
|
writer.forceMerge(1);
|
||||||
// flush
|
// flush
|
||||||
writer.close();
|
writer.close();
|
||||||
|
|
||||||
reader = DirectoryReader.open(dir);
|
reader = DirectoryReader.open(dir);
|
||||||
tp = MultiFields.getTermPositionsEnum(reader,
|
tp = MultiFields.getTermPositionsEnum(reader,
|
||||||
MultiFields.getLiveDocs(reader),
|
MultiFields.getLiveDocs(reader),
|
||||||
fieldName,
|
fieldName,
|
||||||
new BytesRef(singleTerm));
|
new BytesRef(singleTerm));
|
||||||
tp.nextDoc();
|
tp.nextDoc();
|
||||||
tp.nextPosition();
|
tp.nextPosition();
|
||||||
|
|
||||||
BytesRef br = tp.getPayload();
|
BytesRef br = tp.getPayload();
|
||||||
verifyPayloadData = new byte[br.length];
|
verifyPayloadData = new byte[br.length];
|
||||||
byte[] portion = new byte[1500];
|
byte[] portion = new byte[1500];
|
||||||
System.arraycopy(payloadData, 100, portion, 0, 1500);
|
System.arraycopy(payloadData, 100, portion, 0, 1500);
|
||||||
|
|
||||||
assertByteArrayEquals(portion, br.bytes, br.offset, br.length);
|
assertByteArrayEquals(portion, br.bytes, br.offset, br.length);
|
||||||
reader.close();
|
reader.close();
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static final Charset utf8 = StandardCharsets.UTF_8;
|
static final Charset utf8 = StandardCharsets.UTF_8;
|
||||||
   private void generateRandomData(byte[] data) {
     // this test needs the random data to be valid unicode
     String s = TestUtil.randomFixedByteLengthUnicodeString(random(), data.length);
     byte b[] = s.getBytes(utf8);
     assert b.length == data.length;
     System.arraycopy(b, 0, data, 0, b.length);
   }
 
   private byte[] generateRandomData(int n) {
     byte[] data = new byte[n];
     generateRandomData(data);
     return data;
   }
 
   private Term[] generateTerms(String fieldName, int n) {
     int maxDigits = (int) (Math.log(n) / Math.log(10));
     Term[] terms = new Term[n];
     StringBuilder sb = new StringBuilder();
     for (int i = 0; i < n; i++) {
       sb.setLength(0);
       sb.append("t");
       int zeros = maxDigits - (int) (Math.log(i) / Math.log(10));
       for (int j = 0; j < zeros; j++) {
         sb.append("0");
       }
       sb.append(i);
       terms[i] = new Term(fieldName, sb.toString());
     }
     return terms;
   }
 
   void assertByteArrayEquals(byte[] b1, byte[] b2) {
     if (b1.length != b2.length) {
       fail("Byte arrays have different lengths: " + b1.length + ", " + b2.length);
     }
 
     for (int i = 0; i < b1.length; i++) {
       if (b1[i] != b2[i]) {
         fail("Byte arrays different at index " + i + ": " + b1[i] + ", " + b2[i]);
       }
     }
   }
 
   void assertByteArrayEquals(byte[] b1, byte[] b2, int b2offset, int b2length) {
     if (b1.length != b2length) {
       fail("Byte arrays have different lengths: " + b1.length + ", " + b2length);
     }
 
     for (int i = 0; i < b1.length; i++) {
       if (b1[i] != b2[b2offset+i]) {
         fail("Byte arrays different at index " + i + ": " + b1[i] + ", " + b2[b2offset+i]);
       }
     }
   }
 
+  static class PayloadData {
+    byte[] data;
+    int offset;
+    int length;
+
+    PayloadData(byte[] data, int offset, int length) {
+      this.data = data;
+      this.offset = offset;
+      this.length = length;
+    }
+  }
+
   /**
-   * This Analyzer uses an WhitespaceTokenizer and PayloadFilter.
+   * This Analyzer uses an MockTokenizer and PayloadFilter.
    */
   private static class PayloadAnalyzer extends Analyzer {
     Map<String,PayloadData> fieldToData = new HashMap<>();
 
     public PayloadAnalyzer() {
       super(PER_FIELD_REUSE_STRATEGY);
     }
 
     public PayloadAnalyzer(String field, byte[] data, int offset, int length) {
       super(PER_FIELD_REUSE_STRATEGY);
       setPayloadData(field, data, offset, length);
     }
 
     void setPayloadData(String field, byte[] data, int offset, int length) {
       fieldToData.put(field, new PayloadData(data, offset, length));
     }
 
     @Override
     public TokenStreamComponents createComponents(String fieldName) {
       PayloadData payload = fieldToData.get(fieldName);
       Tokenizer ts = new MockTokenizer(MockTokenizer.WHITESPACE, false);
       TokenStream tokenStream = (payload != null) ?
-          new PayloadFilter(ts, payload.data, payload.offset, payload.length) : ts;
+          new PayloadFilter(ts, fieldName, fieldToData) : ts;
       return new TokenStreamComponents(ts, tokenStream);
     }
-
-    private static class PayloadData {
-      byte[] data;
-      int offset;
-      int length;
-
-      PayloadData(byte[] data, int offset, int length) {
-        this.data = data;
-        this.offset = offset;
-        this.length = length;
-      }
-    }
   }
 
   /**
    * This Filter adds payloads to the tokens.
    */
   private static class PayloadFilter extends TokenFilter {
-    private byte[] data;
-    private int length;
-    private int offset;
-    private int startOffset;
     PayloadAttribute payloadAtt;
     CharTermAttribute termAttribute;
+    private Map<String,PayloadData> fieldToData;
+    private String fieldName;
+    private PayloadData payloadData;
+    private int offset;
 
-    public PayloadFilter(TokenStream in, byte[] data, int offset, int length) {
+    public PayloadFilter(TokenStream in, String fieldName, Map<String,PayloadData> fieldToData) {
       super(in);
-      this.data = data;
-      this.length = length;
-      this.offset = offset;
-      this.startOffset = offset;
+      this.fieldToData = fieldToData;
+      this.fieldName = fieldName;
       payloadAtt = addAttribute(PayloadAttribute.class);
       termAttribute = addAttribute(CharTermAttribute.class);
     }
 
     @Override
     public boolean incrementToken() throws IOException {
       boolean hasNext = input.incrementToken();
       if (!hasNext) {
         return false;
       }
 
       // Some values of the same field are to have payloads and others not
-      if (offset + length <= data.length && !termAttribute.toString().endsWith("NO PAYLOAD")) {
-        BytesRef p = new BytesRef(data, offset, length);
+      if (offset + payloadData.length <= payloadData.data.length && !termAttribute.toString().endsWith("NO PAYLOAD")) {
+        BytesRef p = new BytesRef(payloadData.data, offset, payloadData.length);
         payloadAtt.setPayload(p);
-        offset += length;
+        offset += payloadData.length;
       } else {
         payloadAtt.setPayload(null);
       }
 
       return true;
     }
 
     @Override
     public void reset() throws IOException {
       super.reset();
-      this.offset = startOffset;
+      this.payloadData = fieldToData.get(fieldName);
+      this.offset = payloadData.offset;
     }
   }
 
   public void testThreadSafety() throws Exception {
     final int numThreads = 5;
     final int numDocs = atLeast(50);
     final ByteArrayPool pool = new ByteArrayPool(numThreads, 5);
 
     Directory dir = newDirectory();
     final IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(new MockAnalyzer(random())));
     final String field = "test";
 
     Thread[] ingesters = new Thread[numThreads];
     for (int i = 0; i < numThreads; i++) {
       ingesters[i] = new Thread() {
         @Override
         public void run() {
           try {
             for (int j = 0; j < numDocs; j++) {
               Document d = new Document();
               d.add(new TextField(field, new PoolingPayloadTokenStream(pool)));
               writer.addDocument(d);
             }
           } catch (Exception e) {
             e.printStackTrace();
             fail(e.toString());
           }
         }
       };
       ingesters[i].start();
     }
 
     for (int i = 0; i < numThreads; i++) {
       ingesters[i].join();
     }
     writer.close();
     IndexReader reader = DirectoryReader.open(dir);
     TermsEnum terms = MultiFields.getFields(reader).terms(field).iterator(null);
     Bits liveDocs = MultiFields.getLiveDocs(reader);
     DocsAndPositionsEnum tp = null;
     while (terms.next() != null) {
       String termText = terms.term().utf8ToString();
       tp = terms.docsAndPositions(liveDocs, tp);
       while(tp.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
         int freq = tp.freq();
         for (int i = 0; i < freq; i++) {
           tp.nextPosition();
           final BytesRef payload = tp.getPayload();
           assertEquals(termText, payload.utf8ToString());
         }
       }
     }
     reader.close();
     dir.close();
     assertEquals(pool.size(), numThreads);
   }
 
   private class PoolingPayloadTokenStream extends TokenStream {
     private byte[] payload;
     private boolean first;
     private ByteArrayPool pool;
     private String term;
 
     CharTermAttribute termAtt;
     PayloadAttribute payloadAtt;
 
     PoolingPayloadTokenStream(ByteArrayPool pool) {
       this.pool = pool;
       payload = pool.get();
       generateRandomData(payload);
       term = new String(payload, 0, payload.length, utf8);
       first = true;
       payloadAtt = addAttribute(PayloadAttribute.class);
       termAtt = addAttribute(CharTermAttribute.class);
     }
 
     @Override
     public boolean incrementToken() throws IOException {
       if (!first) return false;
       first = false;
       clearAttributes();
       termAtt.append(term);
       payloadAtt.setPayload(new BytesRef(payload));
       return true;
     }
 
     @Override
     public void close() throws IOException {
       pool.release(payload);
     }
 
   }
 
   private static class ByteArrayPool {
     private List<byte[]> pool;
 
     ByteArrayPool(int capacity, int size) {
       pool = new ArrayList<>();
       for (int i = 0; i < capacity; i++) {
         pool.add(new byte[size]);
       }
     }
 
     synchronized byte[] get() {
       return pool.remove(0);
     }
 
     synchronized void release(byte[] b) {
       pool.add(b);
     }
 
     synchronized int size() {
       return pool.size();
     }
   }
 
   public void testAcrossFields() throws Exception {
     Directory dir = newDirectory();
@@ -646,5 +645,4 @@ public class TestPayloads extends LuceneTestCase {
     reader.close();
     dir.close();
   }
 
 }
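The rework above is the test-side consequence of removing the per-document Analyzer overloads: instead of handing IndexWriter a freshly configured PayloadAnalyzer for each document, the test keeps one analyzer whose per-field payload data can be swapped between addDocument calls, and PayloadFilter re-reads that data in reset(). A minimal sketch of the resulting calling pattern, as a fragment assuming the TestPayloads context above (the field name, document text, and slice sizes are illustrative, not taken from the patch):

    // One analyzer for the whole writer; payload data is swapped per document.
    Directory dir = newDirectory();
    PayloadAnalyzer analyzer = new PayloadAnalyzer();             // defined in the diff above
    IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(analyzer));

    byte[] data = generateRandomData(100);                        // helper shown above
    analyzer.setPayloadData("f", data, 0, 10);                    // picked up by PayloadFilter.reset()
    Document d = new Document();
    d.add(new TextField("f", "some text with payloads", Field.Store.NO));
    writer.addDocument(d);                                        // no Analyzer argument anymore

    analyzer.setPayloadData("f", data, 10, 20);                   // different slice for the next document
    writer.addDocument(d);
    writer.close();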
@@ -390,9 +390,9 @@ public class TestControlledRealTimeReopenThread extends ThreadedIndexingAndSearc
 
       @Override
       public void updateDocument(Term term,
-                                 IndexDocument doc, Analyzer analyzer)
+                                 IndexDocument doc)
           throws IOException {
-        super.updateDocument(term, doc, analyzer);
+        super.updateDocument(term, doc);
         try {
           if (waitAfterUpdate) {
             signal.countDown();
@@ -2040,22 +2040,22 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatte
     Document doc = new Document();
     doc.add(new IntField(NUMERIC_FIELD_NAME, 1, Field.Store.NO));
     doc.add(new StoredField(NUMERIC_FIELD_NAME, 1));
-    writer.addDocument(doc, analyzer);
+    writer.addDocument(doc);
 
     doc = new Document();
     doc.add(new IntField(NUMERIC_FIELD_NAME, 3, Field.Store.NO));
     doc.add(new StoredField(NUMERIC_FIELD_NAME, 3));
-    writer.addDocument(doc, analyzer);
+    writer.addDocument(doc);
 
     doc = new Document();
     doc.add(new IntField(NUMERIC_FIELD_NAME, 5, Field.Store.NO));
     doc.add(new StoredField(NUMERIC_FIELD_NAME, 5));
-    writer.addDocument(doc, analyzer);
+    writer.addDocument(doc);
 
     doc = new Document();
     doc.add(new IntField(NUMERIC_FIELD_NAME, 7, Field.Store.NO));
     doc.add(new StoredField(NUMERIC_FIELD_NAME, 7));
-    writer.addDocument(doc, analyzer);
+    writer.addDocument(doc);
 
     Document childDoc = doc(FIELD_NAME, "child document");
     Document parentDoc = doc(FIELD_NAME, "parent document");
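The same mechanical change repeats across the callers above and below: the Analyzer argument to addDocument/updateDocument is gone, so the analyzer has to be fixed once on the IndexWriterConfig. A minimal, self-contained sketch of the post-change usage (the directory, field name, and analyzer choice are illustrative, not taken from this patch):

    import org.apache.lucene.analysis.Analyzer;
    import org.apache.lucene.analysis.standard.StandardAnalyzer;
    import org.apache.lucene.document.Document;
    import org.apache.lucene.document.Field;
    import org.apache.lucene.document.TextField;
    import org.apache.lucene.index.IndexWriter;
    import org.apache.lucene.index.IndexWriterConfig;
    import org.apache.lucene.store.Directory;
    import org.apache.lucene.store.RAMDirectory;

    public class AddDocumentSketch {
      public static void main(String[] args) throws Exception {
        Directory dir = new RAMDirectory();
        Analyzer analyzer = new StandardAnalyzer();
        // The analyzer now lives on the config; every add/updateDocument call uses it.
        IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(analyzer));

        Document doc = new Document();
        doc.add(new TextField("contents", "some content written here", Field.Store.YES));
        writer.addDocument(doc);          // formerly writer.addDocument(doc, analyzer)
        writer.close();
      }
    }

Per-field differences that used to be expressed by passing a different analyzer per call would instead go through a wrapper analyzer, as the Solr changes further down illustrate.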
@@ -47,7 +47,7 @@ import org.apache.lucene.util.packed.PackedLongValues;
  * will be sorted while segments resulting from a flush will be in the order
  * in which documents have been added.
  * <p><b>NOTE</b>: Never use this policy if you rely on
- * {@link IndexWriter#addDocuments(Iterable, Analyzer) IndexWriter.addDocuments}
+ * {@link IndexWriter#addDocuments(Iterable) IndexWriter.addDocuments}
  * to have sequentially-assigned doc IDs, this policy will scatter doc IDs.
  * <p><b>NOTE</b>: This policy should only be used with idempotent {@code Sort}s
  * so that the order of segments is predictable. For example, using
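For context on the NOTE above: addDocuments indexes the given documents as one contiguous block with sequential doc IDs, which block-oriented features such as parent/child join queries rely on, and a policy that re-sorts documents inside merged segments cannot guarantee that adjacency. A sketch of the call the javadoc refers to (a fragment; the document variables are illustrative and not code from this patch):

    // Children first, parent last is the usual block layout for join queries.
    List<Document> block = new ArrayList<>();
    block.add(childDoc1);
    block.add(childDoc2);
    block.add(parentDoc);
    writer.addDocuments(block);   // one block, sequential doc IDs within it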
@@ -103,11 +103,6 @@ public class RandomIndexWriter implements Closeable {
    * @see IndexWriter#addDocument(org.apache.lucene.index.IndexDocument)
    */
   public <T extends IndexableField> void addDocument(final IndexDocument doc) throws IOException {
-    LuceneTestCase.maybeChangeLiveIndexWriterConfig(r, w.getConfig());
-    addDocument(doc, w.getAnalyzer());
-  }
-
-  public <T extends IndexableField> void addDocument(final IndexDocument doc, Analyzer a) throws IOException {
     LuceneTestCase.maybeChangeLiveIndexWriterConfig(r, w.getConfig());
     if (r.nextInt(5) == 3) {
       // TODO: maybe, we should simply buffer up added docs
@@ -141,9 +136,9 @@ public class RandomIndexWriter implements Closeable {
             }
           };
         }
-      }, a);
+      });
     } else {
-      w.addDocument(doc, a);
+      w.addDocument(doc);
     }
 
     maybeCommit();
@@ -536,7 +536,7 @@ public final class SolrCore implements SolrInfoMBean, Closeable {
       log.warn(logid+"Solr index directory '" + new File(indexDir) + "' doesn't exist."
               + " Creating new index...");
 
-      SolrIndexWriter writer = SolrIndexWriter.create("SolrCore.initIndex", indexDir, getDirectoryFactory(), true,
+      SolrIndexWriter writer = SolrIndexWriter.create(this, "SolrCore.initIndex", indexDir, getDirectoryFactory(), true,
                                                       getLatestSchema(), solrConfig.indexConfig, solrDelPolicy, codec);
       writer.close();
     }
@@ -264,7 +264,7 @@ public final class DefaultSolrCoreState extends SolrCoreState implements Recover
   }
 
   protected SolrIndexWriter createMainIndexWriter(SolrCore core, String name) throws IOException {
-    return SolrIndexWriter.create(name, core.getNewIndexDir(),
+    return SolrIndexWriter.create(core, name, core.getNewIndexDir(),
         core.getDirectoryFactory(), false, core.getLatestSchema(),
         core.getSolrConfig().indexConfig, core.getDeletionPolicy(), core.getCodec());
   }
@@ -235,11 +235,11 @@ public class DirectUpdateHandler2 extends UpdateHandler implements SolrCoreState
         }
 
         if (cmd.isBlock()) {
-          writer.updateDocuments(updateTerm, cmd, schema.getIndexAnalyzer());
+          writer.updateDocuments(updateTerm, cmd);
         } else {
           Document luceneDocument = cmd.getLuceneDocument();
           // SolrCore.verbose("updateDocument",updateTerm,luceneDocument,writer);
-          writer.updateDocument(updateTerm, luceneDocument, schema.getIndexAnalyzer());
+          writer.updateDocument(updateTerm, luceneDocument);
         }
         // SolrCore.verbose("updateDocument",updateTerm,"DONE");
 
@@ -264,9 +264,9 @@ public class DirectUpdateHandler2 extends UpdateHandler implements SolrCoreState
       } else {
         // allow duplicates
         if (cmd.isBlock()) {
-          writer.addDocuments(cmd, schema.getIndexAnalyzer());
+          writer.addDocuments(cmd);
         } else {
-          writer.addDocument(cmd.getLuceneDocument(), schema.getIndexAnalyzer());
+          writer.addDocument(cmd.getLuceneDocument());
         }
 
         if (ulog != null) ulog.add(cmd);
@@ -442,8 +442,7 @@ public class DirectUpdateHandler2 extends UpdateHandler implements SolrCoreState
       RefCounted<IndexWriter> iw = solrCoreState.getIndexWriter(core);
       try {
         IndexWriter writer = iw.get();
-        writer.updateDocument(idTerm, luceneDocument, cmd.getReq().getSchema()
-            .getIndexAnalyzer());
+        writer.updateDocument(idTerm, luceneDocument);
 
         for (Query q : dbqList) {
           writer.deleteDocuments(new DeleteByQueryWrapper(q, core.getLatestSchema()));
@@ -17,6 +17,14 @@
 
 package org.apache.solr.update;
 
+import java.io.File;
+import java.io.FileOutputStream;
+import java.io.PrintStream;
+import java.util.List;
+import java.util.Map;
+
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.DelegatingAnalyzerWrapper;
 import org.apache.lucene.index.*;
 import org.apache.lucene.index.IndexWriter.IndexReaderWarmer;
 import org.apache.lucene.util.InfoStream;
@@ -24,16 +32,14 @@ import org.apache.lucene.util.Version;
 import org.apache.solr.common.cloud.ZkNodeProps;
 import org.apache.solr.common.util.NamedList;
 import org.apache.solr.core.MapSerializable;
-import org.apache.solr.core.SolrConfig;
 import org.apache.solr.core.PluginInfo;
+import org.apache.solr.core.SolrConfig;
+import org.apache.solr.core.SolrCore;
 import org.apache.solr.schema.IndexSchema;
 import org.apache.solr.util.SolrPluginUtils;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
-import java.util.List;
-import java.util.Map;
-
 import static org.apache.solr.core.Config.assertWarnOrFail;
 
 /**
@@ -180,12 +186,23 @@ public class SolrIndexConfig implements MapSerializable {
     return l.isEmpty() ? def : l.get(0);
   }
 
-  public IndexWriterConfig toIndexWriterConfig(IndexSchema schema) {
-    // so that we can update the analyzer on core reload, we pass null
-    // for the default analyzer, and explicitly pass an analyzer on
-    // appropriate calls to IndexWriter
-
-    IndexWriterConfig iwc = new IndexWriterConfig(null);
+  private static class DelayedSchemaAnalyzer extends DelegatingAnalyzerWrapper {
+    private final SolrCore core;
+
+    public DelayedSchemaAnalyzer(SolrCore core) {
+      super(PER_FIELD_REUSE_STRATEGY);
+      this.core = core;
+    }
+
+    @Override
+    protected Analyzer getWrappedAnalyzer(String fieldName) {
+      return core.getLatestSchema().getIndexAnalyzer();
+    }
+  }
+
+  public IndexWriterConfig toIndexWriterConfig(SolrCore core) {
+    IndexSchema schema = core.getLatestSchema();
+    IndexWriterConfig iwc = new IndexWriterConfig(new DelayedSchemaAnalyzer(core));
     if (maxBufferedDocs != -1)
       iwc.setMaxBufferedDocs(maxBufferedDocs);
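Before this change Solr passed null as the config-level analyzer and supplied schema.getIndexAnalyzer() on each IndexWriter call, precisely so that a schema reload would be picked up; with the per-call overloads gone, the DelayedSchemaAnalyzer above recovers that behaviour by deferring the per-field lookup to the latest schema until tokens are actually produced. The same lazy-delegation idea in isolation, as a minimal fragment sketch (the AtomicReference source and StandardAnalyzer default are illustrative, not Solr code):

    // Whatever analyzer is current when tokens are produced gets used, even if
    // the reference is swapped after the IndexWriter has been created.
    final AtomicReference<Analyzer> current = new AtomicReference<>(new StandardAnalyzer());
    Analyzer delegating = new DelegatingAnalyzerWrapper(Analyzer.PER_FIELD_REUSE_STRATEGY) {
      @Override
      protected Analyzer getWrappedAnalyzer(String fieldName) {
        return current.get();
      }
    };
    IndexWriterConfig iwc = new IndexWriterConfig(delegating);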
@@ -121,7 +121,7 @@ public class SolrIndexSplitter {
       } else {
         SolrCore core = searcher.getCore();
         String path = paths.get(partitionNumber);
-        iw = SolrIndexWriter.create("SplittingIndexWriter"+partitionNumber + (ranges != null ? " " + ranges.get(partitionNumber) : ""), path,
+        iw = SolrIndexWriter.create(core, "SplittingIndexWriter"+partitionNumber + (ranges != null ? " " + ranges.get(partitionNumber) : ""), path,
                                     core.getDirectoryFactory(), true, core.getLatestSchema(),
                                     core.getSolrConfig().indexConfig, core.getDeletionPolicy(), core.getCodec());
       }
@@ -27,8 +27,9 @@ import org.apache.lucene.index.IndexWriterConfig;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.util.InfoStream;
 import org.apache.solr.common.util.IOUtils;
-import org.apache.solr.core.DirectoryFactory;
 import org.apache.solr.core.DirectoryFactory.DirContext;
+import org.apache.solr.core.DirectoryFactory;
+import org.apache.solr.core.SolrCore;
 import org.apache.solr.schema.IndexSchema;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
@@ -56,12 +57,12 @@ public class SolrIndexWriter extends IndexWriter {
   private InfoStream infoStream;
   private Directory directory;
 
-  public static SolrIndexWriter create(String name, String path, DirectoryFactory directoryFactory, boolean create, IndexSchema schema, SolrIndexConfig config, IndexDeletionPolicy delPolicy, Codec codec) throws IOException {
+  public static SolrIndexWriter create(SolrCore core, String name, String path, DirectoryFactory directoryFactory, boolean create, IndexSchema schema, SolrIndexConfig config, IndexDeletionPolicy delPolicy, Codec codec) throws IOException {
 
     SolrIndexWriter w = null;
     final Directory d = directoryFactory.get(path, DirContext.DEFAULT, config.lockType);
     try {
-      w = new SolrIndexWriter(name, path, d, create, schema,
+      w = new SolrIndexWriter(core, name, path, d, create, schema,
                               config, delPolicy, codec);
       w.setDirectoryFactory(directoryFactory);
       return w;
@@ -73,9 +74,9 @@ public class SolrIndexWriter extends IndexWriter {
     }
   }
 
-  private SolrIndexWriter(String name, String path, Directory directory, boolean create, IndexSchema schema, SolrIndexConfig config, IndexDeletionPolicy delPolicy, Codec codec) throws IOException {
+  private SolrIndexWriter(SolrCore core, String name, String path, Directory directory, boolean create, IndexSchema schema, SolrIndexConfig config, IndexDeletionPolicy delPolicy, Codec codec) throws IOException {
     super(directory,
-          config.toIndexWriterConfig(schema).
+          config.toIndexWriterConfig(core).
               setOpenMode(create ? IndexWriterConfig.OpenMode.CREATE : IndexWriterConfig.OpenMode.APPEND).
               setIndexDeletionPolicy(delPolicy).setCodec(codec)
           );
@@ -116,7 +116,7 @@ public class TestConfig extends SolrTestCaseJ4 {
     assertEquals("default useCompoundFile", false, sic.getUseCompoundFile());
 
     IndexSchema indexSchema = IndexSchemaFactory.buildIndexSchema("schema.xml", solrConfig);
-    IndexWriterConfig iwc = sic.toIndexWriterConfig(indexSchema);
+    IndexWriterConfig iwc = sic.toIndexWriterConfig(h.getCore());
 
     assertNotNull("null mp", iwc.getMergePolicy());
     assertTrue("mp is not TMP", iwc.getMergePolicy() instanceof TieredMergePolicy);
@@ -31,7 +31,7 @@ public class TestInfoStreamLogging extends SolrTestCaseJ4 {
   }
 
   public void testIndexConfig() throws Exception {
-    IndexWriterConfig iwc = solrConfig.indexConfig.toIndexWriterConfig(h.getCore().getLatestSchema());
+    IndexWriterConfig iwc = solrConfig.indexConfig.toIndexWriterConfig(h.getCore());
 
     assertTrue(iwc.getInfoStream() instanceof LoggingInfoStream);
   }
@@ -45,7 +45,7 @@ public class TestMergePolicyConfig extends SolrTestCaseJ4 {
 
   public void testDefaultMergePolicyConfig() throws Exception {
     initCore("solrconfig-mergepolicy-defaults.xml","schema-minimal.xml");
-    IndexWriterConfig iwc = solrConfig.indexConfig.toIndexWriterConfig(h.getCore().getLatestSchema());
+    IndexWriterConfig iwc = solrConfig.indexConfig.toIndexWriterConfig(h.getCore());
     assertEquals(false, iwc.getUseCompoundFile());
 
     TieredMergePolicy tieredMP = assertAndCast(TieredMergePolicy.class,
@@ -61,7 +61,7 @@ public class TestMergePolicyConfig extends SolrTestCaseJ4 {
       = Boolean.parseBoolean(System.getProperty("useCompoundFile"));
 
     initCore("solrconfig-mergepolicy-legacy.xml","schema-minimal.xml");
-    IndexWriterConfig iwc = solrConfig.indexConfig.toIndexWriterConfig(h.getCore().getLatestSchema());
+    IndexWriterConfig iwc = solrConfig.indexConfig.toIndexWriterConfig(h.getCore());
     assertEquals(expectCFS, iwc.getUseCompoundFile());
 
 
@@ -81,7 +81,7 @@ public class TestMergePolicyConfig extends SolrTestCaseJ4 {
       = Boolean.parseBoolean(System.getProperty("useCompoundFile"));
 
     initCore("solrconfig-tieredmergepolicy.xml","schema-minimal.xml");
-    IndexWriterConfig iwc = solrConfig.indexConfig.toIndexWriterConfig(h.getCore().getLatestSchema());
+    IndexWriterConfig iwc = solrConfig.indexConfig.toIndexWriterConfig(h.getCore());
     assertEquals(expectCFS, iwc.getUseCompoundFile());
 
 
@@ -122,7 +122,7 @@ public class TestMergePolicyConfig extends SolrTestCaseJ4 {
     System.setProperty("solr.test.log.merge.policy", mpClass.getName());
 
     initCore("solrconfig-logmergepolicy.xml","schema-minimal.xml");
-    IndexWriterConfig iwc = solrConfig.indexConfig.toIndexWriterConfig(h.getCore().getLatestSchema());
+    IndexWriterConfig iwc = solrConfig.indexConfig.toIndexWriterConfig(h.getCore());
 
     // verify some props set to -1 get lucene internal defaults
     assertEquals(-1, solrConfig.indexConfig.maxBufferedDocs);
@@ -47,7 +47,7 @@ public class TestSolrIndexConfig extends SolrTestCaseJ4 {
 
 
   public void testIndexConfigParsing() throws Exception {
-    IndexWriterConfig iwc = solrConfig.indexConfig.toIndexWriterConfig(h.getCore().getLatestSchema());
+    IndexWriterConfig iwc = solrConfig.indexConfig.toIndexWriterConfig(h.getCore());
     try {
       checkIndexWriterConfig(iwc);
     } finally {
@@ -17,6 +17,10 @@ package org.apache.solr.update;
  * limitations under the License.
  */
 
+import java.io.File;
+import java.io.IOException;
+import javax.xml.parsers.ParserConfigurationException;
+
 import org.apache.lucene.index.ConcurrentMergeScheduler;
 import org.apache.lucene.index.IndexWriterConfig;
 import org.apache.lucene.index.SimpleMergedSegmentWarmer;
@@ -26,13 +30,10 @@ import org.apache.solr.core.SolrConfig;
 import org.apache.solr.core.TestMergePolicyConfig;
 import org.apache.solr.schema.IndexSchema;
 import org.apache.solr.schema.IndexSchemaFactory;
+import org.junit.BeforeClass;
 import org.junit.Test;
 import org.xml.sax.SAXException;
 
-import javax.xml.parsers.ParserConfigurationException;
-import java.io.File;
-import java.io.IOException;
-
 /**
  * Testcase for {@link SolrIndexConfig}
  *
@@ -40,13 +41,19 @@ import java.io.IOException;
  */
 public class SolrIndexConfigTest extends SolrTestCaseJ4 {
 
+  @BeforeClass
+  public static void beforeClass() throws Exception {
+    initCore("solrconfig.xml","schema.xml");
+  }
+
   @Test
   public void testFailingSolrIndexConfigCreation() {
     try {
       SolrConfig solrConfig = new SolrConfig("bad-mp-solrconfig.xml");
       SolrIndexConfig solrIndexConfig = new SolrIndexConfig(solrConfig, null, null);
       IndexSchema indexSchema = IndexSchemaFactory.buildIndexSchema("schema.xml", solrConfig);
-      solrIndexConfig.toIndexWriterConfig(indexSchema);
+      h.getCore().setLatestSchema(indexSchema);
+      solrIndexConfig.toIndexWriterConfig(h.getCore());
       fail("a mergePolicy should have an empty constructor in order to be instantiated in Solr thus this should fail ");
     } catch (Exception e) {
       // it failed as expected
@@ -61,8 +68,9 @@ public class SolrIndexConfigTest extends SolrTestCaseJ4 {
                                                             null);
     assertNotNull(solrIndexConfig);
     IndexSchema indexSchema = IndexSchemaFactory.buildIndexSchema("schema.xml", solrConfig);
 
-    IndexWriterConfig iwc = solrIndexConfig.toIndexWriterConfig(indexSchema);
+    h.getCore().setLatestSchema(indexSchema);
+    IndexWriterConfig iwc = solrIndexConfig.toIndexWriterConfig(h.getCore());
 
     assertNotNull("null mp", iwc.getMergePolicy());
     assertTrue("mp is not TMP", iwc.getMergePolicy() instanceof TieredMergePolicy);
@@ -87,7 +95,8 @@ public class SolrIndexConfigTest extends SolrTestCaseJ4 {
     assertEquals(SimpleMergedSegmentWarmer.class.getName(),
                  solrIndexConfig.mergedSegmentWarmerInfo.className);
     IndexSchema indexSchema = IndexSchemaFactory.buildIndexSchema("schema.xml", solrConfig);
-    IndexWriterConfig iwc = solrIndexConfig.toIndexWriterConfig(indexSchema);
+    h.getCore().setLatestSchema(indexSchema);
+    IndexWriterConfig iwc = solrIndexConfig.toIndexWriterConfig(h.getCore());
     assertEquals(SimpleMergedSegmentWarmer.class, iwc.getMergedSegmentWarmer().getClass());
   }