LUCENE-6212: remove per-doc analyzers

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1656272 13f79535-47bb-0310-9956-ffa450edef68
Michael McCandless 2015-02-01 09:12:09 +00:00
parent b5db48c783
commit 1529c57ca1
25 changed files with 574 additions and 647 deletions


@@ -392,6 +392,11 @@ API Changes
   Weight.scoresDocsOutOfOrder and LeafCollector.acceptsDocsOutOfOrder have been
   removed and boolean queries now always score in order.
 
+* LUCENE-6212: IndexWriter no longer accepts per-document Analyzer to
+  add/updateDocument.  These methods were trappy as they made it
+  easy to accidentally index tokens that were not easily
+  searchable.  (Mike McCandless)
+
 Bug Fixes
 
 * LUCENE-5650: Enforce read-only access to any path outside the temporary
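To make the impact of the CHANGES entry above concrete, here is a minimal migration sketch (not part of this commit). It assumes a Lucene 5.x-era API; the field names and analyzer choices are illustrative. Per-field analysis that previously tempted callers into the per-document overloads can instead be declared once, up front, via PerFieldAnalyzerWrapper in the IndexWriterConfig:

import java.io.IOException;
import java.util.HashMap;
import java.util.Map;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.core.KeywordAnalyzer;
import org.apache.lucene.analysis.miscellaneous.PerFieldAnalyzerWrapper;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.RAMDirectory;

public class PerDocAnalyzerMigration {
  public static void main(String[] args) throws IOException {
    // Before: writer.addDocument(doc, someOtherAnalyzer);
    // After: the analysis chain is fixed once in IndexWriterConfig; per-field
    // differences go through PerFieldAnalyzerWrapper instead.
    Map<String, Analyzer> perField = new HashMap<>();
    perField.put("id", new KeywordAnalyzer());   // illustrative field name
    Analyzer analyzer = new PerFieldAnalyzerWrapper(new StandardAnalyzer(), perField);

    Directory dir = new RAMDirectory();
    try (IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(analyzer))) {
      Document doc = new Document();
      doc.add(new TextField("contents", "there is some content written here", Field.Store.YES));
      writer.addDocument(doc);   // no per-document Analyzer argument any more
      writer.commit();
    }
    dir.close();
  }
}

Since IndexWriter is Closeable, try-with-resources works as shown; the wrapper keeps the per-field behaviour discoverable from the config rather than scattered across indexing calls.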


@@ -75,7 +75,7 @@ public class UIMABaseAnalyzerTest extends BaseTokenStreamTestCase {
     doc.add(new TextField("title", dummyTitle, Field.Store.YES));
     String dummyContent = "there is some content written here";
     doc.add(new TextField("contents", dummyContent, Field.Store.YES));
-    writer.addDocument(doc, analyzer);
+    writer.addDocument(doc);
     writer.commit();
 
     // try the search over the first doc
@@ -96,7 +96,7 @@ public class UIMABaseAnalyzerTest extends BaseTokenStreamTestCase {
     doc.add(new TextField("title", dogmasTitle, Field.Store.YES));
     String dogmasContents = "white men can't jump";
     doc.add(new TextField("contents", dogmasContents, Field.Store.YES));
-    writer.addDocument(doc, analyzer);
+    writer.addDocument(doc);
     writer.commit();
 
     directoryReader.close();


@@ -16,11 +16,15 @@
  */
 package org.apache.lucene.classification;
 
+import java.io.IOException;
+import java.util.Random;
+
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field;
 import org.apache.lucene.document.FieldType;
 import org.apache.lucene.document.TextField;
+import org.apache.lucene.index.IndexWriterConfig;
 import org.apache.lucene.index.LeafReader;
 import org.apache.lucene.index.RandomIndexWriter;
 import org.apache.lucene.index.SlowCompositeReaderWrapper;
@@ -32,9 +36,6 @@ import org.apache.lucene.util.TestUtil;
 import org.junit.After;
 import org.junit.Before;
 
-import java.io.IOException;
-import java.util.Random;
-
 /**
  * Base class for testing {@link Classifier}s
  */
@@ -113,7 +114,7 @@ public abstract class ClassificationTestBase<T> extends LuceneTestCase {
       assertEquals("got an assigned class of " + classificationResult.getAssignedClass(), expectedResult, classificationResult.getAssignedClass());
       double score = classificationResult.getScore();
       assertTrue("score should be between 0 and 1, got: " + score, score <= 1 && score >= 0);
-      updateSampleIndex(analyzer);
+      updateSampleIndex();
       ClassificationResult<T> secondClassificationResult = classifier.assignClass(inputDoc);
       assertEquals(classificationResult.getAssignedClass(), secondClassificationResult.getAssignedClass());
       assertEquals(Double.valueOf(score), Double.valueOf(secondClassificationResult.getScore()));
@@ -125,7 +126,8 @@ public abstract class ClassificationTestBase<T> extends LuceneTestCase {
   }
 
   private void populateSampleIndex(Analyzer analyzer) throws IOException {
-    indexWriter.deleteAll();
+    indexWriter.close();
+    indexWriter = new RandomIndexWriter(random(), dir, newIndexWriterConfig(analyzer).setOpenMode(IndexWriterConfig.OpenMode.CREATE));
     indexWriter.commit();
 
     String text;
@@ -138,7 +140,7 @@ public abstract class ClassificationTestBase<T> extends LuceneTestCase {
     doc.add(new Field(categoryFieldName, "politics", ft));
     doc.add(new Field(booleanFieldName, "true", ft));
-    indexWriter.addDocument(doc, analyzer);
+    indexWriter.addDocument(doc);
 
     doc = new Document();
     text = "Mitt Romney seeks to assure Israel and Iran, as well as Jewish voters in the United" +
@@ -146,7 +148,7 @@ public abstract class ClassificationTestBase<T> extends LuceneTestCase {
     doc.add(new Field(textFieldName, text, ft));
     doc.add(new Field(categoryFieldName, "politics", ft));
     doc.add(new Field(booleanFieldName, "true", ft));
-    indexWriter.addDocument(doc, analyzer);
+    indexWriter.addDocument(doc);
 
     doc = new Document();
     text = "And there's a threshold question that he has to answer for the American people and " +
@@ -155,7 +157,7 @@ public abstract class ClassificationTestBase<T> extends LuceneTestCase {
     doc.add(new Field(textFieldName, text, ft));
     doc.add(new Field(categoryFieldName, "politics", ft));
     doc.add(new Field(booleanFieldName, "true", ft));
-    indexWriter.addDocument(doc, analyzer);
+    indexWriter.addDocument(doc);
 
     doc = new Document();
     text = "Still, when it comes to gun policy, many congressional Democrats have \"decided to " +
@@ -164,7 +166,7 @@ public abstract class ClassificationTestBase<T> extends LuceneTestCase {
     doc.add(new Field(textFieldName, text, ft));
     doc.add(new Field(categoryFieldName, "politics", ft));
     doc.add(new Field(booleanFieldName, "true", ft));
-    indexWriter.addDocument(doc, analyzer);
+    indexWriter.addDocument(doc);
 
     doc = new Document();
     text = "Standing amongst the thousands of people at the state Capitol, Jorstad, director of " +
@@ -173,7 +175,7 @@ public abstract class ClassificationTestBase<T> extends LuceneTestCase {
     doc.add(new Field(textFieldName, text, ft));
     doc.add(new Field(categoryFieldName, "technology", ft));
     doc.add(new Field(booleanFieldName, "false", ft));
-    indexWriter.addDocument(doc, analyzer);
+    indexWriter.addDocument(doc);
 
     doc = new Document();
     text = "So, about all those experts and analysts who've spent the past year or so saying " +
@@ -181,7 +183,7 @@ public abstract class ClassificationTestBase<T> extends LuceneTestCase {
     doc.add(new Field(textFieldName, text, ft));
     doc.add(new Field(categoryFieldName, "technology", ft));
     doc.add(new Field(booleanFieldName, "false", ft));
-    indexWriter.addDocument(doc, analyzer);
+    indexWriter.addDocument(doc);
 
     doc = new Document();
     text = "More than 400 million people trust Google with their e-mail, and 50 million store files" +
@@ -190,12 +192,12 @@ public abstract class ClassificationTestBase<T> extends LuceneTestCase {
     doc.add(new Field(textFieldName, text, ft));
     doc.add(new Field(categoryFieldName, "technology", ft));
     doc.add(new Field(booleanFieldName, "false", ft));
-    indexWriter.addDocument(doc, analyzer);
+    indexWriter.addDocument(doc);
 
     doc = new Document();
     text = "unlabeled doc";
     doc.add(new Field(textFieldName, text, ft));
-    indexWriter.addDocument(doc, analyzer);
+    indexWriter.addDocument(doc);
 
     indexWriter.commit();
   }
@@ -217,7 +219,8 @@ public abstract class ClassificationTestBase<T> extends LuceneTestCase {
   }
 
   private void populatePerformanceIndex(Analyzer analyzer) throws IOException {
-    indexWriter.deleteAll();
+    indexWriter.close();
+    indexWriter = new RandomIndexWriter(random(), dir, newIndexWriterConfig(analyzer).setOpenMode(IndexWriterConfig.OpenMode.CREATE));
     indexWriter.commit();
 
     FieldType ft = new FieldType(TextField.TYPE_STORED);
@@ -232,7 +235,7 @@ public abstract class ClassificationTestBase<T> extends LuceneTestCase {
       doc.add(new Field(textFieldName, createRandomString(random), ft));
       doc.add(new Field(categoryFieldName, b ? "technology" : "politics", ft));
       doc.add(new Field(booleanFieldName, String.valueOf(b), ft));
-      indexWriter.addDocument(doc, analyzer);
+      indexWriter.addDocument(doc);
     }
     indexWriter.commit();
   }
@@ -246,7 +249,7 @@ public abstract class ClassificationTestBase<T> extends LuceneTestCase {
     return builder.toString();
   }
 
-  private void updateSampleIndex(Analyzer analyzer) throws Exception {
+  private void updateSampleIndex() throws Exception {
 
     String text;
@@ -256,54 +259,54 @@ public abstract class ClassificationTestBase<T> extends LuceneTestCase {
     doc.add(new Field(categoryFieldName, "politics", ft));
     doc.add(new Field(booleanFieldName, "true", ft));
-    indexWriter.addDocument(doc, analyzer);
+    indexWriter.addDocument(doc);
 
     doc = new Document();
     text = "Julian Zelizer says Bill Clinton is still trying to shape his party, years after the White House, while George W. Bush opts for a much more passive role.";
     doc.add(new Field(textFieldName, text, ft));
     doc.add(new Field(categoryFieldName, "politics", ft));
     doc.add(new Field(booleanFieldName, "true", ft));
-    indexWriter.addDocument(doc, analyzer);
+    indexWriter.addDocument(doc);
 
     doc = new Document();
     text = "Crossfire: Sen. Tim Scott passes on Sen. Lindsey Graham endorsement";
     doc.add(new Field(textFieldName, text, ft));
     doc.add(new Field(categoryFieldName, "politics", ft));
     doc.add(new Field(booleanFieldName, "true", ft));
-    indexWriter.addDocument(doc, analyzer);
+    indexWriter.addDocument(doc);
 
     doc = new Document();
     text = "Illinois becomes 16th state to allow same-sex marriage.";
     doc.add(new Field(textFieldName, text, ft));
     doc.add(new Field(categoryFieldName, "politics", ft));
     doc.add(new Field(booleanFieldName, "true", ft));
-    indexWriter.addDocument(doc, analyzer);
+    indexWriter.addDocument(doc);
 
     doc = new Document();
     text = "Apple is developing iPhones with curved-glass screens and enhanced sensors that detect different levels of pressure, according to a new report.";
     doc.add(new Field(textFieldName, text, ft));
     doc.add(new Field(categoryFieldName, "technology", ft));
     doc.add(new Field(booleanFieldName, "false", ft));
-    indexWriter.addDocument(doc, analyzer);
+    indexWriter.addDocument(doc);
 
     doc = new Document();
     text = "The Xbox One is Microsoft's first new gaming console in eight years. It's a quality piece of hardware but it's also noteworthy because Microsoft is using it to make a statement.";
     doc.add(new Field(textFieldName, text, ft));
     doc.add(new Field(categoryFieldName, "technology", ft));
     doc.add(new Field(booleanFieldName, "false", ft));
-    indexWriter.addDocument(doc, analyzer);
+    indexWriter.addDocument(doc);
 
     doc = new Document();
     text = "Google says it will replace a Google Maps image after a California father complained it shows the body of his teen-age son, who was shot to death in 2009.";
     doc.add(new Field(textFieldName, text, ft));
     doc.add(new Field(categoryFieldName, "technology", ft));
     doc.add(new Field(booleanFieldName, "false", ft));
-    indexWriter.addDocument(doc, analyzer);
+    indexWriter.addDocument(doc);
 
     doc = new Document();
     text = "second unlabeled doc";
     doc.add(new Field(textFieldName, text, ft));
-    indexWriter.addDocument(doc, analyzer);
+    indexWriter.addDocument(doc);
 
     indexWriter.commit();
   }
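The test changes above follow one simple recipe: instead of handing a different Analyzer to addDocument, close the writer and recreate it with a config that carries the desired analyzer. Below is a rough, stand-alone sketch of that recipe using a plain IndexWriter rather than the project's RandomIndexWriter test utility; the class and field names are illustrative and a Lucene 5.x-era API is assumed.

import java.io.IOException;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.RAMDirectory;

public class RebuildWriterSketch {
  // Replace the per-document Analyzer by rebuilding the writer around a new config.
  // OpenMode.CREATE wipes the existing index, matching what the test's populate* methods do.
  static IndexWriter recreate(IndexWriter old, Directory dir, Analyzer analyzer) throws IOException {
    old.close();
    return new IndexWriter(dir, new IndexWriterConfig(analyzer).setOpenMode(OpenMode.CREATE));
  }

  public static void main(String[] args) throws IOException {
    Directory dir = new RAMDirectory();
    IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(new StandardAnalyzer()));

    Document doc = new Document();
    doc.add(new TextField("text", "indexed with the first analyzer", Field.Store.NO));
    writer.addDocument(doc);
    writer.commit();

    // Need a different analysis chain? Rebuild the writer instead of passing an Analyzer per document.
    writer = recreate(writer, dir, new StandardAnalyzer());
    doc = new Document();
    doc.add(new TextField("text", "indexed after the writer was rebuilt", Field.Store.NO));
    writer.addDocument(doc);
    writer.commit();
    writer.close();
    dir.close();
  }
}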


@@ -64,8 +64,6 @@ public class DataSplitterTest extends LuceneTestCase {
     ft.setStoreTermVectorOffsets(true);
     ft.setStoreTermVectorPositions(true);
 
-    Analyzer analyzer = new MockAnalyzer(random());
-
     Document doc;
     Random rnd = random();
     for (int i = 0; i < 100; i++) {
@@ -73,7 +71,7 @@ public class DataSplitterTest extends LuceneTestCase {
       doc.add(new Field(idFieldName, Integer.toString(i), ft));
       doc.add(new Field(textFieldName, TestUtil.randomUnicodeString(rnd, 1024), ft));
       doc.add(new Field(classFieldName, TestUtil.randomUnicodeString(rnd, 10), ft));
-      indexWriter.addDocument(doc, analyzer);
+      indexWriter.addDocument(doc);
     }
 
     indexWriter.commit();


@@ -55,14 +55,12 @@ public class DocToDoubleVectorUtilsTest extends LuceneTestCase {
     ft.setStoreTermVectorOffsets(true);
     ft.setStoreTermVectorPositions(true);
 
-    Analyzer analyzer = new MockAnalyzer(random());
-
     Document doc;
     for (int i = 0; i < 10; i++) {
       doc = new Document();
       doc.add(new Field("id", Integer.toString(i), ft));
       doc.add(new Field("text", random().nextInt(10) + " " + random().nextInt(10) + " " + random().nextInt(10), ft));
-      indexWriter.addDocument(doc, analyzer);
+      indexWriter.addDocument(doc);
     }
 
     indexWriter.commit();


@@ -1127,22 +1127,7 @@ public class IndexWriter implements Closeable, TwoPhaseCommit, Accountable {
    * @throws IOException if there is a low-level IO error
    */
   public void addDocument(IndexDocument doc) throws IOException {
-    addDocument(doc, analyzer);
-  }
-
-  /**
-   * Adds a document to this index, using the provided analyzer instead of the
-   * value of {@link #getAnalyzer()}.
-   *
-   * <p>See {@link #addDocument(IndexDocument)} for details on
-   * index and IndexWriter state after an Exception, and
-   * flushing/merging temporary free space requirements.</p>
-   *
-   * @throws CorruptIndexException if the index is corrupt
-   * @throws IOException if there is a low-level IO error
-   */
-  public void addDocument(IndexDocument doc, Analyzer analyzer) throws IOException {
-    updateDocument(null, doc, analyzer);
+    updateDocument(null, doc);
   }
 
   /**
@@ -1183,22 +1168,7 @@ public class IndexWriter implements Closeable, TwoPhaseCommit, Accountable {
    * @lucene.experimental
    */
   public void addDocuments(Iterable<? extends IndexDocument> docs) throws IOException {
-    addDocuments(docs, analyzer);
-  }
-
-  /**
-   * Atomically adds a block of documents, analyzed using the
-   * provided analyzer, with sequentially assigned document
-   * IDs, such that an external reader will see all or none
-   * of the documents.
-   *
-   * @throws CorruptIndexException if the index is corrupt
-   * @throws IOException if there is a low-level IO error
-   *
-   * @lucene.experimental
-   */
-  public void addDocuments(Iterable<? extends IndexDocument> docs, Analyzer analyzer) throws IOException {
-    updateDocuments(null, docs, analyzer);
+    updateDocuments(null, docs);
   }
 
   /**
@@ -1215,24 +1185,6 @@ public class IndexWriter implements Closeable, TwoPhaseCommit, Accountable {
    * @lucene.experimental
    */
   public void updateDocuments(Term delTerm, Iterable<? extends IndexDocument> docs) throws IOException {
-    updateDocuments(delTerm, docs, analyzer);
-  }
-
-  /**
-   * Atomically deletes documents matching the provided
-   * delTerm and adds a block of documents, analyzed using
-   * the provided analyzer, with sequentially
-   * assigned document IDs, such that an external reader
-   * will see all or none of the documents.
-   *
-   * See {@link #addDocuments(Iterable)}.
-   *
-   * @throws CorruptIndexException if the index is corrupt
-   * @throws IOException if there is a low-level IO error
-   *
-   * @lucene.experimental
-   */
-  public void updateDocuments(Term delTerm, Iterable<? extends IndexDocument> docs, Analyzer analyzer) throws IOException {
     ensureOpen();
     try {
       boolean success = false;
@@ -1384,26 +1336,6 @@ public class IndexWriter implements Closeable, TwoPhaseCommit, Accountable {
    */
   public void updateDocument(Term term, IndexDocument doc) throws IOException {
     ensureOpen();
-    updateDocument(term, doc, analyzer);
-  }
-
-  /**
-   * Updates a document by first deleting the document(s)
-   * containing <code>term</code> and then adding the new
-   * document.  The delete and then add are atomic as seen
-   * by a reader on the same index (flush may happen only after
-   * the add).
-   *
-   * @param term the term to identify the document(s) to be
-   *   deleted
-   * @param doc the document to be added
-   * @param analyzer the analyzer to use when analyzing the document
-   * @throws CorruptIndexException if the index is corrupt
-   * @throws IOException if there is a low-level IO error
-   */
-  public void updateDocument(Term term, IndexDocument doc, Analyzer analyzer)
-      throws IOException {
-    ensureOpen();
     try {
       boolean success = false;
       try {


@@ -47,15 +47,6 @@ public class TrackingIndexWriter {
     this.writer = writer;
   }
 
-  /** Calls {@link
-   *  IndexWriter#updateDocument(Term,IndexDocument,Analyzer)}
-   *  and returns the generation that reflects this change. */
-  public long updateDocument(Term t, IndexDocument d, Analyzer a) throws IOException {
-    writer.updateDocument(t, d, a);
-    // Return gen as of when indexing finished:
-    return indexingGen.get();
-  }
-
   /** Calls {@link
    *  IndexWriter#updateDocument(Term,IndexDocument)} and
    *  returns the generation that reflects this change. */
@@ -65,15 +56,6 @@ public class TrackingIndexWriter {
     return indexingGen.get();
   }
 
-  /** Calls {@link
-   *  IndexWriter#updateDocuments(Term,Iterable,Analyzer)}
-   *  and returns the generation that reflects this change. */
-  public long updateDocuments(Term t, Iterable<? extends IndexDocument> docs, Analyzer a) throws IOException {
-    writer.updateDocuments(t, docs, a);
-    // Return gen as of when indexing finished:
-    return indexingGen.get();
-  }
-
   /** Calls {@link
    *  IndexWriter#updateDocuments(Term,Iterable)} and returns
    *  the generation that reflects this change. */
@@ -123,24 +105,6 @@ public class TrackingIndexWriter {
     return indexingGen.get();
   }
 
-  /** Calls {@link
-   *  IndexWriter#addDocument(IndexDocument,Analyzer)} and
-   *  returns the generation that reflects this change. */
-  public long addDocument(IndexDocument d, Analyzer a) throws IOException {
-    writer.addDocument(d, a);
-    // Return gen as of when indexing finished:
-    return indexingGen.get();
-  }
-
-  /** Calls {@link
-   *  IndexWriter#addDocuments(Iterable,Analyzer)} and
-   *  returns the generation that reflects this change. */
-  public long addDocuments(Iterable<? extends IndexDocument> docs, Analyzer a) throws IOException {
-    writer.addDocuments(docs, a);
-    // Return gen as of when indexing finished:
-    return indexingGen.get();
-  }
-
   /** Calls {@link IndexWriter#addDocument(IndexDocument)}
    *  and returns the generation that reflects this change. */
   public long addDocument(IndexDocument d) throws IOException {


@@ -305,7 +305,7 @@ public class TestMockAnalyzer extends BaseTokenStreamTestCase {
       }
     };
 
-    final RandomIndexWriter writer = new RandomIndexWriter(random(), newDirectory());
+    final RandomIndexWriter writer = new RandomIndexWriter(random(), newDirectory(), a);
     final Document doc = new Document();
     final FieldType ft = new FieldType();
     ft.setIndexOptions(IndexOptions.DOCS);
@@ -315,7 +315,7 @@ public class TestMockAnalyzer extends BaseTokenStreamTestCase {
     ft.setStoreTermVectorOffsets(true);
     doc.add(new Field("f", "a", ft));
     doc.add(new Field("f", "a", ft));
-    writer.addDocument(doc, a);
+    writer.addDocument(doc);
     final LeafReader reader = getOnlySegmentReader(writer.getReader());
     final Fields fields = reader.getTermVectors(0);
     final Terms terms = fields.terms("f");


@@ -392,27 +392,35 @@ public class TestIndexWriterExceptions extends LuceneTestCase {
   // LUCENE-1208
   public void testExceptionJustBeforeFlush() throws IOException {
     Directory dir = newDirectory();
-    IndexWriter w = RandomIndexWriter.mockIndexWriter(dir,
-                                                      newIndexWriterConfig(new MockAnalyzer(random()))
-                                                        .setMaxBufferedDocs(2),
-                                                      new TestPoint1());
-    Document doc = new Document();
-    doc.add(newTextField("field", "a field", Field.Store.YES));
-    w.addDocument(doc);
+    final AtomicBoolean doCrash = new AtomicBoolean();
 
     Analyzer analyzer = new Analyzer(Analyzer.PER_FIELD_REUSE_STRATEGY) {
       @Override
       public TokenStreamComponents createComponents(String fieldName) {
         MockTokenizer tokenizer = new MockTokenizer(MockTokenizer.WHITESPACE, false);
         tokenizer.setEnableChecks(false); // disable workflow checking as we forcefully close() in exceptional cases.
-        return new TokenStreamComponents(tokenizer, new CrashingFilter(fieldName, tokenizer));
+        TokenStream stream = tokenizer;
+        if (doCrash.get()) {
+          stream = new CrashingFilter(fieldName, stream);
+        }
+        return new TokenStreamComponents(tokenizer, stream);
       }
     };
 
+    IndexWriter w = RandomIndexWriter.mockIndexWriter(dir,
+                                                      newIndexWriterConfig(analyzer)
+                                                        .setMaxBufferedDocs(2),
+                                                      new TestPoint1());
+    Document doc = new Document();
+    doc.add(newTextField("field", "a field", Field.Store.YES));
+    w.addDocument(doc);
+
     Document crashDoc = new Document();
     crashDoc.add(newTextField("crash", "do it on token 4", Field.Store.YES));
+    doCrash.set(true);
     try {
-      w.addDocument(crashDoc, analyzer);
+      w.addDocument(crashDoc);
       fail("did not hit expected exception");
     } catch (IOException ioe) {
       // expected
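The pattern adopted in this test generalizes: with the per-document Analyzer overloads gone, document-dependent analysis can be driven by external state that createComponents consults each time it builds a token stream. Below is a hedged, self-contained sketch along those lines (Lucene 5.x-era API assumed; the flag, field names, and the LowerCaseFilter stand-in for the test's CrashingFilter are illustrative). Note that with PER_FIELD_REUSE_STRATEGY components are cached per field, so the flag is consulted the first time each field is analyzed, which matches the test, where the crashing document uses a different field.

import java.io.IOException;
import java.util.concurrent.atomic.AtomicBoolean;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.core.LowerCaseFilter;
import org.apache.lucene.analysis.core.WhitespaceTokenizer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.RAMDirectory;

public class ToggleableAnalyzerSketch {
  public static void main(String[] args) throws IOException {
    final AtomicBoolean lowercase = new AtomicBoolean(false);

    // One Analyzer, fixed in the config; it reads external state each time it
    // builds a token stream, instead of a different Analyzer per document.
    Analyzer analyzer = new Analyzer(Analyzer.PER_FIELD_REUSE_STRATEGY) {
      @Override
      protected TokenStreamComponents createComponents(String fieldName) {
        Tokenizer tokenizer = new WhitespaceTokenizer();
        TokenStream stream = tokenizer;
        if (lowercase.get()) {
          stream = new LowerCaseFilter(stream);   // stand-in for the test's CrashingFilter
        }
        return new TokenStreamComponents(tokenizer, stream);
      }
    };

    Directory dir = new RAMDirectory();
    try (IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(analyzer))) {
      Document doc = new Document();
      doc.add(new TextField("body", "Indexed Before The Toggle", Field.Store.NO));
      writer.addDocument(doc);      // "body" is analyzed without the extra filter

      lowercase.set(true);          // flip the flag, as the test does with doCrash

      doc = new Document();
      doc.add(new TextField("title", "Indexed After The Toggle", Field.Store.NO));
      writer.addDocument(doc);      // "title" is analyzed with LowerCaseFilter applied
      writer.commit();
    }
    dir.close();
  }
}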


@@ -42,521 +42,520 @@ import org.apache.lucene.util.TestUtil;
 public class TestPayloads extends LuceneTestCase {
 
   // Simple tests to test the Payload class
   public void testPayload() throws Exception {
     BytesRef payload = new BytesRef("This is a test!");
     assertEquals("Wrong payload length.", "This is a test!".length(), payload.length);
 
     BytesRef clone = payload.clone();
     assertEquals(payload.length, clone.length);
     for (int i = 0; i < payload.length; i++) {
       assertEquals(payload.bytes[i + payload.offset], clone.bytes[i + clone.offset]);
     }
   }
 
   // Tests whether the DocumentWriter and SegmentMerger correctly enable the
   // payload bit in the FieldInfo
   public void testPayloadFieldBit() throws Exception {
     Directory ram = newDirectory();
     PayloadAnalyzer analyzer = new PayloadAnalyzer();
     IndexWriter writer = new IndexWriter(ram, newIndexWriterConfig(analyzer));
     Document d = new Document();
     // this field won't have any payloads
     d.add(newTextField("f1", "This field has no payloads", Field.Store.NO));
     // this field will have payloads in all docs, however not for all term positions,
     // so this field is used to check if the DocumentWriter correctly enables the payloads bit
     // even if only some term positions have payloads
     d.add(newTextField("f2", "This field has payloads in all docs", Field.Store.NO));
     d.add(newTextField("f2", "This field has payloads in all docs NO PAYLOAD", Field.Store.NO));
     // this field is used to verify if the SegmentMerger enables payloads for a field if it has payloads
     // enabled in only some documents
     d.add(newTextField("f3", "This field has payloads in some docs", Field.Store.NO));
     // only add payload data for field f2
     analyzer.setPayloadData("f2", "somedata".getBytes(StandardCharsets.UTF_8), 0, 1);
     writer.addDocument(d);
     // flush
     writer.close();
 
     SegmentReader reader = getOnlySegmentReader(DirectoryReader.open(ram));
     FieldInfos fi = reader.getFieldInfos();
     assertFalse("Payload field bit should not be set.", fi.fieldInfo("f1").hasPayloads());
     assertTrue("Payload field bit should be set.", fi.fieldInfo("f2").hasPayloads());
     assertFalse("Payload field bit should not be set.", fi.fieldInfo("f3").hasPayloads());
     reader.close();
 
     // now we add another document which has payloads for field f3 and verify if the SegmentMerger
     // enabled payloads for that field
     analyzer = new PayloadAnalyzer(); // Clear payload state for each field
     writer = new IndexWriter(ram, newIndexWriterConfig(analyzer)
                                     .setOpenMode(OpenMode.CREATE));
     d = new Document();
     d.add(newTextField("f1", "This field has no payloads", Field.Store.NO));
     d.add(newTextField("f2", "This field has payloads in all docs", Field.Store.NO));
     d.add(newTextField("f2", "This field has payloads in all docs", Field.Store.NO));
     d.add(newTextField("f3", "This field has payloads in some docs", Field.Store.NO));
     // add payload data for field f2 and f3
     analyzer.setPayloadData("f2", "somedata".getBytes(StandardCharsets.UTF_8), 0, 1);
     analyzer.setPayloadData("f3", "somedata".getBytes(StandardCharsets.UTF_8), 0, 3);
     writer.addDocument(d);
     // force merge
     writer.forceMerge(1);
     // flush
     writer.close();
 
     reader = getOnlySegmentReader(DirectoryReader.open(ram));
     fi = reader.getFieldInfos();
     assertFalse("Payload field bit should not be set.", fi.fieldInfo("f1").hasPayloads());
     assertTrue("Payload field bit should be set.", fi.fieldInfo("f2").hasPayloads());
     assertTrue("Payload field bit should be set.", fi.fieldInfo("f3").hasPayloads());
     reader.close();
     ram.close();
   }
 
   // Tests if payloads are correctly stored and loaded using both RamDirectory and FSDirectory
   public void testPayloadsEncoding() throws Exception {
     Directory dir = newDirectory();
     performTest(dir);
     dir.close();
   }
 
   // builds an index with payloads in the given Directory and performs
   // different tests to verify the payload encoding
   private void performTest(Directory dir) throws Exception {
     PayloadAnalyzer analyzer = new PayloadAnalyzer();
     IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(analyzer)
                                                 .setOpenMode(OpenMode.CREATE)
                                                 .setMergePolicy(newLogMergePolicy()));
 
     // should be in sync with value in TermInfosWriter
     final int skipInterval = 16;
 
     final int numTerms = 5;
     final String fieldName = "f1";
 
     int numDocs = skipInterval + 1;
     // create content for the test documents with just a few terms
     Term[] terms = generateTerms(fieldName, numTerms);
     StringBuilder sb = new StringBuilder();
     for (int i = 0; i < terms.length; i++) {
       sb.append(terms[i].text());
       sb.append(" ");
     }
     String content = sb.toString();
 
     int payloadDataLength = numTerms * numDocs * 2 + numTerms * numDocs * (numDocs - 1) / 2;
     byte[] payloadData = generateRandomData(payloadDataLength);
 
     Document d = new Document();
     d.add(newTextField(fieldName, content, Field.Store.NO));
     // add the same document multiple times to have the same payload lengths for all
     // occurrences within two consecutive skip intervals
     int offset = 0;
     for (int i = 0; i < 2 * numDocs; i++) {
-      analyzer = new PayloadAnalyzer(fieldName, payloadData, offset, 1);
+      analyzer.setPayloadData(fieldName, payloadData, offset, 1);
       offset += numTerms;
-      writer.addDocument(d, analyzer);
+      writer.addDocument(d);
     }
 
     // make sure we create more than one segment to test merging
     writer.commit();
 
     // now we make sure to have different payload lengths next at the next skip point
     for (int i = 0; i < numDocs; i++) {
-      analyzer = new PayloadAnalyzer(fieldName, payloadData, offset, i);
+      analyzer.setPayloadData(fieldName, payloadData, offset, i);
       offset += i * numTerms;
-      writer.addDocument(d, analyzer);
+      writer.addDocument(d);
     }
 
     writer.forceMerge(1);
     // flush
     writer.close();
 
     /*
      * Verify the index
      * first we test if all payloads are stored correctly
      */
     IndexReader reader = DirectoryReader.open(dir);
 
     byte[] verifyPayloadData = new byte[payloadDataLength];
     offset = 0;
     DocsAndPositionsEnum[] tps = new DocsAndPositionsEnum[numTerms];
     for (int i = 0; i < numTerms; i++) {
       tps[i] = MultiFields.getTermPositionsEnum(reader,
                                                 MultiFields.getLiveDocs(reader),
                                                 terms[i].field(),
                                                 new BytesRef(terms[i].text()));
     }
 
     while (tps[0].nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
       for (int i = 1; i < numTerms; i++) {
         tps[i].nextDoc();
       }
       int freq = tps[0].freq();
 
       for (int i = 0; i < freq; i++) {
         for (int j = 0; j < numTerms; j++) {
           tps[j].nextPosition();
           BytesRef br = tps[j].getPayload();
           if (br != null) {
             System.arraycopy(br.bytes, br.offset, verifyPayloadData, offset, br.length);
             offset += br.length;
           }
         }
       }
     }
 
     assertByteArrayEquals(payloadData, verifyPayloadData);
 
     /*
      * test lazy skipping
      */
     DocsAndPositionsEnum tp = MultiFields.getTermPositionsEnum(reader,
                                                                MultiFields.getLiveDocs(reader),
                                                                terms[0].field(),
                                                                new BytesRef(terms[0].text()));
     tp.nextDoc();
     tp.nextPosition();
     // NOTE: prior rev of this test was failing to first
     // call next here:
     tp.nextDoc();
     // now we don't read this payload
     tp.nextPosition();
     BytesRef payload = tp.getPayload();
     assertEquals("Wrong payload length.", 1, payload.length);
     assertEquals(payload.bytes[payload.offset], payloadData[numTerms]);
     tp.nextDoc();
     tp.nextPosition();
 
     // we don't read this payload and skip to a different document
     tp.advance(5);
     tp.nextPosition();
     payload = tp.getPayload();
     assertEquals("Wrong payload length.", 1, payload.length);
     assertEquals(payload.bytes[payload.offset], payloadData[5 * numTerms]);
 
     /*
      * Test different lengths at skip points
      */
     tp = MultiFields.getTermPositionsEnum(reader,
                                           MultiFields.getLiveDocs(reader),
                                           terms[1].field(),
                                           new BytesRef(terms[1].text()));
     tp.nextDoc();
     tp.nextPosition();
     assertEquals("Wrong payload length.", 1, tp.getPayload().length);
     tp.advance(skipInterval - 1);
     tp.nextPosition();
     assertEquals("Wrong payload length.", 1, tp.getPayload().length);
     tp.advance(2 * skipInterval - 1);
     tp.nextPosition();
     assertEquals("Wrong payload length.", 1, tp.getPayload().length);
     tp.advance(3 * skipInterval - 1);
     tp.nextPosition();
     assertEquals("Wrong payload length.", 3 * skipInterval - 2 * numDocs - 1, tp.getPayload().length);
 
     reader.close();
 
     // test long payload
     analyzer = new PayloadAnalyzer();
     writer = new IndexWriter(dir, newIndexWriterConfig(analyzer)
                                     .setOpenMode(OpenMode.CREATE));
     String singleTerm = "lucene";
 
     d = new Document();
     d.add(newTextField(fieldName, singleTerm, Field.Store.NO));
     // add a payload whose length is greater than the buffer size of BufferedIndexOutput
     payloadData = generateRandomData(2000);
     analyzer.setPayloadData(fieldName, payloadData, 100, 1500);
     writer.addDocument(d);
 
     writer.forceMerge(1);
     // flush
     writer.close();
 
     reader = DirectoryReader.open(dir);
     tp = MultiFields.getTermPositionsEnum(reader,
                                           MultiFields.getLiveDocs(reader),
                                           fieldName,
                                           new BytesRef(singleTerm));
     tp.nextDoc();
     tp.nextPosition();
 
     BytesRef br = tp.getPayload();
     verifyPayloadData = new byte[br.length];
     byte[] portion = new byte[1500];
     System.arraycopy(payloadData, 100, portion, 0, 1500);
 
     assertByteArrayEquals(portion, br.bytes, br.offset, br.length);
     reader.close();
   }
 
   static final Charset utf8 = StandardCharsets.UTF_8;
 
   private void generateRandomData(byte[] data) {
     // this test needs the random data to be valid unicode
     String s = TestUtil.randomFixedByteLengthUnicodeString(random(), data.length);
     byte b[] = s.getBytes(utf8);
     assert b.length == data.length;
     System.arraycopy(b, 0, data, 0, b.length);
   }
 
   private byte[] generateRandomData(int n) {
     byte[] data = new byte[n];
     generateRandomData(data);
     return data;
   }
 
   private Term[] generateTerms(String fieldName, int n) {
     int maxDigits = (int) (Math.log(n) / Math.log(10));
     Term[] terms = new Term[n];
     StringBuilder sb = new StringBuilder();
     for (int i = 0; i < n; i++) {
       sb.setLength(0);
       sb.append("t");
       int zeros = maxDigits - (int) (Math.log(i) / Math.log(10));
       for (int j = 0; j < zeros; j++) {
         sb.append("0");
       }
       sb.append(i);
       terms[i] = new Term(fieldName, sb.toString());
     }
     return terms;
   }
 
   void assertByteArrayEquals(byte[] b1, byte[] b2) {
     if (b1.length != b2.length) {
       fail("Byte arrays have different lengths: " + b1.length + ", " + b2.length);
     }
 
     for (int i = 0; i < b1.length; i++) {
       if (b1[i] != b2[i]) {
         fail("Byte arrays different at index " + i + ": " + b1[i] + ", " + b2[i]);
       }
     }
   }
 
   void assertByteArrayEquals(byte[] b1, byte[] b2, int b2offset, int b2length) {
     if (b1.length != b2length) {
       fail("Byte arrays have different lengths: " + b1.length + ", " + b2length);
     }
 
     for (int i = 0; i < b1.length; i++) {
       if (b1[i] != b2[b2offset+i]) {
         fail("Byte arrays different at index " + i + ": " + b1[i] + ", " + b2[b2offset+i]);
       }
     }
   }
/** static class PayloadData {
* This Analyzer uses an WhitespaceTokenizer and PayloadFilter. byte[] data;
*/ int offset;
private static class PayloadAnalyzer extends Analyzer { int length;
Map<String,PayloadData> fieldToData = new HashMap<>();
public PayloadAnalyzer() { PayloadData(byte[] data, int offset, int length) {
super(PER_FIELD_REUSE_STRATEGY); this.data = data;
} this.offset = offset;
this.length = length;
public PayloadAnalyzer(String field, byte[] data, int offset, int length) { }
super(PER_FIELD_REUSE_STRATEGY); }
setPayloadData(field, data, offset, length);
}
void setPayloadData(String field, byte[] data, int offset, int length) { /**
fieldToData.put(field, new PayloadData(data, offset, length)); * This Analyzer uses an MockTokenizer and PayloadFilter.
} */
private static class PayloadAnalyzer extends Analyzer {
@Override Map<String,PayloadData> fieldToData = new HashMap<>();
public TokenStreamComponents createComponents(String fieldName) {
PayloadData payload = fieldToData.get(fieldName);
Tokenizer ts = new MockTokenizer(MockTokenizer.WHITESPACE, false);
TokenStream tokenStream = (payload != null) ?
new PayloadFilter(ts, payload.data, payload.offset, payload.length) : ts;
return new TokenStreamComponents(ts, tokenStream);
}
private static class PayloadData {
byte[] data;
int offset;
int length;
PayloadData(byte[] data, int offset, int length) { public PayloadAnalyzer() {
this.data = data; super(PER_FIELD_REUSE_STRATEGY);
this.offset = offset; }
this.length = length;
} public PayloadAnalyzer(String field, byte[] data, int offset, int length) {
} super(PER_FIELD_REUSE_STRATEGY);
setPayloadData(field, data, offset, length);
} }
void setPayloadData(String field, byte[] data, int offset, int length) {
fieldToData.put(field, new PayloadData(data, offset, length));
}
@Override
public TokenStreamComponents createComponents(String fieldName) {
PayloadData payload = fieldToData.get(fieldName);
Tokenizer ts = new MockTokenizer(MockTokenizer.WHITESPACE, false);
TokenStream tokenStream = (payload != null) ?
new PayloadFilter(ts, fieldName, fieldToData) : ts;
return new TokenStreamComponents(ts, tokenStream);
}
}
/** /**
* This Filter adds payloads to the tokens. * This Filter adds payloads to the tokens.
*/ */
private static class PayloadFilter extends TokenFilter { private static class PayloadFilter extends TokenFilter {
private byte[] data; PayloadAttribute payloadAtt;
private int length; CharTermAttribute termAttribute;
private int offset; private Map<String,PayloadData> fieldToData;
private int startOffset; private String fieldName;
PayloadAttribute payloadAtt; private PayloadData payloadData;
CharTermAttribute termAttribute; private int offset;
public PayloadFilter(TokenStream in, byte[] data, int offset, int length) { public PayloadFilter(TokenStream in, String fieldName, Map<String,PayloadData> fieldToData) {
super(in); super(in);
this.data = data; this.fieldToData = fieldToData;
this.length = length; this.fieldName = fieldName;
this.offset = offset; payloadAtt = addAttribute(PayloadAttribute.class);
this.startOffset = offset; termAttribute = addAttribute(CharTermAttribute.class);
payloadAtt = addAttribute(PayloadAttribute.class); }
termAttribute = addAttribute(CharTermAttribute.class);
}
@Override @Override
public boolean incrementToken() throws IOException { public boolean incrementToken() throws IOException {
boolean hasNext = input.incrementToken(); boolean hasNext = input.incrementToken();
if (!hasNext) { if (!hasNext) {
return false; return false;
} }
// Some values of the same field are to have payloads and others not // Some values of the same field are to have payloads and others not
if (offset + length <= data.length && !termAttribute.toString().endsWith("NO PAYLOAD")) { if (offset + payloadData.length <= payloadData.data.length && !termAttribute.toString().endsWith("NO PAYLOAD")) {
BytesRef p = new BytesRef(data, offset, length); BytesRef p = new BytesRef(payloadData.data, offset, payloadData.length);
payloadAtt.setPayload(p); payloadAtt.setPayload(p);
offset += length; offset += payloadData.length;
} else { } else {
payloadAtt.setPayload(null); payloadAtt.setPayload(null);
} }
return true; return true;
}
@Override
public void reset() throws IOException {
super.reset();
this.payloadData = fieldToData.get(fieldName);
this.offset = payloadData.offset;
}
}
public void testThreadSafety() throws Exception {
final int numThreads = 5;
final int numDocs = atLeast(50);
final ByteArrayPool pool = new ByteArrayPool(numThreads, 5);
Directory dir = newDirectory();
final IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(new MockAnalyzer(random())));
final String field = "test";
Thread[] ingesters = new Thread[numThreads];
for (int i = 0; i < numThreads; i++) {
ingesters[i] = new Thread() {
@Override
public void run() {
try {
            for (int j = 0; j < numDocs; j++) {
              Document d = new Document();
              d.add(new TextField(field, new PoolingPayloadTokenStream(pool)));
              writer.addDocument(d);
            }
          } catch (Exception e) {
            e.printStackTrace();
            fail(e.toString());
          }
        }
      };
      ingesters[i].start();
    }

    for (int i = 0; i < numThreads; i++) {
      ingesters[i].join();
    }
    writer.close();
    IndexReader reader = DirectoryReader.open(dir);
    TermsEnum terms = MultiFields.getFields(reader).terms(field).iterator(null);
    Bits liveDocs = MultiFields.getLiveDocs(reader);
    DocsAndPositionsEnum tp = null;
    while (terms.next() != null) {
      String termText = terms.term().utf8ToString();
      tp = terms.docsAndPositions(liveDocs, tp);
      while (tp.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
        int freq = tp.freq();
        for (int i = 0; i < freq; i++) {
          tp.nextPosition();
          final BytesRef payload = tp.getPayload();
          assertEquals(termText, payload.utf8ToString());
        }
      }
    }
    reader.close();
    dir.close();
    assertEquals(pool.size(), numThreads);
  }

  private class PoolingPayloadTokenStream extends TokenStream {
    private byte[] payload;
    private boolean first;
    private ByteArrayPool pool;
    private String term;

    CharTermAttribute termAtt;
    PayloadAttribute payloadAtt;

    PoolingPayloadTokenStream(ByteArrayPool pool) {
      this.pool = pool;
      payload = pool.get();
      generateRandomData(payload);
      term = new String(payload, 0, payload.length, utf8);
      first = true;
      payloadAtt = addAttribute(PayloadAttribute.class);
      termAtt = addAttribute(CharTermAttribute.class);
    }

    @Override
    public boolean incrementToken() throws IOException {
      if (!first) return false;
      first = false;
      clearAttributes();
      termAtt.append(term);
      payloadAtt.setPayload(new BytesRef(payload));
      return true;
    }

    @Override
    public void close() throws IOException {
      pool.release(payload);
    }
  }

  private static class ByteArrayPool {
    private List<byte[]> pool;

    ByteArrayPool(int capacity, int size) {
      pool = new ArrayList<>();
      for (int i = 0; i < capacity; i++) {
        pool.add(new byte[size]);
      }
    }

    synchronized byte[] get() {
      return pool.remove(0);
    }

    synchronized void release(byte[] b) {
      pool.add(b);
    }

    synchronized int size() {
      return pool.size();
    }
  }
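With the per-document Analyzer overloads gone, the analyzer used to invert text is bound once to the writer via IndexWriterConfig, which is the pattern the test above now relies on. A minimal sketch of that pattern against the Lucene 5.x API; the class name and field values are illustrative, not part of the patch:

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.RAMDirectory;

public class SingleAnalyzerIndexing {
  public static void main(String[] args) throws Exception {
    Directory dir = new RAMDirectory();
    // The analyzer is configured once on IndexWriterConfig instead of being
    // passed to each addDocument/updateDocument call.
    IndexWriterConfig iwc = new IndexWriterConfig(new StandardAnalyzer());
    try (IndexWriter writer = new IndexWriter(dir, iwc)) {
      Document doc = new Document();
      doc.add(new TextField("contents", "some content written here", Field.Store.YES));
      writer.addDocument(doc);   // no per-document Analyzer argument anymore
      writer.commit();
    }
    dir.close();
  }
}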
  public void testAcrossFields() throws Exception {
    Directory dir = newDirectory();
@@ -646,5 +645,4 @@ public class TestPayloads extends LuceneTestCase {
    reader.close();
    dir.close();
  }
}


@@ -390,9 +390,9 @@ public class TestControlledRealTimeReopenThread extends ThreadedIndexingAndSearc
         @Override
         public void updateDocument(Term term,
-                                   IndexDocument doc, Analyzer analyzer)
+                                   IndexDocument doc)
             throws IOException {
-          super.updateDocument(term, doc, analyzer);
+          super.updateDocument(term, doc);
           try {
             if (waitAfterUpdate) {
               signal.countDown();
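Callers that previously varied the analyzer per updateDocument call now use the two-argument form; the replacement document is analyzed with whatever analyzer the writer's config was built with. A hedged sketch of updating a document by its unique-id term (field names and values below are made up for illustration):

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.Term;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.RAMDirectory;

public class UpdateByTerm {
  public static void main(String[] args) throws Exception {
    Directory dir = new RAMDirectory();
    try (IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(new StandardAnalyzer()))) {
      Document doc = new Document();
      doc.add(new StringField("id", "42", Field.Store.YES));
      doc.add(new TextField("body", "first version", Field.Store.YES));
      writer.addDocument(doc);

      Document newDoc = new Document();
      newDoc.add(new StringField("id", "42", Field.Store.YES));
      newDoc.add(new TextField("body", "second version", Field.Store.YES));
      // Atomically deletes any document matching the term and adds the new one;
      // the text is analyzed with the writer's configured analyzer.
      writer.updateDocument(new Term("id", "42"), newDoc);
      writer.commit();
    }
    dir.close();
  }
}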


@@ -2040,22 +2040,22 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatte
     Document doc = new Document();
     doc.add(new IntField(NUMERIC_FIELD_NAME, 1, Field.Store.NO));
     doc.add(new StoredField(NUMERIC_FIELD_NAME, 1));
-    writer.addDocument(doc, analyzer);
+    writer.addDocument(doc);
     doc = new Document();
     doc.add(new IntField(NUMERIC_FIELD_NAME, 3, Field.Store.NO));
     doc.add(new StoredField(NUMERIC_FIELD_NAME, 3));
-    writer.addDocument(doc, analyzer);
+    writer.addDocument(doc);
     doc = new Document();
     doc.add(new IntField(NUMERIC_FIELD_NAME, 5, Field.Store.NO));
     doc.add(new StoredField(NUMERIC_FIELD_NAME, 5));
-    writer.addDocument(doc, analyzer);
+    writer.addDocument(doc);
     doc = new Document();
     doc.add(new IntField(NUMERIC_FIELD_NAME, 7, Field.Store.NO));
     doc.add(new StoredField(NUMERIC_FIELD_NAME, 7));
-    writer.addDocument(doc, analyzer);
+    writer.addDocument(doc);
     Document childDoc = doc(FIELD_NAME, "child document");
     Document parentDoc = doc(FIELD_NAME, "parent document");
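Tests like this one used to hand an analyzer to every addDocument call; with that overload removed, per-field analysis differences are configured once on the config instead. A sketch using PerFieldAnalyzerWrapper (the field names and analyzer choices are only examples, not taken from the patch):

import java.util.HashMap;
import java.util.Map;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.core.KeywordAnalyzer;
import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
import org.apache.lucene.analysis.miscellaneous.PerFieldAnalyzerWrapper;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.index.IndexWriterConfig;

public class PerFieldAnalyzerSetup {
  public static IndexWriterConfig newConfig() {
    Map<String, Analyzer> perField = new HashMap<>();
    perField.put("id", new KeywordAnalyzer());        // ids indexed as single tokens
    perField.put("tags", new WhitespaceAnalyzer());   // simple whitespace splitting
    // StandardAnalyzer is the fallback for every other field.
    Analyzer analyzer = new PerFieldAnalyzerWrapper(new StandardAnalyzer(), perField);
    return new IndexWriterConfig(analyzer);
  }
}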


@@ -47,7 +47,7 @@ import org.apache.lucene.util.packed.PackedLongValues;
  * will be sorted while segments resulting from a flush will be in the order
  * in which documents have been added.
  * <p><b>NOTE</b>: Never use this policy if you rely on
- * {@link IndexWriter#addDocuments(Iterable, Analyzer) IndexWriter.addDocuments}
+ * {@link IndexWriter#addDocuments(Iterable) IndexWriter.addDocuments}
  * to have sequentially-assigned doc IDs, this policy will scatter doc IDs.
  * <p><b>NOTE</b>: This policy should only be used with idempotent {@code Sort}s
  * so that the order of segments is predictable. For example, using
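The javadoc now points at the single-argument addDocuments. For reference, a small sketch of the block-indexing contract it warns about: documents added in one call are written contiguously and receive consecutive doc IDs, conventionally with the parent last, which is the assumption this merge policy can break by reordering. All names below are illustrative:

import java.util.Arrays;

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.StringField;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.RAMDirectory;

public class BlockIndexing {
  public static void main(String[] args) throws Exception {
    Directory dir = new RAMDirectory();
    try (IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(new StandardAnalyzer()))) {
      Document child1 = new Document();
      child1.add(new StringField("type", "child", Field.Store.NO));
      Document child2 = new Document();
      child2.add(new StringField("type", "child", Field.Store.NO));
      Document parent = new Document();
      parent.add(new StringField("type", "parent", Field.Store.NO));
      // The block is added atomically and gets consecutive doc IDs, parent last,
      // which is what block-join queries rely on.
      writer.addDocuments(Arrays.asList(child1, child2, parent));
      writer.commit();
    }
    dir.close();
  }
}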


@@ -103,11 +103,6 @@ public class RandomIndexWriter implements Closeable {
    * @see IndexWriter#addDocument(org.apache.lucene.index.IndexDocument)
    */
   public <T extends IndexableField> void addDocument(final IndexDocument doc) throws IOException {
-    LuceneTestCase.maybeChangeLiveIndexWriterConfig(r, w.getConfig());
-    addDocument(doc, w.getAnalyzer());
-  }
-
-  public <T extends IndexableField> void addDocument(final IndexDocument doc, Analyzer a) throws IOException {
     LuceneTestCase.maybeChangeLiveIndexWriterConfig(r, w.getConfig());
     if (r.nextInt(5) == 3) {
       // TODO: maybe, we should simply buffer up added docs
@@ -141,9 +136,9 @@ public class RandomIndexWriter implements Closeable {
           }
         };
       }
-      }, a);
+      });
     } else {
-      w.addDocument(doc, a);
+      w.addDocument(doc);
     }
     maybeCommit();


@@ -536,7 +536,7 @@ public final class SolrCore implements SolrInfoMBean, Closeable {
       log.warn(logid+"Solr index directory '" + new File(indexDir) + "' doesn't exist."
           + " Creating new index...");
-      SolrIndexWriter writer = SolrIndexWriter.create("SolrCore.initIndex", indexDir, getDirectoryFactory(), true,
+      SolrIndexWriter writer = SolrIndexWriter.create(this, "SolrCore.initIndex", indexDir, getDirectoryFactory(), true,
                                                       getLatestSchema(), solrConfig.indexConfig, solrDelPolicy, codec);
       writer.close();
     }


@@ -264,7 +264,7 @@ public final class DefaultSolrCoreState extends SolrCoreState implements Recover
   }
   protected SolrIndexWriter createMainIndexWriter(SolrCore core, String name) throws IOException {
-    return SolrIndexWriter.create(name, core.getNewIndexDir(),
+    return SolrIndexWriter.create(core, name, core.getNewIndexDir(),
         core.getDirectoryFactory(), false, core.getLatestSchema(),
         core.getSolrConfig().indexConfig, core.getDeletionPolicy(), core.getCodec());
   }


@@ -235,11 +235,11 @@ public class DirectUpdateHandler2 extends UpdateHandler implements SolrCoreState
         }
         if (cmd.isBlock()) {
-          writer.updateDocuments(updateTerm, cmd, schema.getIndexAnalyzer());
+          writer.updateDocuments(updateTerm, cmd);
         } else {
           Document luceneDocument = cmd.getLuceneDocument();
           // SolrCore.verbose("updateDocument",updateTerm,luceneDocument,writer);
-          writer.updateDocument(updateTerm, luceneDocument, schema.getIndexAnalyzer());
+          writer.updateDocument(updateTerm, luceneDocument);
         }
         // SolrCore.verbose("updateDocument",updateTerm,"DONE");
@@ -264,9 +264,9 @@ public class DirectUpdateHandler2 extends UpdateHandler implements SolrCoreState
       } else {
         // allow duplicates
         if (cmd.isBlock()) {
-          writer.addDocuments(cmd, schema.getIndexAnalyzer());
+          writer.addDocuments(cmd);
         } else {
-          writer.addDocument(cmd.getLuceneDocument(), schema.getIndexAnalyzer());
+          writer.addDocument(cmd.getLuceneDocument());
         }
         if (ulog != null) ulog.add(cmd);
@@ -442,8 +442,7 @@ public class DirectUpdateHandler2 extends UpdateHandler implements SolrCoreState
       RefCounted<IndexWriter> iw = solrCoreState.getIndexWriter(core);
       try {
         IndexWriter writer = iw.get();
-        writer.updateDocument(idTerm, luceneDocument, cmd.getReq().getSchema()
-            .getIndexAnalyzer());
+        writer.updateDocument(idTerm, luceneDocument);
         for (Query q : dbqList) {
           writer.deleteDocuments(new DeleteByQueryWrapper(q, core.getLatestSchema()));


@@ -17,6 +17,14 @@
 package org.apache.solr.update;
+import java.io.File;
+import java.io.FileOutputStream;
+import java.io.PrintStream;
+import java.util.List;
+import java.util.Map;
+
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.DelegatingAnalyzerWrapper;
 import org.apache.lucene.index.*;
 import org.apache.lucene.index.IndexWriter.IndexReaderWarmer;
 import org.apache.lucene.util.InfoStream;
@@ -24,16 +32,14 @@ import org.apache.lucene.util.Version;
 import org.apache.solr.common.cloud.ZkNodeProps;
 import org.apache.solr.common.util.NamedList;
 import org.apache.solr.core.MapSerializable;
-import org.apache.solr.core.SolrConfig;
 import org.apache.solr.core.PluginInfo;
+import org.apache.solr.core.SolrConfig;
+import org.apache.solr.core.SolrCore;
 import org.apache.solr.schema.IndexSchema;
 import org.apache.solr.util.SolrPluginUtils;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
-import java.util.List;
-import java.util.Map;
 import static org.apache.solr.core.Config.assertWarnOrFail;
 /**
@@ -180,12 +186,23 @@ public class SolrIndexConfig implements MapSerializable {
     return l.isEmpty() ? def : l.get(0);
   }
-  public IndexWriterConfig toIndexWriterConfig(IndexSchema schema) {
-    // so that we can update the analyzer on core reload, we pass null
-    // for the default analyzer, and explicitly pass an analyzer on
-    // appropriate calls to IndexWriter
-    IndexWriterConfig iwc = new IndexWriterConfig(null);
+  private static class DelayedSchemaAnalyzer extends DelegatingAnalyzerWrapper {
+    private final SolrCore core;
+
+    public DelayedSchemaAnalyzer(SolrCore core) {
+      super(PER_FIELD_REUSE_STRATEGY);
+      this.core = core;
+    }
+
+    @Override
+    protected Analyzer getWrappedAnalyzer(String fieldName) {
+      return core.getLatestSchema().getIndexAnalyzer();
+    }
+  }
+
+  public IndexWriterConfig toIndexWriterConfig(SolrCore core) {
+    IndexSchema schema = core.getLatestSchema();
+    IndexWriterConfig iwc = new IndexWriterConfig(new DelayedSchemaAnalyzer(core));
     if (maxBufferedDocs != -1)
       iwc.setMaxBufferedDocs(maxBufferedDocs);
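DelayedSchemaAnalyzer is what lets Solr keep swapping analyzers on core reload even though IndexWriterConfig now takes the analyzer up front: the wrapper is handed to the config once and defers to the latest schema on every call. A standalone sketch of the same DelegatingAnalyzerWrapper idea outside Solr; the SwappableAnalyzer name and the AtomicReference plumbing are assumptions for illustration, not taken from the patch:

import java.util.concurrent.atomic.AtomicReference;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.DelegatingAnalyzerWrapper;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.index.IndexWriterConfig;

// The wrapper is registered with IndexWriterConfig once, but every analysis
// call is routed to whatever analyzer is current at that moment, so it can be
// replaced without rebuilding the writer.
public class SwappableAnalyzer extends DelegatingAnalyzerWrapper {
  private final AtomicReference<Analyzer> current;

  public SwappableAnalyzer(AtomicReference<Analyzer> current) {
    super(PER_FIELD_REUSE_STRATEGY);
    this.current = current;
  }

  @Override
  protected Analyzer getWrappedAnalyzer(String fieldName) {
    return current.get();
  }

  public static void main(String[] args) {
    AtomicReference<Analyzer> live = new AtomicReference<>(new StandardAnalyzer());
    IndexWriterConfig iwc = new IndexWriterConfig(new SwappableAnalyzer(live));
    // Later, live.set(rebuiltAnalyzer) takes effect for subsequently added documents.
    System.out.println(iwc.getAnalyzer().getClass().getSimpleName());
  }
}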


@@ -121,7 +121,7 @@ public class SolrIndexSplitter {
       } else {
         SolrCore core = searcher.getCore();
         String path = paths.get(partitionNumber);
-        iw = SolrIndexWriter.create("SplittingIndexWriter"+partitionNumber + (ranges != null ? " " + ranges.get(partitionNumber) : ""), path,
+        iw = SolrIndexWriter.create(core, "SplittingIndexWriter"+partitionNumber + (ranges != null ? " " + ranges.get(partitionNumber) : ""), path,
                                     core.getDirectoryFactory(), true, core.getLatestSchema(),
                                     core.getSolrConfig().indexConfig, core.getDeletionPolicy(), core.getCodec());
       }


@@ -27,8 +27,9 @@ import org.apache.lucene.index.IndexWriterConfig;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.util.InfoStream;
 import org.apache.solr.common.util.IOUtils;
-import org.apache.solr.core.DirectoryFactory;
 import org.apache.solr.core.DirectoryFactory.DirContext;
+import org.apache.solr.core.DirectoryFactory;
+import org.apache.solr.core.SolrCore;
 import org.apache.solr.schema.IndexSchema;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
@@ -56,12 +57,12 @@ public class SolrIndexWriter extends IndexWriter {
   private InfoStream infoStream;
   private Directory directory;
-  public static SolrIndexWriter create(String name, String path, DirectoryFactory directoryFactory, boolean create, IndexSchema schema, SolrIndexConfig config, IndexDeletionPolicy delPolicy, Codec codec) throws IOException {
+  public static SolrIndexWriter create(SolrCore core, String name, String path, DirectoryFactory directoryFactory, boolean create, IndexSchema schema, SolrIndexConfig config, IndexDeletionPolicy delPolicy, Codec codec) throws IOException {
     SolrIndexWriter w = null;
     final Directory d = directoryFactory.get(path, DirContext.DEFAULT, config.lockType);
     try {
-      w = new SolrIndexWriter(name, path, d, create, schema,
+      w = new SolrIndexWriter(core, name, path, d, create, schema,
                               config, delPolicy, codec);
       w.setDirectoryFactory(directoryFactory);
       return w;
@@ -73,9 +74,9 @@ public class SolrIndexWriter extends IndexWriter {
     }
   }
-  private SolrIndexWriter(String name, String path, Directory directory, boolean create, IndexSchema schema, SolrIndexConfig config, IndexDeletionPolicy delPolicy, Codec codec) throws IOException {
+  private SolrIndexWriter(SolrCore core, String name, String path, Directory directory, boolean create, IndexSchema schema, SolrIndexConfig config, IndexDeletionPolicy delPolicy, Codec codec) throws IOException {
     super(directory,
-          config.toIndexWriterConfig(schema).
+          config.toIndexWriterConfig(core).
               setOpenMode(create ? IndexWriterConfig.OpenMode.CREATE : IndexWriterConfig.OpenMode.APPEND).
               setIndexDeletionPolicy(delPolicy).setCodec(codec)
           );


@@ -116,7 +116,7 @@ public class TestConfig extends SolrTestCaseJ4 {
     assertEquals("default useCompoundFile", false, sic.getUseCompoundFile());
     IndexSchema indexSchema = IndexSchemaFactory.buildIndexSchema("schema.xml", solrConfig);
-    IndexWriterConfig iwc = sic.toIndexWriterConfig(indexSchema);
+    IndexWriterConfig iwc = sic.toIndexWriterConfig(h.getCore());
     assertNotNull("null mp", iwc.getMergePolicy());
     assertTrue("mp is not TMP", iwc.getMergePolicy() instanceof TieredMergePolicy);


@@ -31,7 +31,7 @@ public class TestInfoStreamLogging extends SolrTestCaseJ4 {
   }
   public void testIndexConfig() throws Exception {
-    IndexWriterConfig iwc = solrConfig.indexConfig.toIndexWriterConfig(h.getCore().getLatestSchema());
+    IndexWriterConfig iwc = solrConfig.indexConfig.toIndexWriterConfig(h.getCore());
     assertTrue(iwc.getInfoStream() instanceof LoggingInfoStream);
   }


@@ -45,7 +45,7 @@ public class TestMergePolicyConfig extends SolrTestCaseJ4 {
   public void testDefaultMergePolicyConfig() throws Exception {
     initCore("solrconfig-mergepolicy-defaults.xml","schema-minimal.xml");
-    IndexWriterConfig iwc = solrConfig.indexConfig.toIndexWriterConfig(h.getCore().getLatestSchema());
+    IndexWriterConfig iwc = solrConfig.indexConfig.toIndexWriterConfig(h.getCore());
     assertEquals(false, iwc.getUseCompoundFile());
     TieredMergePolicy tieredMP = assertAndCast(TieredMergePolicy.class,
@@ -61,7 +61,7 @@ public class TestMergePolicyConfig extends SolrTestCaseJ4 {
       = Boolean.parseBoolean(System.getProperty("useCompoundFile"));
     initCore("solrconfig-mergepolicy-legacy.xml","schema-minimal.xml");
-    IndexWriterConfig iwc = solrConfig.indexConfig.toIndexWriterConfig(h.getCore().getLatestSchema());
+    IndexWriterConfig iwc = solrConfig.indexConfig.toIndexWriterConfig(h.getCore());
     assertEquals(expectCFS, iwc.getUseCompoundFile());
@@ -81,7 +81,7 @@ public class TestMergePolicyConfig extends SolrTestCaseJ4 {
       = Boolean.parseBoolean(System.getProperty("useCompoundFile"));
     initCore("solrconfig-tieredmergepolicy.xml","schema-minimal.xml");
-    IndexWriterConfig iwc = solrConfig.indexConfig.toIndexWriterConfig(h.getCore().getLatestSchema());
+    IndexWriterConfig iwc = solrConfig.indexConfig.toIndexWriterConfig(h.getCore());
     assertEquals(expectCFS, iwc.getUseCompoundFile());
@@ -122,7 +122,7 @@ public class TestMergePolicyConfig extends SolrTestCaseJ4 {
     System.setProperty("solr.test.log.merge.policy", mpClass.getName());
     initCore("solrconfig-logmergepolicy.xml","schema-minimal.xml");
-    IndexWriterConfig iwc = solrConfig.indexConfig.toIndexWriterConfig(h.getCore().getLatestSchema());
+    IndexWriterConfig iwc = solrConfig.indexConfig.toIndexWriterConfig(h.getCore());
     // verify some props set to -1 get lucene internal defaults
     assertEquals(-1, solrConfig.indexConfig.maxBufferedDocs);


@@ -47,7 +47,7 @@ public class TestSolrIndexConfig extends SolrTestCaseJ4 {
   public void testIndexConfigParsing() throws Exception {
-    IndexWriterConfig iwc = solrConfig.indexConfig.toIndexWriterConfig(h.getCore().getLatestSchema());
+    IndexWriterConfig iwc = solrConfig.indexConfig.toIndexWriterConfig(h.getCore());
     try {
       checkIndexWriterConfig(iwc);
     } finally {


@@ -17,6 +17,10 @@ package org.apache.solr.update;
  * limitations under the License.
  */
+import java.io.File;
+import java.io.IOException;
+
+import javax.xml.parsers.ParserConfigurationException;
 import org.apache.lucene.index.ConcurrentMergeScheduler;
 import org.apache.lucene.index.IndexWriterConfig;
 import org.apache.lucene.index.SimpleMergedSegmentWarmer;
@@ -26,13 +30,10 @@ import org.apache.solr.core.SolrConfig;
 import org.apache.solr.core.TestMergePolicyConfig;
 import org.apache.solr.schema.IndexSchema;
 import org.apache.solr.schema.IndexSchemaFactory;
+import org.junit.BeforeClass;
 import org.junit.Test;
 import org.xml.sax.SAXException;
-import javax.xml.parsers.ParserConfigurationException;
-import java.io.File;
-import java.io.IOException;
 /**
  * Testcase for {@link SolrIndexConfig}
  *
@@ -40,13 +41,19 @@ import java.io.IOException;
  */
 public class SolrIndexConfigTest extends SolrTestCaseJ4 {
+
+  @BeforeClass
+  public static void beforeClass() throws Exception {
+    initCore("solrconfig.xml","schema.xml");
+  }
+
   @Test
   public void testFailingSolrIndexConfigCreation() {
     try {
       SolrConfig solrConfig = new SolrConfig("bad-mp-solrconfig.xml");
       SolrIndexConfig solrIndexConfig = new SolrIndexConfig(solrConfig, null, null);
       IndexSchema indexSchema = IndexSchemaFactory.buildIndexSchema("schema.xml", solrConfig);
-      solrIndexConfig.toIndexWriterConfig(indexSchema);
+      h.getCore().setLatestSchema(indexSchema);
+      solrIndexConfig.toIndexWriterConfig(h.getCore());
       fail("a mergePolicy should have an empty constructor in order to be instantiated in Solr thus this should fail ");
     } catch (Exception e) {
       // it failed as expected
@@ -61,8 +68,9 @@ public class SolrIndexConfigTest extends SolrTestCaseJ4 {
         null);
     assertNotNull(solrIndexConfig);
     IndexSchema indexSchema = IndexSchemaFactory.buildIndexSchema("schema.xml", solrConfig);
-    IndexWriterConfig iwc = solrIndexConfig.toIndexWriterConfig(indexSchema);
+    h.getCore().setLatestSchema(indexSchema);
+    IndexWriterConfig iwc = solrIndexConfig.toIndexWriterConfig(h.getCore());
     assertNotNull("null mp", iwc.getMergePolicy());
     assertTrue("mp is not TMP", iwc.getMergePolicy() instanceof TieredMergePolicy);
@@ -87,7 +95,8 @@ public class SolrIndexConfigTest extends SolrTestCaseJ4 {
     assertEquals(SimpleMergedSegmentWarmer.class.getName(),
                  solrIndexConfig.mergedSegmentWarmerInfo.className);
     IndexSchema indexSchema = IndexSchemaFactory.buildIndexSchema("schema.xml", solrConfig);
-    IndexWriterConfig iwc = solrIndexConfig.toIndexWriterConfig(indexSchema);
+    h.getCore().setLatestSchema(indexSchema);
+    IndexWriterConfig iwc = solrIndexConfig.toIndexWriterConfig(h.getCore());
     assertEquals(SimpleMergedSegmentWarmer.class, iwc.getMergedSegmentWarmer().getClass());
   }