mirror of https://github.com/apache/lucene.git
LUCENE-6212: remove per-doc analyzers
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1656272 13f79535-47bb-0310-9956-ffa450edef68
parent b5db48c783
commit 1529c57ca1
@@ -392,6 +392,11 @@ API Changes
   Weight.scoresDocsOutOfOrder and LeafCollector.acceptsDocsOutOfOrder have been
   removed and boolean queries now always score in order.
 
+* LUCENE-6212: IndexWriter no longer accepts per-document Analyzer to
+  add/updateDocument.  These methods were trappy as they made it
+  easy to accidentally index tokens that were not easily
+  searchable. (Mike McCandless)
+
 Bug Fixes
 
 * LUCENE-5650: Enforce read-only access to any path outside the temporary
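Note: the trap named in the CHANGES entry is that tokens indexed with a one-off analyzer could not be found by queries analyzed with the writer's configured analyzer. For code that relied on the removed overloads, the migration is to fix the Analyzer once in IndexWriterConfig; per-field variation is still available through a wrapper. A minimal sketch (Lucene 5.x-era API; the analyzer choices and class name are illustrative, not part of this commit):

    import java.util.HashMap;
    import java.util.Map;

    import org.apache.lucene.analysis.Analyzer;
    import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
    import org.apache.lucene.analysis.miscellaneous.PerFieldAnalyzerWrapper;
    import org.apache.lucene.analysis.standard.StandardAnalyzer;
    import org.apache.lucene.document.Document;
    import org.apache.lucene.document.Field;
    import org.apache.lucene.document.TextField;
    import org.apache.lucene.index.IndexWriter;
    import org.apache.lucene.index.IndexWriterConfig;
    import org.apache.lucene.store.Directory;
    import org.apache.lucene.store.RAMDirectory;

    public class PerDocAnalyzerMigration {
      public static void main(String[] args) throws Exception {
        // Per-field analysis survives the change; it just has to be declared
        // up front on IndexWriterConfig instead of being passed per call.
        Map<String, Analyzer> perField = new HashMap<>();
        perField.put("title", new WhitespaceAnalyzer());
        Analyzer wrapped = new PerFieldAnalyzerWrapper(new StandardAnalyzer(), perField);

        try (Directory dir = new RAMDirectory();
             IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(wrapped))) {
          Document doc = new Document();
          doc.add(new TextField("title", "Some Title", Field.Store.YES));
          // before: writer.addDocument(doc, analyzer);
          writer.addDocument(doc); // after: the analyzer is taken from the config
          writer.commit();
        }
      }
    }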
@@ -75,7 +75,7 @@ public class UIMABaseAnalyzerTest extends BaseTokenStreamTestCase {
     doc.add(new TextField("title", dummyTitle, Field.Store.YES));
     String dummyContent = "there is some content written here";
     doc.add(new TextField("contents", dummyContent, Field.Store.YES));
-    writer.addDocument(doc, analyzer);
+    writer.addDocument(doc);
     writer.commit();
 
     // try the search over the first doc
@@ -96,7 +96,7 @@ public class UIMABaseAnalyzerTest extends BaseTokenStreamTestCase {
     doc.add(new TextField("title", dogmasTitle, Field.Store.YES));
     String dogmasContents = "white men can't jump";
     doc.add(new TextField("contents", dogmasContents, Field.Store.YES));
-    writer.addDocument(doc, analyzer);
+    writer.addDocument(doc);
     writer.commit();
 
     directoryReader.close();
@@ -16,11 +16,15 @@
  */
 package org.apache.lucene.classification;
 
+import java.io.IOException;
+import java.util.Random;
+
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field;
 import org.apache.lucene.document.FieldType;
 import org.apache.lucene.document.TextField;
+import org.apache.lucene.index.IndexWriterConfig;
 import org.apache.lucene.index.LeafReader;
 import org.apache.lucene.index.RandomIndexWriter;
 import org.apache.lucene.index.SlowCompositeReaderWrapper;
@@ -32,9 +36,6 @@ import org.apache.lucene.util.TestUtil;
 import org.junit.After;
 import org.junit.Before;
 
-import java.io.IOException;
-import java.util.Random;
-
 /**
  * Base class for testing {@link Classifier}s
  */
@@ -113,7 +114,7 @@ public abstract class ClassificationTestBase<T> extends LuceneTestCase {
     assertEquals("got an assigned class of " + classificationResult.getAssignedClass(), expectedResult, classificationResult.getAssignedClass());
     double score = classificationResult.getScore();
     assertTrue("score should be between 0 and 1, got: " + score, score <= 1 && score >= 0);
-    updateSampleIndex(analyzer);
+    updateSampleIndex();
     ClassificationResult<T> secondClassificationResult = classifier.assignClass(inputDoc);
     assertEquals(classificationResult.getAssignedClass(), secondClassificationResult.getAssignedClass());
     assertEquals(Double.valueOf(score), Double.valueOf(secondClassificationResult.getScore()));
@@ -125,7 +126,8 @@ public abstract class ClassificationTestBase<T> extends LuceneTestCase {
   }
 
   private void populateSampleIndex(Analyzer analyzer) throws IOException {
-    indexWriter.deleteAll();
+    indexWriter.close();
+    indexWriter = new RandomIndexWriter(random(), dir, newIndexWriterConfig(analyzer).setOpenMode(IndexWriterConfig.OpenMode.CREATE));
     indexWriter.commit();
 
     String text;
@@ -138,7 +140,7 @@ public abstract class ClassificationTestBase<T> extends LuceneTestCase {
     doc.add(new Field(categoryFieldName, "politics", ft));
     doc.add(new Field(booleanFieldName, "true", ft));
 
-    indexWriter.addDocument(doc, analyzer);
+    indexWriter.addDocument(doc);
 
     doc = new Document();
     text = "Mitt Romney seeks to assure Israel and Iran, as well as Jewish voters in the United" +
@@ -146,7 +148,7 @@ public abstract class ClassificationTestBase<T> extends LuceneTestCase {
     doc.add(new Field(textFieldName, text, ft));
     doc.add(new Field(categoryFieldName, "politics", ft));
     doc.add(new Field(booleanFieldName, "true", ft));
-    indexWriter.addDocument(doc, analyzer);
+    indexWriter.addDocument(doc);
 
     doc = new Document();
     text = "And there's a threshold question that he has to answer for the American people and " +
@@ -155,7 +157,7 @@ public abstract class ClassificationTestBase<T> extends LuceneTestCase {
     doc.add(new Field(textFieldName, text, ft));
     doc.add(new Field(categoryFieldName, "politics", ft));
     doc.add(new Field(booleanFieldName, "true", ft));
-    indexWriter.addDocument(doc, analyzer);
+    indexWriter.addDocument(doc);
 
     doc = new Document();
     text = "Still, when it comes to gun policy, many congressional Democrats have \"decided to " +
@@ -164,7 +166,7 @@ public abstract class ClassificationTestBase<T> extends LuceneTestCase {
     doc.add(new Field(textFieldName, text, ft));
     doc.add(new Field(categoryFieldName, "politics", ft));
     doc.add(new Field(booleanFieldName, "true", ft));
-    indexWriter.addDocument(doc, analyzer);
+    indexWriter.addDocument(doc);
 
     doc = new Document();
     text = "Standing amongst the thousands of people at the state Capitol, Jorstad, director of " +
@@ -173,7 +175,7 @@ public abstract class ClassificationTestBase<T> extends LuceneTestCase {
     doc.add(new Field(textFieldName, text, ft));
     doc.add(new Field(categoryFieldName, "technology", ft));
     doc.add(new Field(booleanFieldName, "false", ft));
-    indexWriter.addDocument(doc, analyzer);
+    indexWriter.addDocument(doc);
 
     doc = new Document();
     text = "So, about all those experts and analysts who've spent the past year or so saying " +
@@ -181,7 +183,7 @@ public abstract class ClassificationTestBase<T> extends LuceneTestCase {
     doc.add(new Field(textFieldName, text, ft));
     doc.add(new Field(categoryFieldName, "technology", ft));
     doc.add(new Field(booleanFieldName, "false", ft));
-    indexWriter.addDocument(doc, analyzer);
+    indexWriter.addDocument(doc);
 
     doc = new Document();
     text = "More than 400 million people trust Google with their e-mail, and 50 million store files" +
@@ -190,12 +192,12 @@ public abstract class ClassificationTestBase<T> extends LuceneTestCase {
     doc.add(new Field(textFieldName, text, ft));
     doc.add(new Field(categoryFieldName, "technology", ft));
     doc.add(new Field(booleanFieldName, "false", ft));
-    indexWriter.addDocument(doc, analyzer);
+    indexWriter.addDocument(doc);
 
     doc = new Document();
     text = "unlabeled doc";
     doc.add(new Field(textFieldName, text, ft));
-    indexWriter.addDocument(doc, analyzer);
+    indexWriter.addDocument(doc);
 
     indexWriter.commit();
   }
@@ -217,7 +219,8 @@ public abstract class ClassificationTestBase<T> extends LuceneTestCase {
   }
 
   private void populatePerformanceIndex(Analyzer analyzer) throws IOException {
-    indexWriter.deleteAll();
+    indexWriter.close();
+    indexWriter = new RandomIndexWriter(random(), dir, newIndexWriterConfig(analyzer).setOpenMode(IndexWriterConfig.OpenMode.CREATE));
     indexWriter.commit();
 
     FieldType ft = new FieldType(TextField.TYPE_STORED);
@@ -232,7 +235,7 @@ public abstract class ClassificationTestBase<T> extends LuceneTestCase {
       doc.add(new Field(textFieldName, createRandomString(random), ft));
       doc.add(new Field(categoryFieldName, b ? "technology" : "politics", ft));
       doc.add(new Field(booleanFieldName, String.valueOf(b), ft));
-      indexWriter.addDocument(doc, analyzer);
+      indexWriter.addDocument(doc);
     }
     indexWriter.commit();
   }
@@ -246,7 +249,7 @@ public abstract class ClassificationTestBase<T> extends LuceneTestCase {
     return builder.toString();
   }
 
-  private void updateSampleIndex(Analyzer analyzer) throws Exception {
+  private void updateSampleIndex() throws Exception {
 
     String text;
 
@@ -256,54 +259,54 @@ public abstract class ClassificationTestBase<T> extends LuceneTestCase {
     doc.add(new Field(categoryFieldName, "politics", ft));
     doc.add(new Field(booleanFieldName, "true", ft));
 
-    indexWriter.addDocument(doc, analyzer);
+    indexWriter.addDocument(doc);
 
     doc = new Document();
     text = "Julian Zelizer says Bill Clinton is still trying to shape his party, years after the White House, while George W. Bush opts for a much more passive role.";
     doc.add(new Field(textFieldName, text, ft));
     doc.add(new Field(categoryFieldName, "politics", ft));
     doc.add(new Field(booleanFieldName, "true", ft));
-    indexWriter.addDocument(doc, analyzer);
+    indexWriter.addDocument(doc);
 
     doc = new Document();
     text = "Crossfire: Sen. Tim Scott passes on Sen. Lindsey Graham endorsement";
     doc.add(new Field(textFieldName, text, ft));
     doc.add(new Field(categoryFieldName, "politics", ft));
     doc.add(new Field(booleanFieldName, "true", ft));
-    indexWriter.addDocument(doc, analyzer);
+    indexWriter.addDocument(doc);
 
     doc = new Document();
     text = "Illinois becomes 16th state to allow same-sex marriage.";
     doc.add(new Field(textFieldName, text, ft));
     doc.add(new Field(categoryFieldName, "politics", ft));
     doc.add(new Field(booleanFieldName, "true", ft));
-    indexWriter.addDocument(doc, analyzer);
+    indexWriter.addDocument(doc);
 
     doc = new Document();
     text = "Apple is developing iPhones with curved-glass screens and enhanced sensors that detect different levels of pressure, according to a new report.";
     doc.add(new Field(textFieldName, text, ft));
     doc.add(new Field(categoryFieldName, "technology", ft));
     doc.add(new Field(booleanFieldName, "false", ft));
-    indexWriter.addDocument(doc, analyzer);
+    indexWriter.addDocument(doc);
 
     doc = new Document();
     text = "The Xbox One is Microsoft's first new gaming console in eight years. It's a quality piece of hardware but it's also noteworthy because Microsoft is using it to make a statement.";
     doc.add(new Field(textFieldName, text, ft));
     doc.add(new Field(categoryFieldName, "technology", ft));
     doc.add(new Field(booleanFieldName, "false", ft));
-    indexWriter.addDocument(doc, analyzer);
+    indexWriter.addDocument(doc);
 
     doc = new Document();
     text = "Google says it will replace a Google Maps image after a California father complained it shows the body of his teen-age son, who was shot to death in 2009.";
     doc.add(new Field(textFieldName, text, ft));
     doc.add(new Field(categoryFieldName, "technology", ft));
     doc.add(new Field(booleanFieldName, "false", ft));
-    indexWriter.addDocument(doc, analyzer);
+    indexWriter.addDocument(doc);
 
     doc = new Document();
     text = "second unlabeled doc";
     doc.add(new Field(textFieldName, text, ft));
-    indexWriter.addDocument(doc, analyzer);
+    indexWriter.addDocument(doc);
 
     indexWriter.commit();
   }
@@ -64,8 +64,6 @@ public class DataSplitterTest extends LuceneTestCase {
     ft.setStoreTermVectorOffsets(true);
     ft.setStoreTermVectorPositions(true);
 
-    Analyzer analyzer = new MockAnalyzer(random());
-
     Document doc;
     Random rnd = random();
     for (int i = 0; i < 100; i++) {
@@ -73,7 +71,7 @@ public class DataSplitterTest extends LuceneTestCase {
       doc.add(new Field(idFieldName, Integer.toString(i), ft));
       doc.add(new Field(textFieldName, TestUtil.randomUnicodeString(rnd, 1024), ft));
       doc.add(new Field(classFieldName, TestUtil.randomUnicodeString(rnd, 10), ft));
-      indexWriter.addDocument(doc, analyzer);
+      indexWriter.addDocument(doc);
     }
 
     indexWriter.commit();
@@ -55,14 +55,12 @@ public class DocToDoubleVectorUtilsTest extends LuceneTestCase {
     ft.setStoreTermVectorOffsets(true);
     ft.setStoreTermVectorPositions(true);
 
-    Analyzer analyzer = new MockAnalyzer(random());
-
     Document doc;
     for (int i = 0; i < 10; i++) {
       doc = new Document();
       doc.add(new Field("id", Integer.toString(i), ft));
       doc.add(new Field("text", random().nextInt(10) + " " + random().nextInt(10) + " " + random().nextInt(10), ft));
-      indexWriter.addDocument(doc, analyzer);
+      indexWriter.addDocument(doc);
     }
 
     indexWriter.commit();
@@ -1127,22 +1127,7 @@ public class IndexWriter implements Closeable, TwoPhaseCommit, Accountable {
    * @throws IOException if there is a low-level IO error
    */
   public void addDocument(IndexDocument doc) throws IOException {
-    addDocument(doc, analyzer);
-  }
-
-  /**
-   * Adds a document to this index, using the provided analyzer instead of the
-   * value of {@link #getAnalyzer()}.
-   *
-   * <p>See {@link #addDocument(IndexDocument)} for details on
-   * index and IndexWriter state after an Exception, and
-   * flushing/merging temporary free space requirements.</p>
-   *
-   * @throws CorruptIndexException if the index is corrupt
-   * @throws IOException if there is a low-level IO error
-   */
-  public void addDocument(IndexDocument doc, Analyzer analyzer) throws IOException {
-    updateDocument(null, doc, analyzer);
+    updateDocument(null, doc);
   }
 
   /**
@@ -1183,22 +1168,7 @@ public class IndexWriter implements Closeable, TwoPhaseCommit, Accountable {
    * @lucene.experimental
    */
   public void addDocuments(Iterable<? extends IndexDocument> docs) throws IOException {
-    addDocuments(docs, analyzer);
-  }
-
-  /**
-   * Atomically adds a block of documents, analyzed using the
-   * provided analyzer, with sequentially assigned document
-   * IDs, such that an external reader will see all or none
-   * of the documents.
-   *
-   * @throws CorruptIndexException if the index is corrupt
-   * @throws IOException if there is a low-level IO error
-   *
-   * @lucene.experimental
-   */
-  public void addDocuments(Iterable<? extends IndexDocument> docs, Analyzer analyzer) throws IOException {
-    updateDocuments(null, docs, analyzer);
+    updateDocuments(null, docs);
   }
 
   /**
@@ -1215,24 +1185,6 @@ public class IndexWriter implements Closeable, TwoPhaseCommit, Accountable {
    * @lucene.experimental
    */
   public void updateDocuments(Term delTerm, Iterable<? extends IndexDocument> docs) throws IOException {
-    updateDocuments(delTerm, docs, analyzer);
-  }
-
-  /**
-   * Atomically deletes documents matching the provided
-   * delTerm and adds a block of documents, analyzed using
-   * the provided analyzer, with sequentially
-   * assigned document IDs, such that an external reader
-   * will see all or none of the documents.
-   *
-   * See {@link #addDocuments(Iterable)}.
-   *
-   * @throws CorruptIndexException if the index is corrupt
-   * @throws IOException if there is a low-level IO error
-   *
-   * @lucene.experimental
-   */
-  public void updateDocuments(Term delTerm, Iterable<? extends IndexDocument> docs, Analyzer analyzer) throws IOException {
     ensureOpen();
     try {
       boolean success = false;
@@ -1384,26 +1336,6 @@ public class IndexWriter implements Closeable, TwoPhaseCommit, Accountable {
    */
   public void updateDocument(Term term, IndexDocument doc) throws IOException {
     ensureOpen();
-    updateDocument(term, doc, analyzer);
-  }
-
-  /**
-   * Updates a document by first deleting the document(s)
-   * containing <code>term</code> and then adding the new
-   * document.  The delete and then add are atomic as seen
-   * by a reader on the same index (flush may happen only after
-   * the add).
-   *
-   * @param term the term to identify the document(s) to be
-   *  deleted
-   * @param doc the document to be added
-   * @param analyzer the analyzer to use when analyzing the document
-   * @throws CorruptIndexException if the index is corrupt
-   * @throws IOException if there is a low-level IO error
-   */
-  public void updateDocument(Term term, IndexDocument doc, Analyzer analyzer)
-    throws IOException {
-    ensureOpen();
     try {
       boolean success = false;
       try {
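Note: the javadocs retained above still describe the call shapes after this change. A short self-contained sketch of the surviving entry points (Lucene 5.x-era API; the RAMDirectory and field names are just for illustration):

    import java.util.Arrays;

    import org.apache.lucene.analysis.standard.StandardAnalyzer;
    import org.apache.lucene.document.Document;
    import org.apache.lucene.document.Field;
    import org.apache.lucene.document.StringField;
    import org.apache.lucene.document.TextField;
    import org.apache.lucene.index.IndexWriter;
    import org.apache.lucene.index.IndexWriterConfig;
    import org.apache.lucene.index.Term;
    import org.apache.lucene.store.Directory;
    import org.apache.lucene.store.RAMDirectory;

    public class PostChangeCalls {
      public static void main(String[] args) throws Exception {
        try (Directory dir = new RAMDirectory();
             IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(new StandardAnalyzer()))) {
          Document doc = new Document();
          doc.add(new StringField("id", "1", Field.Store.YES));
          doc.add(new TextField("body", "first version", Field.Store.NO));

          writer.addDocument(doc);                       // analysis comes from the config
          writer.addDocuments(Arrays.asList(doc, doc));  // atomic block: readers see all or none

          Document updated = new Document();
          updated.add(new StringField("id", "1", Field.Store.YES));
          updated.add(new TextField("body", "second version", Field.Store.NO));
          writer.updateDocument(new Term("id", "1"), updated); // delete-then-add, atomic to readers
          writer.commit();
        }
      }
    }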
@@ -47,15 +47,6 @@ public class TrackingIndexWriter {
     this.writer = writer;
   }
 
-  /** Calls {@link
-   *  IndexWriter#updateDocument(Term,IndexDocument,Analyzer)}
-   *  and returns the generation that reflects this change. */
-  public long updateDocument(Term t, IndexDocument d, Analyzer a) throws IOException {
-    writer.updateDocument(t, d, a);
-    // Return gen as of when indexing finished:
-    return indexingGen.get();
-  }
-
   /** Calls {@link
    *  IndexWriter#updateDocument(Term,IndexDocument)} and
    *  returns the generation that reflects this change. */
@@ -65,15 +56,6 @@ public class TrackingIndexWriter {
     return indexingGen.get();
   }
 
-  /** Calls {@link
-   *  IndexWriter#updateDocuments(Term,Iterable,Analyzer)}
-   *  and returns the generation that reflects this change. */
-  public long updateDocuments(Term t, Iterable<? extends IndexDocument> docs, Analyzer a) throws IOException {
-    writer.updateDocuments(t, docs, a);
-    // Return gen as of when indexing finished:
-    return indexingGen.get();
-  }
-
   /** Calls {@link
    *  IndexWriter#updateDocuments(Term,Iterable)} and returns
    *  the generation that reflects this change. */
@@ -123,24 +105,6 @@ public class TrackingIndexWriter {
     return indexingGen.get();
   }
 
-  /** Calls {@link
-   *  IndexWriter#addDocument(IndexDocument,Analyzer)} and
-   *  returns the generation that reflects this change. */
-  public long addDocument(IndexDocument d, Analyzer a) throws IOException {
-    writer.addDocument(d, a);
-    // Return gen as of when indexing finished:
-    return indexingGen.get();
-  }
-
-  /** Calls {@link
-   *  IndexWriter#addDocuments(Iterable,Analyzer)} and
-   *  returns the generation that reflects this change. */
-  public long addDocuments(Iterable<? extends IndexDocument> docs, Analyzer a) throws IOException {
-    writer.addDocuments(docs, a);
-    // Return gen as of when indexing finished:
-    return indexingGen.get();
-  }
-
   /** Calls {@link IndexWriter#addDocument(IndexDocument)}
    *  and returns the generation that reflects this change. */
   public long addDocument(IndexDocument d) throws IOException {
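Note: the surviving TrackingIndexWriter methods still return an indexing generation; the usual consumer is ControlledRealTimeReopenThread. A sketch under the assumption of an already-open IndexWriter and SearcherManager:

    import org.apache.lucene.document.Document;
    import org.apache.lucene.index.IndexWriter;
    import org.apache.lucene.index.TrackingIndexWriter;
    import org.apache.lucene.search.ControlledRealTimeReopenThread;
    import org.apache.lucene.search.IndexSearcher;
    import org.apache.lucene.search.SearcherManager;

    public class GenerationExample {
      public static void run(IndexWriter writer, SearcherManager mgr, Document doc) throws Exception {
        TrackingIndexWriter tracking = new TrackingIndexWriter(writer);
        ControlledRealTimeReopenThread<IndexSearcher> reopener =
            new ControlledRealTimeReopenThread<>(tracking, mgr, 1.0, 0.01);
        reopener.start();

        long gen = tracking.addDocument(doc);  // no Analyzer parameter after this change
        reopener.waitForGeneration(gen);       // blocks until a reopened searcher covers gen
        reopener.close();
      }
    }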
@@ -305,7 +305,7 @@ public class TestMockAnalyzer extends BaseTokenStreamTestCase {
       }
     };
 
-    final RandomIndexWriter writer = new RandomIndexWriter(random(), newDirectory());
+    final RandomIndexWriter writer = new RandomIndexWriter(random(), newDirectory(), a);
     final Document doc = new Document();
     final FieldType ft = new FieldType();
     ft.setIndexOptions(IndexOptions.DOCS);
@@ -315,7 +315,7 @@ public class TestMockAnalyzer extends BaseTokenStreamTestCase {
     ft.setStoreTermVectorOffsets(true);
     doc.add(new Field("f", "a", ft));
     doc.add(new Field("f", "a", ft));
-    writer.addDocument(doc, a);
+    writer.addDocument(doc);
     final LeafReader reader = getOnlySegmentReader(writer.getReader());
     final Fields fields = reader.getTermVectors(0);
     final Terms terms = fields.terms("f");
@@ -392,27 +392,35 @@ public class TestIndexWriterExceptions extends LuceneTestCase {
   // LUCENE-1208
   public void testExceptionJustBeforeFlush() throws IOException {
     Directory dir = newDirectory();
-    IndexWriter w = RandomIndexWriter.mockIndexWriter(dir,
-                                                      newIndexWriterConfig(new MockAnalyzer(random()))
-                                                        .setMaxBufferedDocs(2),
-                                                      new TestPoint1());
-    Document doc = new Document();
-    doc.add(newTextField("field", "a field", Field.Store.YES));
-    w.addDocument(doc);
+
+    final AtomicBoolean doCrash = new AtomicBoolean();
 
     Analyzer analyzer = new Analyzer(Analyzer.PER_FIELD_REUSE_STRATEGY) {
       @Override
       public TokenStreamComponents createComponents(String fieldName) {
         MockTokenizer tokenizer = new MockTokenizer(MockTokenizer.WHITESPACE, false);
         tokenizer.setEnableChecks(false); // disable workflow checking as we forcefully close() in exceptional cases.
-        return new TokenStreamComponents(tokenizer, new CrashingFilter(fieldName, tokenizer));
+        TokenStream stream = tokenizer;
+        if (doCrash.get()) {
+          stream = new CrashingFilter(fieldName, stream);
+        }
+        return new TokenStreamComponents(tokenizer, stream);
       }
     };
 
+    IndexWriter w = RandomIndexWriter.mockIndexWriter(dir,
+                                                      newIndexWriterConfig(analyzer)
+                                                        .setMaxBufferedDocs(2),
+                                                      new TestPoint1());
+    Document doc = new Document();
+    doc.add(newTextField("field", "a field", Field.Store.YES));
+    w.addDocument(doc);
+
     Document crashDoc = new Document();
     crashDoc.add(newTextField("crash", "do it on token 4", Field.Store.YES));
+    doCrash.set(true);
     try {
-      w.addDocument(crashDoc, analyzer);
+      w.addDocument(crashDoc);
      fail("did not hit expected exception");
     } catch (IOException ioe) {
       // expected
(The diff for one file is suppressed because it is too large.)
@@ -390,9 +390,9 @@ public class TestControlledRealTimeReopenThread extends ThreadedIndexingAndSearchingTestCase {
 
       @Override
       public void updateDocument(Term term,
-                                 IndexDocument doc, Analyzer analyzer)
+                                 IndexDocument doc)
         throws IOException {
-        super.updateDocument(term, doc, analyzer);
+        super.updateDocument(term, doc);
         try {
           if (waitAfterUpdate) {
             signal.countDown();
@@ -2040,22 +2040,22 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatter {
     Document doc = new Document();
     doc.add(new IntField(NUMERIC_FIELD_NAME, 1, Field.Store.NO));
     doc.add(new StoredField(NUMERIC_FIELD_NAME, 1));
-    writer.addDocument(doc, analyzer);
+    writer.addDocument(doc);
 
     doc = new Document();
     doc.add(new IntField(NUMERIC_FIELD_NAME, 3, Field.Store.NO));
     doc.add(new StoredField(NUMERIC_FIELD_NAME, 3));
-    writer.addDocument(doc, analyzer);
+    writer.addDocument(doc);
 
     doc = new Document();
     doc.add(new IntField(NUMERIC_FIELD_NAME, 5, Field.Store.NO));
     doc.add(new StoredField(NUMERIC_FIELD_NAME, 5));
-    writer.addDocument(doc, analyzer);
+    writer.addDocument(doc);
 
     doc = new Document();
     doc.add(new IntField(NUMERIC_FIELD_NAME, 7, Field.Store.NO));
     doc.add(new StoredField(NUMERIC_FIELD_NAME, 7));
-    writer.addDocument(doc, analyzer);
+    writer.addDocument(doc);
 
     Document childDoc = doc(FIELD_NAME, "child document");
     Document parentDoc = doc(FIELD_NAME, "parent document");
@@ -47,7 +47,7 @@ import org.apache.lucene.util.packed.PackedLongValues;
  * will be sorted while segments resulting from a flush will be in the order
  * in which documents have been added.
  * <p><b>NOTE</b>: Never use this policy if you rely on
- * {@link IndexWriter#addDocuments(Iterable, Analyzer) IndexWriter.addDocuments}
+ * {@link IndexWriter#addDocuments(Iterable) IndexWriter.addDocuments}
  * to have sequentially-assigned doc IDs, this policy will scatter doc IDs.
 * <p><b>NOTE</b>: This policy should only be used with idempotent {@code Sort}s
  * so that the order of segments is predictable. For example, using
@@ -103,11 +103,6 @@ public class RandomIndexWriter implements Closeable {
    * @see IndexWriter#addDocument(org.apache.lucene.index.IndexDocument)
    */
   public <T extends IndexableField> void addDocument(final IndexDocument doc) throws IOException {
-    LuceneTestCase.maybeChangeLiveIndexWriterConfig(r, w.getConfig());
-    addDocument(doc, w.getAnalyzer());
-  }
-
-  public <T extends IndexableField> void addDocument(final IndexDocument doc, Analyzer a) throws IOException {
     LuceneTestCase.maybeChangeLiveIndexWriterConfig(r, w.getConfig());
     if (r.nextInt(5) == 3) {
       // TODO: maybe, we should simply buffer up added docs
@@ -141,9 +136,9 @@ public class RandomIndexWriter implements Closeable {
           }
         };
       }
-      }, a);
+      });
     } else {
-      w.addDocument(doc, a);
+      w.addDocument(doc);
     }
 
     maybeCommit();
@@ -536,7 +536,7 @@ public final class SolrCore implements SolrInfoMBean, Closeable {
       log.warn(logid+"Solr index directory '" + new File(indexDir) + "' doesn't exist."
               + " Creating new index...");
 
-      SolrIndexWriter writer = SolrIndexWriter.create("SolrCore.initIndex", indexDir, getDirectoryFactory(), true,
+      SolrIndexWriter writer = SolrIndexWriter.create(this, "SolrCore.initIndex", indexDir, getDirectoryFactory(), true,
                                                       getLatestSchema(), solrConfig.indexConfig, solrDelPolicy, codec);
       writer.close();
     }
@@ -264,7 +264,7 @@ public final class DefaultSolrCoreState extends SolrCoreState implements RecoveryStrategy.RecoveryListener {
   }
 
   protected SolrIndexWriter createMainIndexWriter(SolrCore core, String name) throws IOException {
-    return SolrIndexWriter.create(name, core.getNewIndexDir(),
+    return SolrIndexWriter.create(core, name, core.getNewIndexDir(),
         core.getDirectoryFactory(), false, core.getLatestSchema(),
         core.getSolrConfig().indexConfig, core.getDeletionPolicy(), core.getCodec());
   }
@@ -235,11 +235,11 @@ public class DirectUpdateHandler2 extends UpdateHandler implements SolrCoreState.IndexWriterCloser {
       }
 
       if (cmd.isBlock()) {
-        writer.updateDocuments(updateTerm, cmd, schema.getIndexAnalyzer());
+        writer.updateDocuments(updateTerm, cmd);
       } else {
         Document luceneDocument = cmd.getLuceneDocument();
         // SolrCore.verbose("updateDocument",updateTerm,luceneDocument,writer);
-        writer.updateDocument(updateTerm, luceneDocument, schema.getIndexAnalyzer());
+        writer.updateDocument(updateTerm, luceneDocument);
       }
       // SolrCore.verbose("updateDocument",updateTerm,"DONE");
 
@@ -264,9 +264,9 @@ public class DirectUpdateHandler2 extends UpdateHandler implements SolrCoreState.IndexWriterCloser {
       } else {
         // allow duplicates
         if (cmd.isBlock()) {
-          writer.addDocuments(cmd, schema.getIndexAnalyzer());
+          writer.addDocuments(cmd);
         } else {
-          writer.addDocument(cmd.getLuceneDocument(), schema.getIndexAnalyzer());
+          writer.addDocument(cmd.getLuceneDocument());
         }
 
         if (ulog != null) ulog.add(cmd);
@@ -442,8 +442,7 @@ public class DirectUpdateHandler2 extends UpdateHandler implements SolrCoreState.IndexWriterCloser {
     RefCounted<IndexWriter> iw = solrCoreState.getIndexWriter(core);
     try {
       IndexWriter writer = iw.get();
-      writer.updateDocument(idTerm, luceneDocument, cmd.getReq().getSchema()
-          .getIndexAnalyzer());
+      writer.updateDocument(idTerm, luceneDocument);
 
       for (Query q : dbqList) {
         writer.deleteDocuments(new DeleteByQueryWrapper(q, core.getLatestSchema()));
@@ -17,6 +17,14 @@
 
 package org.apache.solr.update;
 
+import java.io.File;
+import java.io.FileOutputStream;
+import java.io.PrintStream;
+import java.util.List;
+import java.util.Map;
+
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.DelegatingAnalyzerWrapper;
 import org.apache.lucene.index.*;
 import org.apache.lucene.index.IndexWriter.IndexReaderWarmer;
 import org.apache.lucene.util.InfoStream;
@@ -24,16 +32,14 @@ import org.apache.lucene.util.Version;
 import org.apache.solr.common.cloud.ZkNodeProps;
 import org.apache.solr.common.util.NamedList;
 import org.apache.solr.core.MapSerializable;
-import org.apache.solr.core.SolrConfig;
 import org.apache.solr.core.PluginInfo;
+import org.apache.solr.core.SolrConfig;
+import org.apache.solr.core.SolrCore;
 import org.apache.solr.schema.IndexSchema;
 import org.apache.solr.util.SolrPluginUtils;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
-import java.util.List;
-import java.util.Map;
-
 import static org.apache.solr.core.Config.assertWarnOrFail;
 
 /**
@@ -180,12 +186,23 @@ public class SolrIndexConfig implements MapSerializable {
     return l.isEmpty() ? def : l.get(0);
   }
 
-  public IndexWriterConfig toIndexWriterConfig(IndexSchema schema) {
-    // so that we can update the analyzer on core reload, we pass null
-    // for the default analyzer, and explicitly pass an analyzer on
-    // appropriate calls to IndexWriter
-
-    IndexWriterConfig iwc = new IndexWriterConfig(null);
+  private static class DelayedSchemaAnalyzer extends DelegatingAnalyzerWrapper {
+    private final SolrCore core;
+
+    public DelayedSchemaAnalyzer(SolrCore core) {
+      super(PER_FIELD_REUSE_STRATEGY);
+      this.core = core;
+    }
+
+    @Override
+    protected Analyzer getWrappedAnalyzer(String fieldName) {
+      return core.getLatestSchema().getIndexAnalyzer();
+    }
+  }
+
+  public IndexWriterConfig toIndexWriterConfig(SolrCore core) {
+    IndexSchema schema = core.getLatestSchema();
+    IndexWriterConfig iwc = new IndexWriterConfig(new DelayedSchemaAnalyzer(core));
     if (maxBufferedDocs != -1)
       iwc.setMaxBufferedDocs(maxBufferedDocs);
 
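Note: the DelayedSchemaAnalyzer above is how Solr keeps analyzer hot-swap on core reload working now that IndexWriter pins a single Analyzer at construction: the wrapper is handed to IndexWriterConfig once, but re-resolves the latest schema on every use. The same pattern in isolation (a sketch; the AtomicReference holder is illustrative, not Solr's code):

    import java.util.concurrent.atomic.AtomicReference;

    import org.apache.lucene.analysis.Analyzer;
    import org.apache.lucene.analysis.DelegatingAnalyzerWrapper;

    /** Resolves the "current" analyzer lazily, so it can be swapped at runtime
     *  even though IndexWriterConfig captures a single Analyzer instance. */
    final class SwappableAnalyzer extends DelegatingAnalyzerWrapper {
      private final AtomicReference<Analyzer> current;

      SwappableAnalyzer(AtomicReference<Analyzer> current) {
        super(PER_FIELD_REUSE_STRATEGY);
        this.current = current;
      }

      @Override
      protected Analyzer getWrappedAnalyzer(String fieldName) {
        return current.get(); // re-resolved on every use, like getLatestSchema()
      }
    }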
@@ -121,7 +121,7 @@ public class SolrIndexSplitter {
       } else {
         SolrCore core = searcher.getCore();
         String path = paths.get(partitionNumber);
-        iw = SolrIndexWriter.create("SplittingIndexWriter"+partitionNumber + (ranges != null ? " " + ranges.get(partitionNumber) : ""), path,
+        iw = SolrIndexWriter.create(core, "SplittingIndexWriter"+partitionNumber + (ranges != null ? " " + ranges.get(partitionNumber) : ""), path,
                                     core.getDirectoryFactory(), true, core.getLatestSchema(),
                                     core.getSolrConfig().indexConfig, core.getDeletionPolicy(), core.getCodec());
       }
@@ -27,8 +27,9 @@ import org.apache.lucene.index.IndexWriterConfig;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.util.InfoStream;
 import org.apache.solr.common.util.IOUtils;
-import org.apache.solr.core.DirectoryFactory;
 import org.apache.solr.core.DirectoryFactory.DirContext;
+import org.apache.solr.core.DirectoryFactory;
+import org.apache.solr.core.SolrCore;
 import org.apache.solr.schema.IndexSchema;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
@@ -56,12 +57,12 @@ public class SolrIndexWriter extends IndexWriter {
   private InfoStream infoStream;
   private Directory directory;
 
-  public static SolrIndexWriter create(String name, String path, DirectoryFactory directoryFactory, boolean create, IndexSchema schema, SolrIndexConfig config, IndexDeletionPolicy delPolicy, Codec codec) throws IOException {
+  public static SolrIndexWriter create(SolrCore core, String name, String path, DirectoryFactory directoryFactory, boolean create, IndexSchema schema, SolrIndexConfig config, IndexDeletionPolicy delPolicy, Codec codec) throws IOException {
 
     SolrIndexWriter w = null;
     final Directory d = directoryFactory.get(path, DirContext.DEFAULT, config.lockType);
     try {
-      w = new SolrIndexWriter(name, path, d, create, schema,
+      w = new SolrIndexWriter(core, name, path, d, create, schema,
                               config, delPolicy, codec);
       w.setDirectoryFactory(directoryFactory);
       return w;
@@ -73,9 +74,9 @@ public class SolrIndexWriter extends IndexWriter {
     }
   }
 
-  private SolrIndexWriter(String name, String path, Directory directory, boolean create, IndexSchema schema, SolrIndexConfig config, IndexDeletionPolicy delPolicy, Codec codec) throws IOException {
+  private SolrIndexWriter(SolrCore core, String name, String path, Directory directory, boolean create, IndexSchema schema, SolrIndexConfig config, IndexDeletionPolicy delPolicy, Codec codec) throws IOException {
     super(directory,
-          config.toIndexWriterConfig(schema).
+          config.toIndexWriterConfig(core).
           setOpenMode(create ? IndexWriterConfig.OpenMode.CREATE : IndexWriterConfig.OpenMode.APPEND).
           setIndexDeletionPolicy(delPolicy).setCodec(codec)
          );
@@ -116,7 +116,7 @@ public class TestConfig extends SolrTestCaseJ4 {
     assertEquals("default useCompoundFile", false, sic.getUseCompoundFile());
 
     IndexSchema indexSchema = IndexSchemaFactory.buildIndexSchema("schema.xml", solrConfig);
-    IndexWriterConfig iwc = sic.toIndexWriterConfig(indexSchema);
+    IndexWriterConfig iwc = sic.toIndexWriterConfig(h.getCore());
 
     assertNotNull("null mp", iwc.getMergePolicy());
     assertTrue("mp is not TMP", iwc.getMergePolicy() instanceof TieredMergePolicy);
@@ -31,7 +31,7 @@ public class TestInfoStreamLogging extends SolrTestCaseJ4 {
   }
 
   public void testIndexConfig() throws Exception {
-    IndexWriterConfig iwc = solrConfig.indexConfig.toIndexWriterConfig(h.getCore().getLatestSchema());
+    IndexWriterConfig iwc = solrConfig.indexConfig.toIndexWriterConfig(h.getCore());
 
     assertTrue(iwc.getInfoStream() instanceof LoggingInfoStream);
   }
@@ -45,7 +45,7 @@ public class TestMergePolicyConfig extends SolrTestCaseJ4 {
 
   public void testDefaultMergePolicyConfig() throws Exception {
     initCore("solrconfig-mergepolicy-defaults.xml","schema-minimal.xml");
-    IndexWriterConfig iwc = solrConfig.indexConfig.toIndexWriterConfig(h.getCore().getLatestSchema());
+    IndexWriterConfig iwc = solrConfig.indexConfig.toIndexWriterConfig(h.getCore());
     assertEquals(false, iwc.getUseCompoundFile());
 
     TieredMergePolicy tieredMP = assertAndCast(TieredMergePolicy.class,
@@ -61,7 +61,7 @@ public class TestMergePolicyConfig extends SolrTestCaseJ4 {
       = Boolean.parseBoolean(System.getProperty("useCompoundFile"));
 
     initCore("solrconfig-mergepolicy-legacy.xml","schema-minimal.xml");
-    IndexWriterConfig iwc = solrConfig.indexConfig.toIndexWriterConfig(h.getCore().getLatestSchema());
+    IndexWriterConfig iwc = solrConfig.indexConfig.toIndexWriterConfig(h.getCore());
     assertEquals(expectCFS, iwc.getUseCompoundFile());
 
 
@@ -81,7 +81,7 @@ public class TestMergePolicyConfig extends SolrTestCaseJ4 {
       = Boolean.parseBoolean(System.getProperty("useCompoundFile"));
 
     initCore("solrconfig-tieredmergepolicy.xml","schema-minimal.xml");
-    IndexWriterConfig iwc = solrConfig.indexConfig.toIndexWriterConfig(h.getCore().getLatestSchema());
+    IndexWriterConfig iwc = solrConfig.indexConfig.toIndexWriterConfig(h.getCore());
     assertEquals(expectCFS, iwc.getUseCompoundFile());
 
 
@@ -122,7 +122,7 @@ public class TestMergePolicyConfig extends SolrTestCaseJ4 {
     System.setProperty("solr.test.log.merge.policy", mpClass.getName());
 
     initCore("solrconfig-logmergepolicy.xml","schema-minimal.xml");
-    IndexWriterConfig iwc = solrConfig.indexConfig.toIndexWriterConfig(h.getCore().getLatestSchema());
+    IndexWriterConfig iwc = solrConfig.indexConfig.toIndexWriterConfig(h.getCore());
 
     // verify some props set to -1 get lucene internal defaults
     assertEquals(-1, solrConfig.indexConfig.maxBufferedDocs);
@@ -47,7 +47,7 @@ public class TestSolrIndexConfig extends SolrTestCaseJ4 {
 
 
   public void testIndexConfigParsing() throws Exception {
-    IndexWriterConfig iwc = solrConfig.indexConfig.toIndexWriterConfig(h.getCore().getLatestSchema());
+    IndexWriterConfig iwc = solrConfig.indexConfig.toIndexWriterConfig(h.getCore());
     try {
       checkIndexWriterConfig(iwc);
     } finally {
@@ -17,6 +17,10 @@ package org.apache.solr.update;
  * limitations under the License.
  */
 
+import java.io.File;
+import java.io.IOException;
+import javax.xml.parsers.ParserConfigurationException;
+
 import org.apache.lucene.index.ConcurrentMergeScheduler;
 import org.apache.lucene.index.IndexWriterConfig;
 import org.apache.lucene.index.SimpleMergedSegmentWarmer;
@@ -26,13 +30,10 @@ import org.apache.solr.core.SolrConfig;
 import org.apache.solr.core.TestMergePolicyConfig;
 import org.apache.solr.schema.IndexSchema;
 import org.apache.solr.schema.IndexSchemaFactory;
+import org.junit.BeforeClass;
 import org.junit.Test;
 import org.xml.sax.SAXException;
 
-import javax.xml.parsers.ParserConfigurationException;
-import java.io.File;
-import java.io.IOException;
-
 /**
  * Testcase for {@link SolrIndexConfig}
  *
@@ -40,13 +41,19 @@ import java.io.IOException;
  */
 public class SolrIndexConfigTest extends SolrTestCaseJ4 {
 
+  @BeforeClass
+  public static void beforeClass() throws Exception {
+    initCore("solrconfig.xml","schema.xml");
+  }
+
   @Test
   public void testFailingSolrIndexConfigCreation() {
     try {
       SolrConfig solrConfig = new SolrConfig("bad-mp-solrconfig.xml");
       SolrIndexConfig solrIndexConfig = new SolrIndexConfig(solrConfig, null, null);
       IndexSchema indexSchema = IndexSchemaFactory.buildIndexSchema("schema.xml", solrConfig);
-      solrIndexConfig.toIndexWriterConfig(indexSchema);
+      h.getCore().setLatestSchema(indexSchema);
+      solrIndexConfig.toIndexWriterConfig(h.getCore());
       fail("a mergePolicy should have an empty constructor in order to be instantiated in Solr thus this should fail ");
     } catch (Exception e) {
       // it failed as expected
@@ -62,7 +69,8 @@ public class SolrIndexConfigTest extends SolrTestCaseJ4 {
     assertNotNull(solrIndexConfig);
     IndexSchema indexSchema = IndexSchemaFactory.buildIndexSchema("schema.xml", solrConfig);
 
-    IndexWriterConfig iwc = solrIndexConfig.toIndexWriterConfig(indexSchema);
+    h.getCore().setLatestSchema(indexSchema);
+    IndexWriterConfig iwc = solrIndexConfig.toIndexWriterConfig(h.getCore());
 
     assertNotNull("null mp", iwc.getMergePolicy());
     assertTrue("mp is not TMP", iwc.getMergePolicy() instanceof TieredMergePolicy);
@@ -87,7 +95,8 @@ public class SolrIndexConfigTest extends SolrTestCaseJ4 {
     assertEquals(SimpleMergedSegmentWarmer.class.getName(),
         solrIndexConfig.mergedSegmentWarmerInfo.className);
     IndexSchema indexSchema = IndexSchemaFactory.buildIndexSchema("schema.xml", solrConfig);
-    IndexWriterConfig iwc = solrIndexConfig.toIndexWriterConfig(indexSchema);
+    h.getCore().setLatestSchema(indexSchema);
+    IndexWriterConfig iwc = solrIndexConfig.toIndexWriterConfig(h.getCore());
     assertEquals(SimpleMergedSegmentWarmer.class, iwc.getMergedSegmentWarmer().getClass());
   }
 