mirror of https://github.com/apache/lucene.git
LUCENE-2295: remove maxFieldLength (trunk)
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1060340 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
e43fdc9654
commit
2a0484bd40
|
@ -84,8 +84,7 @@ public class IndexHTML {
|
||||||
}
|
}
|
||||||
writer = new IndexWriter(FSDirectory.open(index), new IndexWriterConfig(
|
writer = new IndexWriter(FSDirectory.open(index), new IndexWriterConfig(
|
||||||
Version.LUCENE_CURRENT, new StandardAnalyzer(Version.LUCENE_CURRENT))
|
Version.LUCENE_CURRENT, new StandardAnalyzer(Version.LUCENE_CURRENT))
|
||||||
.setMaxFieldLength(1000000).setOpenMode(
|
.setOpenMode(create ? OpenMode.CREATE : OpenMode.CREATE_OR_APPEND));
|
||||||
create ? OpenMode.CREATE : OpenMode.CREATE_OR_APPEND));
|
|
||||||
indexDocs(root, index, create); // add new docs
|
indexDocs(root, index, create); // add new docs
|
||||||
|
|
||||||
System.out.println("Optimizing index...");
|
System.out.println("Optimizing index...");
|
||||||
|
|
|
@ -63,8 +63,6 @@ final class DocInverterPerField extends DocFieldConsumerPerField {
|
||||||
|
|
||||||
fieldState.reset(docState.doc.getBoost());
|
fieldState.reset(docState.doc.getBoost());
|
||||||
|
|
||||||
final int maxFieldLength = docState.maxFieldLength;
|
|
||||||
|
|
||||||
final boolean doInvert = consumer.start(fields, count);
|
final boolean doInvert = consumer.start(fields, count);
|
||||||
|
|
||||||
for(int i=0;i<count;i++) {
|
for(int i=0;i<count;i++) {
|
||||||
|
@ -171,12 +169,8 @@ final class DocInverterPerField extends DocFieldConsumerPerField {
|
||||||
if (!success)
|
if (!success)
|
||||||
docState.docWriter.setAborting();
|
docState.docWriter.setAborting();
|
||||||
}
|
}
|
||||||
|
fieldState.length++;
|
||||||
fieldState.position++;
|
fieldState.position++;
|
||||||
if (++fieldState.length >= maxFieldLength) {
|
|
||||||
if (docState.infoStream != null)
|
|
||||||
docState.infoStream.println("maxFieldLength " +maxFieldLength+ " reached for field " + fieldInfo.name + ", ignoring following tokens");
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
hasMoreTokens = stream.incrementToken();
|
hasMoreTokens = stream.incrementToken();
|
||||||
}
|
}
|
||||||
|
|
|
@ -127,7 +127,6 @@ final class DocumentsWriter {
|
||||||
private boolean aborting; // True if an abort is pending
|
private boolean aborting; // True if an abort is pending
|
||||||
|
|
||||||
PrintStream infoStream;
|
PrintStream infoStream;
|
||||||
int maxFieldLength = IndexWriterConfig.UNLIMITED_FIELD_LENGTH;
|
|
||||||
Similarity similarity;
|
Similarity similarity;
|
||||||
|
|
||||||
// max # simultaneous threads; if there are more than
|
// max # simultaneous threads; if there are more than
|
||||||
|
@ -140,7 +139,6 @@ final class DocumentsWriter {
|
||||||
static class DocState {
|
static class DocState {
|
||||||
DocumentsWriter docWriter;
|
DocumentsWriter docWriter;
|
||||||
Analyzer analyzer;
|
Analyzer analyzer;
|
||||||
int maxFieldLength;
|
|
||||||
PrintStream infoStream;
|
PrintStream infoStream;
|
||||||
Similarity similarity;
|
Similarity similarity;
|
||||||
int docID;
|
int docID;
|
||||||
|
@ -191,6 +189,7 @@ final class DocumentsWriter {
|
||||||
/**
|
/**
|
||||||
* Allocate bytes used from shared pool.
|
* Allocate bytes used from shared pool.
|
||||||
*/
|
*/
|
||||||
|
@Override
|
||||||
protected byte[] newBuffer(int size) {
|
protected byte[] newBuffer(int size) {
|
||||||
assert size == PER_DOC_BLOCK_SIZE;
|
assert size == PER_DOC_BLOCK_SIZE;
|
||||||
return perDocAllocator.getByteBlock();
|
return perDocAllocator.getByteBlock();
|
||||||
|
@ -358,13 +357,6 @@ final class DocumentsWriter {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
synchronized void setMaxFieldLength(int maxFieldLength) {
|
|
||||||
this.maxFieldLength = maxFieldLength;
|
|
||||||
for(int i=0;i<threadStates.length;i++) {
|
|
||||||
threadStates[i].docState.maxFieldLength = maxFieldLength;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
synchronized void setSimilarity(Similarity similarity) {
|
synchronized void setSimilarity(Similarity similarity) {
|
||||||
this.similarity = similarity;
|
this.similarity = similarity;
|
||||||
for(int i=0;i<threadStates.length;i++) {
|
for(int i=0;i<threadStates.length;i++) {
|
||||||
|
|
|
@ -35,7 +35,6 @@ final class DocumentsWriterThreadState {
|
||||||
public DocumentsWriterThreadState(DocumentsWriter docWriter) throws IOException {
|
public DocumentsWriterThreadState(DocumentsWriter docWriter) throws IOException {
|
||||||
this.docWriter = docWriter;
|
this.docWriter = docWriter;
|
||||||
docState = new DocumentsWriter.DocState();
|
docState = new DocumentsWriter.DocState();
|
||||||
docState.maxFieldLength = docWriter.maxFieldLength;
|
|
||||||
docState.infoStream = docWriter.infoStream;
|
docState.infoStream = docWriter.infoStream;
|
||||||
docState.similarity = docWriter.similarity;
|
docState.similarity = docWriter.similarity;
|
||||||
docState.docWriter = docWriter;
|
docState.docWriter = docWriter;
|
||||||
|
|
|
@ -662,9 +662,6 @@ public class IndexWriter implements Closeable {
|
||||||
* IndexWriter. Additionally, calling {@link #getConfig()} and changing the
|
* IndexWriter. Additionally, calling {@link #getConfig()} and changing the
|
||||||
* parameters does not affect that IndexWriter instance.
|
* parameters does not affect that IndexWriter instance.
|
||||||
* <p>
|
* <p>
|
||||||
* <b>NOTE:</b> by default, {@link IndexWriterConfig#getMaxFieldLength()}
|
|
||||||
* returns {@link IndexWriterConfig#UNLIMITED_FIELD_LENGTH}. Pay attention to
|
|
||||||
* whether this setting fits your application.
|
|
||||||
*
|
*
|
||||||
* @param d
|
* @param d
|
||||||
* the index directory. The index is either created or appended
|
* the index directory. The index is either created or appended
|
||||||
|
@ -689,7 +686,6 @@ public class IndexWriter implements Closeable {
|
||||||
directory = d;
|
directory = d;
|
||||||
analyzer = conf.getAnalyzer();
|
analyzer = conf.getAnalyzer();
|
||||||
infoStream = defaultInfoStream;
|
infoStream = defaultInfoStream;
|
||||||
maxFieldLength = conf.getMaxFieldLength();
|
|
||||||
termIndexInterval = conf.getTermIndexInterval();
|
termIndexInterval = conf.getTermIndexInterval();
|
||||||
mergePolicy = conf.getMergePolicy();
|
mergePolicy = conf.getMergePolicy();
|
||||||
mergePolicy.setIndexWriter(this);
|
mergePolicy.setIndexWriter(this);
|
||||||
|
@ -768,7 +764,6 @@ public class IndexWriter implements Closeable {
|
||||||
|
|
||||||
docWriter = new DocumentsWriter(directory, this, conf.getIndexingChain(), conf.getMaxThreadStates(), getCurrentFieldInfos(), bufferedDeletes);
|
docWriter = new DocumentsWriter(directory, this, conf.getIndexingChain(), conf.getMaxThreadStates(), getCurrentFieldInfos(), bufferedDeletes);
|
||||||
docWriter.setInfoStream(infoStream);
|
docWriter.setInfoStream(infoStream);
|
||||||
docWriter.setMaxFieldLength(maxFieldLength);
|
|
||||||
|
|
||||||
// Default deleter (for backwards compatibility) is
|
// Default deleter (for backwards compatibility) is
|
||||||
// KeepOnlyLastCommitDeleter:
|
// KeepOnlyLastCommitDeleter:
|
||||||
|
@ -987,6 +982,7 @@ public class IndexWriter implements Closeable {
|
||||||
* @throws CorruptIndexException if the index is corrupt
|
* @throws CorruptIndexException if the index is corrupt
|
||||||
* @throws IOException if there is a low-level IO error
|
* @throws IOException if there is a low-level IO error
|
||||||
*/
|
*/
|
||||||
|
@Override
|
||||||
public void close() throws CorruptIndexException, IOException {
|
public void close() throws CorruptIndexException, IOException {
|
||||||
close(true);
|
close(true);
|
||||||
}
|
}
|
||||||
|
@ -1177,25 +1173,7 @@ public class IndexWriter implements Closeable {
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* The maximum number of terms that will be indexed for a single field in a
|
* Adds a document to this index.
|
||||||
* document. This limits the amount of memory required for indexing, so that
|
|
||||||
* collections with very large files will not crash the indexing process by
|
|
||||||
* running out of memory.<p/>
|
|
||||||
* Note that this effectively truncates large documents, excluding from the
|
|
||||||
* index terms that occur further in the document. If you know your source
|
|
||||||
* documents are large, be sure to set this value high enough to accommodate
|
|
||||||
* the expected size. If you set it to Integer.MAX_VALUE, then the only limit
|
|
||||||
* is your memory, but you should anticipate an OutOfMemoryError.<p/>
|
|
||||||
* By default, no more than 10,000 terms will be indexed for a field.
|
|
||||||
*
|
|
||||||
* @see MaxFieldLength
|
|
||||||
*/
|
|
||||||
private int maxFieldLength;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Adds a document to this index. If the document contains more than
|
|
||||||
* {@link IndexWriterConfig#setMaxFieldLength(int)} terms for a given field,
|
|
||||||
* the remainder are discarded.
|
|
||||||
*
|
*
|
||||||
* <p> Note that if an Exception is hit (for example disk full)
|
* <p> Note that if an Exception is hit (for example disk full)
|
||||||
* then the index will be consistent, but this document
|
* then the index will be consistent, but this document
|
||||||
|
@ -1242,9 +1220,7 @@ public class IndexWriter implements Closeable {
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Adds a document to this index, using the provided analyzer instead of the
|
* Adds a document to this index, using the provided analyzer instead of the
|
||||||
* value of {@link #getAnalyzer()}. If the document contains more than
|
* value of {@link #getAnalyzer()}.
|
||||||
* {@link IndexWriterConfig#setMaxFieldLength(int)} terms for a given field, the remainder are
|
|
||||||
* discarded.
|
|
||||||
*
|
*
|
||||||
* <p>See {@link #addDocument(Document)} for details on
|
* <p>See {@link #addDocument(Document)} for details on
|
||||||
* index and IndexWriter state after an Exception, and
|
* index and IndexWriter state after an Exception, and
|
||||||
|
|
|
@ -41,8 +41,6 @@ import org.apache.lucene.util.Version;
|
||||||
*/
|
*/
|
||||||
public final class IndexWriterConfig implements Cloneable {
|
public final class IndexWriterConfig implements Cloneable {
|
||||||
|
|
||||||
public static final int UNLIMITED_FIELD_LENGTH = Integer.MAX_VALUE;
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Specifies the open mode for {@link IndexWriter}:
|
* Specifies the open mode for {@link IndexWriter}:
|
||||||
* <ul>
|
* <ul>
|
||||||
|
@ -55,7 +53,7 @@ public final class IndexWriterConfig implements Cloneable {
|
||||||
public static enum OpenMode { CREATE, APPEND, CREATE_OR_APPEND }
|
public static enum OpenMode { CREATE, APPEND, CREATE_OR_APPEND }
|
||||||
|
|
||||||
/** Default value is 32. Change using {@link #setTermIndexInterval(int)}. */
|
/** Default value is 32. Change using {@link #setTermIndexInterval(int)}. */
|
||||||
public static final int DEFAULT_TERM_INDEX_INTERVAL = 32; // TODO: this should be private to the codec, not settable here
|
public static final int DEFAULT_TERM_INDEX_INTERVAL = 32; // TODO: this should be private to the codec, not settable here
|
||||||
|
|
||||||
/** Denotes a flush trigger is disabled. */
|
/** Denotes a flush trigger is disabled. */
|
||||||
public final static int DISABLE_AUTO_FLUSH = -1;
|
public final static int DISABLE_AUTO_FLUSH = -1;
|
||||||
|
@ -113,7 +111,6 @@ public final class IndexWriterConfig implements Cloneable {
|
||||||
private IndexDeletionPolicy delPolicy;
|
private IndexDeletionPolicy delPolicy;
|
||||||
private IndexCommit commit;
|
private IndexCommit commit;
|
||||||
private OpenMode openMode;
|
private OpenMode openMode;
|
||||||
private int maxFieldLength;
|
|
||||||
private Similarity similarity;
|
private Similarity similarity;
|
||||||
private int termIndexInterval; // TODO: this should be private to the codec, not settable here
|
private int termIndexInterval; // TODO: this should be private to the codec, not settable here
|
||||||
private MergeScheduler mergeScheduler;
|
private MergeScheduler mergeScheduler;
|
||||||
|
@ -145,7 +142,6 @@ public final class IndexWriterConfig implements Cloneable {
|
||||||
delPolicy = new KeepOnlyLastCommitDeletionPolicy();
|
delPolicy = new KeepOnlyLastCommitDeletionPolicy();
|
||||||
commit = null;
|
commit = null;
|
||||||
openMode = OpenMode.CREATE_OR_APPEND;
|
openMode = OpenMode.CREATE_OR_APPEND;
|
||||||
maxFieldLength = UNLIMITED_FIELD_LENGTH;
|
|
||||||
similarity = Similarity.getDefault();
|
similarity = Similarity.getDefault();
|
||||||
termIndexInterval = DEFAULT_TERM_INDEX_INTERVAL; // TODO: this should be private to the codec, not settable here
|
termIndexInterval = DEFAULT_TERM_INDEX_INTERVAL; // TODO: this should be private to the codec, not settable here
|
||||||
mergeScheduler = new ConcurrentMergeScheduler();
|
mergeScheduler = new ConcurrentMergeScheduler();
|
||||||
|
@ -219,37 +215,6 @@ public final class IndexWriterConfig implements Cloneable {
|
||||||
return delPolicy;
|
return delPolicy;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* The maximum number of terms that will be indexed for a single field in a
|
|
||||||
* document. This limits the amount of memory required for indexing, so that
|
|
||||||
* collections with very large files will not crash the indexing process by
|
|
||||||
* running out of memory. This setting refers to the number of running terms,
|
|
||||||
* not to the number of different terms.
|
|
||||||
* <p>
|
|
||||||
* <b>NOTE:</b> this silently truncates large documents, excluding from the
|
|
||||||
* index all terms that occur further in the document. If you know your source
|
|
||||||
* documents are large, be sure to set this value high enough to accommodate
|
|
||||||
* the expected size. If you set it to {@link #UNLIMITED_FIELD_LENGTH}, then
|
|
||||||
* the only limit is your memory, but you should anticipate an
|
|
||||||
* OutOfMemoryError.
|
|
||||||
* <p>
|
|
||||||
* By default it is set to {@link #UNLIMITED_FIELD_LENGTH}.
|
|
||||||
*/
|
|
||||||
public IndexWriterConfig setMaxFieldLength(int maxFieldLength) {
|
|
||||||
this.maxFieldLength = maxFieldLength;
|
|
||||||
return this;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Returns the maximum number of terms that will be indexed for a single field
|
|
||||||
* in a document.
|
|
||||||
*
|
|
||||||
* @see #setMaxFieldLength(int)
|
|
||||||
*/
|
|
||||||
public int getMaxFieldLength() {
|
|
||||||
return maxFieldLength;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Expert: allows to open a certain commit point. The default is null which
|
* Expert: allows to open a certain commit point. The default is null which
|
||||||
* opens the latest commit point.
|
* opens the latest commit point.
|
||||||
|
@ -611,7 +576,6 @@ public final class IndexWriterConfig implements Cloneable {
|
||||||
sb.append("delPolicy=").append(delPolicy.getClass().getName()).append("\n");
|
sb.append("delPolicy=").append(delPolicy.getClass().getName()).append("\n");
|
||||||
sb.append("commit=").append(commit == null ? "null" : commit).append("\n");
|
sb.append("commit=").append(commit == null ? "null" : commit).append("\n");
|
||||||
sb.append("openMode=").append(openMode).append("\n");
|
sb.append("openMode=").append(openMode).append("\n");
|
||||||
sb.append("maxFieldLength=").append(maxFieldLength).append("\n");
|
|
||||||
sb.append("similarity=").append(similarity.getClass().getName()).append("\n");
|
sb.append("similarity=").append(similarity.getClass().getName()).append("\n");
|
||||||
sb.append("termIndexInterval=").append(termIndexInterval).append("\n"); // TODO: this should be private to the codec, not settable here
|
sb.append("termIndexInterval=").append(termIndexInterval).append("\n"); // TODO: this should be private to the codec, not settable here
|
||||||
sb.append("mergeScheduler=").append(mergeScheduler.getClass().getName()).append("\n");
|
sb.append("mergeScheduler=").append(mergeScheduler.getClass().getName()).append("\n");
|
||||||
|
|
|
@ -784,7 +784,7 @@ public class TestIndexWriter extends LuceneTestCase {
|
||||||
public void testHighFreqTerm() throws IOException {
|
public void testHighFreqTerm() throws IOException {
|
||||||
MockDirectoryWrapper dir = newDirectory();
|
MockDirectoryWrapper dir = newDirectory();
|
||||||
IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(
|
IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(
|
||||||
TEST_VERSION_CURRENT, new MockAnalyzer()).setMaxFieldLength(100000000).setRAMBufferSizeMB(0.01));
|
TEST_VERSION_CURRENT, new MockAnalyzer()).setRAMBufferSizeMB(0.01));
|
||||||
// Massive doc that has 128 K a's
|
// Massive doc that has 128 K a's
|
||||||
StringBuilder b = new StringBuilder(1024*1024);
|
StringBuilder b = new StringBuilder(1024*1024);
|
||||||
for(int i=0;i<4096;i++) {
|
for(int i=0;i<4096;i++) {
|
||||||
|
@ -1236,30 +1236,7 @@ public class TestIndexWriter extends LuceneTestCase {
|
||||||
writer.close();
|
writer.close();
|
||||||
dir.close();
|
dir.close();
|
||||||
}
|
}
|
||||||
|
|
||||||
// LUCENE-1084: test user-specified field length
|
|
||||||
public void testUserSpecifiedMaxFieldLength() throws IOException {
|
|
||||||
Directory dir = newDirectory();
|
|
||||||
|
|
||||||
IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(
|
|
||||||
TEST_VERSION_CURRENT, new MockAnalyzer()).setMaxFieldLength(100000));
|
|
||||||
|
|
||||||
Document doc = new Document();
|
|
||||||
StringBuilder b = new StringBuilder();
|
|
||||||
for(int i=0;i<10000;i++)
|
|
||||||
b.append(" a");
|
|
||||||
b.append(" x");
|
|
||||||
doc.add(newField("field", b.toString(), Field.Store.NO, Field.Index.ANALYZED));
|
|
||||||
writer.addDocument(doc);
|
|
||||||
writer.close();
|
|
||||||
|
|
||||||
IndexReader reader = IndexReader.open(dir, true);
|
|
||||||
Term t = new Term("field", "x");
|
|
||||||
assertEquals(1, reader.docFreq(t));
|
|
||||||
reader.close();
|
|
||||||
dir.close();
|
|
||||||
}
|
|
||||||
|
|
||||||
// LUCENE-325: test expungeDeletes, when 2 singular merges
|
// LUCENE-325: test expungeDeletes, when 2 singular merges
|
||||||
// are required
|
// are required
|
||||||
public void testExpungeDeletes() throws IOException {
|
public void testExpungeDeletes() throws IOException {
|
||||||
|
|
|
@ -17,7 +17,6 @@ package org.apache.lucene.index;
|
||||||
* limitations under the License.
|
* limitations under the License.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
import java.io.IOException;
|
|
||||||
import java.lang.reflect.Field;
|
import java.lang.reflect.Field;
|
||||||
import java.lang.reflect.Method;
|
import java.lang.reflect.Method;
|
||||||
import java.lang.reflect.Modifier;
|
import java.lang.reflect.Modifier;
|
||||||
|
@ -26,7 +25,6 @@ import java.util.Set;
|
||||||
|
|
||||||
import org.apache.lucene.analysis.MockAnalyzer;
|
import org.apache.lucene.analysis.MockAnalyzer;
|
||||||
import org.apache.lucene.index.DocumentsWriter.IndexingChain;
|
import org.apache.lucene.index.DocumentsWriter.IndexingChain;
|
||||||
import org.apache.lucene.index.IndexWriter.IndexReaderWarmer;
|
|
||||||
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
|
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
|
||||||
import org.apache.lucene.search.DefaultSimilarity;
|
import org.apache.lucene.search.DefaultSimilarity;
|
||||||
import org.apache.lucene.search.Similarity;
|
import org.apache.lucene.search.Similarity;
|
||||||
|
@ -49,22 +47,12 @@ public class TestIndexWriterConfig extends LuceneTestCase {
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
private static final class MyWarmer extends IndexReaderWarmer {
|
|
||||||
// Does not implement anything - used only for type checking on IndexWriterConfig.
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public void warm(IndexReader reader) throws IOException {
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testDefaults() throws Exception {
|
public void testDefaults() throws Exception {
|
||||||
IndexWriterConfig conf = new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer());
|
IndexWriterConfig conf = new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer());
|
||||||
assertEquals(MockAnalyzer.class, conf.getAnalyzer().getClass());
|
assertEquals(MockAnalyzer.class, conf.getAnalyzer().getClass());
|
||||||
assertNull(conf.getIndexCommit());
|
assertNull(conf.getIndexCommit());
|
||||||
assertEquals(KeepOnlyLastCommitDeletionPolicy.class, conf.getIndexDeletionPolicy().getClass());
|
assertEquals(KeepOnlyLastCommitDeletionPolicy.class, conf.getIndexDeletionPolicy().getClass());
|
||||||
assertEquals(IndexWriterConfig.UNLIMITED_FIELD_LENGTH, conf.getMaxFieldLength());
|
|
||||||
assertEquals(ConcurrentMergeScheduler.class, conf.getMergeScheduler().getClass());
|
assertEquals(ConcurrentMergeScheduler.class, conf.getMergeScheduler().getClass());
|
||||||
assertEquals(OpenMode.CREATE_OR_APPEND, conf.getOpenMode());
|
assertEquals(OpenMode.CREATE_OR_APPEND, conf.getOpenMode());
|
||||||
assertTrue(Similarity.getDefault() == conf.getSimilarity());
|
assertTrue(Similarity.getDefault() == conf.getSimilarity());
|
||||||
|
@ -129,7 +117,6 @@ public class TestIndexWriterConfig extends LuceneTestCase {
|
||||||
// Tests that the values of the constants does not change
|
// Tests that the values of the constants does not change
|
||||||
assertEquals(1000, IndexWriterConfig.WRITE_LOCK_TIMEOUT);
|
assertEquals(1000, IndexWriterConfig.WRITE_LOCK_TIMEOUT);
|
||||||
assertEquals(32, IndexWriterConfig.DEFAULT_TERM_INDEX_INTERVAL);
|
assertEquals(32, IndexWriterConfig.DEFAULT_TERM_INDEX_INTERVAL);
|
||||||
assertEquals(Integer.MAX_VALUE, IndexWriterConfig.UNLIMITED_FIELD_LENGTH);
|
|
||||||
assertEquals(-1, IndexWriterConfig.DISABLE_AUTO_FLUSH);
|
assertEquals(-1, IndexWriterConfig.DISABLE_AUTO_FLUSH);
|
||||||
assertEquals(IndexWriterConfig.DISABLE_AUTO_FLUSH, IndexWriterConfig.DEFAULT_MAX_BUFFERED_DELETE_TERMS);
|
assertEquals(IndexWriterConfig.DISABLE_AUTO_FLUSH, IndexWriterConfig.DEFAULT_MAX_BUFFERED_DELETE_TERMS);
|
||||||
assertEquals(IndexWriterConfig.DISABLE_AUTO_FLUSH, IndexWriterConfig.DEFAULT_MAX_BUFFERED_DOCS);
|
assertEquals(IndexWriterConfig.DISABLE_AUTO_FLUSH, IndexWriterConfig.DEFAULT_MAX_BUFFERED_DOCS);
|
||||||
|
|
|
@ -22,8 +22,16 @@ import java.io.StringReader;
|
||||||
|
|
||||||
import org.apache.lucene.analysis.Analyzer;
|
import org.apache.lucene.analysis.Analyzer;
|
||||||
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
|
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
|
||||||
|
import org.apache.lucene.analysis.MockAnalyzer;
|
||||||
import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
|
import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
|
||||||
import org.apache.lucene.analysis.standard.StandardAnalyzer;
|
import org.apache.lucene.analysis.standard.StandardAnalyzer;
|
||||||
|
import org.apache.lucene.document.Document;
|
||||||
|
import org.apache.lucene.document.Field;
|
||||||
|
import org.apache.lucene.index.IndexReader;
|
||||||
|
import org.apache.lucene.index.IndexWriter;
|
||||||
|
import org.apache.lucene.index.IndexWriterConfig;
|
||||||
|
import org.apache.lucene.index.Term;
|
||||||
|
import org.apache.lucene.store.Directory;
|
||||||
|
|
||||||
public class TestLimitTokenCountAnalyzer extends BaseTokenStreamTestCase {
|
public class TestLimitTokenCountAnalyzer extends BaseTokenStreamTestCase {
|
||||||
|
|
||||||
|
@ -39,4 +47,26 @@ public class TestLimitTokenCountAnalyzer extends BaseTokenStreamTestCase {
|
||||||
assertTokenStreamContents(a.reusableTokenStream("dummy", new StringReader("1 2 3 4 5")), new String[] { "1", "2" }, new int[] { 0, 2 }, new int[] { 1, 3 }, 3);
|
assertTokenStreamContents(a.reusableTokenStream("dummy", new StringReader("1 2 3 4 5")), new String[] { "1", "2" }, new int[] { 0, 2 }, new int[] { 1, 3 }, 3);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public void testLimitTokenCountIndexWriter() throws IOException {
|
||||||
|
Directory dir = newDirectory();
|
||||||
|
|
||||||
|
IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(
|
||||||
|
TEST_VERSION_CURRENT, new LimitTokenCountAnalyzer(new MockAnalyzer(), 100000)));
|
||||||
|
|
||||||
|
Document doc = new Document();
|
||||||
|
StringBuilder b = new StringBuilder();
|
||||||
|
for(int i=0;i<10000;i++)
|
||||||
|
b.append(" a");
|
||||||
|
b.append(" x");
|
||||||
|
doc.add(newField("field", b.toString(), Field.Store.NO, Field.Index.ANALYZED));
|
||||||
|
writer.addDocument(doc);
|
||||||
|
writer.close();
|
||||||
|
|
||||||
|
IndexReader reader = IndexReader.open(dir, true);
|
||||||
|
Term t = new Term("field", "x");
|
||||||
|
assertEquals(1, reader.docFreq(t));
|
||||||
|
reader.close();
|
||||||
|
dir.close();
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
|
@ -46,8 +46,7 @@ import java.io.PrintStream;
|
||||||
* Create an index. <br>
|
* Create an index. <br>
|
||||||
* Other side effects: index writer object in perfRunData is set. <br>
|
* Other side effects: index writer object in perfRunData is set. <br>
|
||||||
* Relevant properties: <code>merge.factor (default 10),
|
* Relevant properties: <code>merge.factor (default 10),
|
||||||
* max.buffered (default no flush), max.field.length (default
|
* max.buffered (default no flush), compound (default true), ram.flush.mb [default 0],
|
||||||
* 10,000 tokens), max.field.length, compound (default true), ram.flush.mb [default 0],
|
|
||||||
* merge.policy (default org.apache.lucene.index.LogByteSizeMergePolicy),
|
* merge.policy (default org.apache.lucene.index.LogByteSizeMergePolicy),
|
||||||
* merge.scheduler (default
|
* merge.scheduler (default
|
||||||
* org.apache.lucene.index.ConcurrentMergeScheduler),
|
* org.apache.lucene.index.ConcurrentMergeScheduler),
|
||||||
|
@ -153,7 +152,6 @@ public class CreateIndexTask extends PerfTask {
|
||||||
logMergePolicy.setMergeFactor(config.get("merge.factor",OpenIndexTask.DEFAULT_MERGE_PFACTOR));
|
logMergePolicy.setMergeFactor(config.get("merge.factor",OpenIndexTask.DEFAULT_MERGE_PFACTOR));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
iwConf.setMaxFieldLength(config.get("max.field.length",OpenIndexTask.DEFAULT_MAX_FIELD_LENGTH));
|
|
||||||
final double ramBuffer = config.get("ram.flush.mb",OpenIndexTask.DEFAULT_RAM_FLUSH_MB);
|
final double ramBuffer = config.get("ram.flush.mb",OpenIndexTask.DEFAULT_RAM_FLUSH_MB);
|
||||||
final int maxBuffered = config.get("max.buffered",OpenIndexTask.DEFAULT_MAX_BUFFERED);
|
final int maxBuffered = config.get("max.buffered",OpenIndexTask.DEFAULT_MAX_BUFFERED);
|
||||||
if (maxBuffered == IndexWriterConfig.DISABLE_AUTO_FLUSH) {
|
if (maxBuffered == IndexWriterConfig.DISABLE_AUTO_FLUSH) {
|
||||||
|
|
|
@ -26,7 +26,6 @@ import org.apache.lucene.index.LogMergePolicy;
|
||||||
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
|
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Open an index writer.
|
* Open an index writer.
|
||||||
* <br>Other side effects: index writer object in perfRunData is set.
|
* <br>Other side effects: index writer object in perfRunData is set.
|
||||||
|
@ -41,7 +40,6 @@ import java.io.IOException;
|
||||||
public class OpenIndexTask extends PerfTask {
|
public class OpenIndexTask extends PerfTask {
|
||||||
|
|
||||||
public static final int DEFAULT_MAX_BUFFERED = IndexWriterConfig.DEFAULT_MAX_BUFFERED_DOCS;
|
public static final int DEFAULT_MAX_BUFFERED = IndexWriterConfig.DEFAULT_MAX_BUFFERED_DOCS;
|
||||||
public static final int DEFAULT_MAX_FIELD_LENGTH = IndexWriterConfig.UNLIMITED_FIELD_LENGTH;
|
|
||||||
public static final int DEFAULT_MERGE_PFACTOR = LogMergePolicy.DEFAULT_MERGE_FACTOR;
|
public static final int DEFAULT_MERGE_PFACTOR = LogMergePolicy.DEFAULT_MERGE_FACTOR;
|
||||||
public static final double DEFAULT_RAM_FLUSH_MB = (int) IndexWriterConfig.DEFAULT_RAM_BUFFER_SIZE_MB;
|
public static final double DEFAULT_RAM_FLUSH_MB = (int) IndexWriterConfig.DEFAULT_RAM_BUFFER_SIZE_MB;
|
||||||
private String commitUserData;
|
private String commitUserData;
|
||||||
|
|
|
@ -74,7 +74,6 @@ public class FileBasedSpellChecker extends AbstractLuceneSpellChecker {
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
|
||||||
@SuppressWarnings("unchecked")
|
|
||||||
private void loadExternalFileDictionary(SolrCore core) {
|
private void loadExternalFileDictionary(SolrCore core) {
|
||||||
try {
|
try {
|
||||||
|
|
||||||
|
@ -92,7 +91,6 @@ public class FileBasedSpellChecker extends AbstractLuceneSpellChecker {
|
||||||
new IndexWriterConfig(core.getSolrConfig().luceneMatchVersion, fieldType.getAnalyzer()).
|
new IndexWriterConfig(core.getSolrConfig().luceneMatchVersion, fieldType.getAnalyzer()).
|
||||||
setMaxBufferedDocs(150).
|
setMaxBufferedDocs(150).
|
||||||
setMergePolicy(mp).
|
setMergePolicy(mp).
|
||||||
setMaxFieldLength(IndexWriterConfig.UNLIMITED_FIELD_LENGTH).
|
|
||||||
setOpenMode(IndexWriterConfig.OpenMode.CREATE)
|
setOpenMode(IndexWriterConfig.OpenMode.CREATE)
|
||||||
);
|
);
|
||||||
|
|
||||||
|
|
|
@ -53,7 +53,6 @@ public class SolrIndexConfig {
|
||||||
maxMergeDocs = -1;
|
maxMergeDocs = -1;
|
||||||
mergeFactor = -1;
|
mergeFactor = -1;
|
||||||
ramBufferSizeMB = 16;
|
ramBufferSizeMB = 16;
|
||||||
maxFieldLength = -1;
|
|
||||||
writeLockTimeout = -1;
|
writeLockTimeout = -1;
|
||||||
commitLockTimeout = -1;
|
commitLockTimeout = -1;
|
||||||
lockType = null;
|
lockType = null;
|
||||||
|
@ -71,7 +70,6 @@ public class SolrIndexConfig {
|
||||||
|
|
||||||
public final double ramBufferSizeMB;
|
public final double ramBufferSizeMB;
|
||||||
|
|
||||||
public final int maxFieldLength;
|
|
||||||
public final int writeLockTimeout;
|
public final int writeLockTimeout;
|
||||||
public final int commitLockTimeout;
|
public final int commitLockTimeout;
|
||||||
public final String lockType;
|
public final String lockType;
|
||||||
|
@ -95,7 +93,6 @@ public class SolrIndexConfig {
|
||||||
mergeFactor=solrConfig.getInt(prefix+"/mergeFactor",def.mergeFactor);
|
mergeFactor=solrConfig.getInt(prefix+"/mergeFactor",def.mergeFactor);
|
||||||
ramBufferSizeMB = solrConfig.getDouble(prefix+"/ramBufferSizeMB", def.ramBufferSizeMB);
|
ramBufferSizeMB = solrConfig.getDouble(prefix+"/ramBufferSizeMB", def.ramBufferSizeMB);
|
||||||
|
|
||||||
maxFieldLength=solrConfig.getInt(prefix+"/maxFieldLength",def.maxFieldLength);
|
|
||||||
writeLockTimeout=solrConfig.getInt(prefix+"/writeLockTimeout", def.writeLockTimeout);
|
writeLockTimeout=solrConfig.getInt(prefix+"/writeLockTimeout", def.writeLockTimeout);
|
||||||
commitLockTimeout=solrConfig.getInt(prefix+"/commitLockTimeout", def.commitLockTimeout);
|
commitLockTimeout=solrConfig.getInt(prefix+"/commitLockTimeout", def.commitLockTimeout);
|
||||||
lockType=solrConfig.get(prefix+"/lockType", def.lockType);
|
lockType=solrConfig.get(prefix+"/lockType", def.lockType);
|
||||||
|
@ -153,9 +150,6 @@ public class SolrIndexConfig {
|
||||||
if (termIndexInterval != -1)
|
if (termIndexInterval != -1)
|
||||||
iwc.setTermIndexInterval(termIndexInterval);
|
iwc.setTermIndexInterval(termIndexInterval);
|
||||||
|
|
||||||
if (maxFieldLength != -1)
|
|
||||||
iwc.setMaxFieldLength(maxFieldLength);
|
|
||||||
|
|
||||||
if (writeLockTimeout != -1)
|
if (writeLockTimeout != -1)
|
||||||
iwc.setWriteLockTimeout(writeLockTimeout);
|
iwc.setWriteLockTimeout(writeLockTimeout);
|
||||||
|
|
||||||
|
|
|
@ -99,8 +99,7 @@ public class TestArbitraryIndexDir extends AbstractSolrTestCase{
|
||||||
Directory dir = newFSDirectory(newDir);
|
Directory dir = newFSDirectory(newDir);
|
||||||
IndexWriter iw = new IndexWriter(
|
IndexWriter iw = new IndexWriter(
|
||||||
dir,
|
dir,
|
||||||
new IndexWriterConfig(Version.LUCENE_40, new StandardAnalyzer(Version.LUCENE_40)).
|
new IndexWriterConfig(Version.LUCENE_40, new StandardAnalyzer(Version.LUCENE_40))
|
||||||
setMaxFieldLength(1000)
|
|
||||||
);
|
);
|
||||||
Document doc = new Document();
|
Document doc = new Document();
|
||||||
doc.add(new Field("id", "2", Field.Store.YES, Field.Index.ANALYZED));
|
doc.add(new Field("id", "2", Field.Store.YES, Field.Index.ANALYZED));
|
||||||
|
|
|
@ -63,8 +63,7 @@ public class TestSort extends AbstractSolrTestCase {
|
||||||
IndexWriter iw = new IndexWriter(
|
IndexWriter iw = new IndexWriter(
|
||||||
dir,
|
dir,
|
||||||
new IndexWriterConfig(TEST_VERSION_CURRENT, new SimpleAnalyzer(TEST_VERSION_CURRENT)).
|
new IndexWriterConfig(TEST_VERSION_CURRENT, new SimpleAnalyzer(TEST_VERSION_CURRENT)).
|
||||||
setOpenMode(IndexWriterConfig.OpenMode.CREATE).
|
setOpenMode(IndexWriterConfig.OpenMode.CREATE)
|
||||||
setMaxFieldLength(IndexWriterConfig.UNLIMITED_FIELD_LENGTH)
|
|
||||||
);
|
);
|
||||||
final MyDoc[] mydocs = new MyDoc[ndocs];
|
final MyDoc[] mydocs = new MyDoc[ndocs];
|
||||||
|
|
||||||
|
|
|
@ -284,8 +284,7 @@ public class IndexBasedSpellCheckerTest extends SolrTestCaseJ4 {
|
||||||
Directory dir = newFSDirectory(altIndexDir);
|
Directory dir = newFSDirectory(altIndexDir);
|
||||||
IndexWriter iw = new IndexWriter(
|
IndexWriter iw = new IndexWriter(
|
||||||
dir,
|
dir,
|
||||||
new IndexWriterConfig(TEST_VERSION_CURRENT, new WhitespaceAnalyzer(TEST_VERSION_CURRENT)).
|
new IndexWriterConfig(TEST_VERSION_CURRENT, new WhitespaceAnalyzer(TEST_VERSION_CURRENT))
|
||||||
setMaxFieldLength(IndexWriterConfig.UNLIMITED_FIELD_LENGTH)
|
|
||||||
);
|
);
|
||||||
for (int i = 0; i < ALT_DOCS.length; i++) {
|
for (int i = 0; i < ALT_DOCS.length; i++) {
|
||||||
Document doc = new Document();
|
Document doc = new Document();
|
||||||
|
|
Loading…
Reference in New Issue