LUCENE-2295: remove maxFieldLength (trunk)

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1060340 13f79535-47bb-0310-9956-ffa450edef68
Shai Erera 2011-01-18 12:01:40 +00:00
parent e43fdc9654
commit 2a0484bd40
16 changed files with 43 additions and 140 deletions

View File

@@ -84,8 +84,7 @@ public class IndexHTML {
  }
  writer = new IndexWriter(FSDirectory.open(index), new IndexWriterConfig(
  Version.LUCENE_CURRENT, new StandardAnalyzer(Version.LUCENE_CURRENT))
- .setMaxFieldLength(1000000).setOpenMode(
- create ? OpenMode.CREATE : OpenMode.CREATE_OR_APPEND));
+ .setOpenMode(create ? OpenMode.CREATE : OpenMode.CREATE_OR_APPEND));
  indexDocs(root, index, create); // add new docs
  System.out.println("Optimizing index...");

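With the cap gone from the demo, an application that relied on IndexWriter truncating huge documents must now impose the limit in the analysis chain. A minimal migration sketch, assuming the LimitTokenCountAnalyzer wrapper that this same commit exercises in TestLimitTokenCountAnalyzer further down:

    // Sketch: recreate the old setMaxFieldLength(1000000) behavior by capping
    // token count in the analyzer rather than on the writer.
    Analyzer capped = new LimitTokenCountAnalyzer(
        new StandardAnalyzer(Version.LUCENE_CURRENT), 1000000);
    writer = new IndexWriter(FSDirectory.open(index),
        new IndexWriterConfig(Version.LUCENE_CURRENT, capped)
            .setOpenMode(create ? OpenMode.CREATE : OpenMode.CREATE_OR_APPEND));
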
View File

@@ -63,8 +63,6 @@ final class DocInverterPerField extends DocFieldConsumerPerField {
  fieldState.reset(docState.doc.getBoost());
- final int maxFieldLength = docState.maxFieldLength;
  final boolean doInvert = consumer.start(fields, count);
  for(int i=0;i<count;i++) {
@@ -171,12 +169,8 @@ final class DocInverterPerField extends DocFieldConsumerPerField {
  if (!success)
  docState.docWriter.setAborting();
  }
+ fieldState.length++;
  fieldState.position++;
- if (++fieldState.length >= maxFieldLength) {
- if (docState.infoStream != null)
- docState.infoStream.println("maxFieldLength " +maxFieldLength+ " reached for field " + fieldInfo.name + ", ignoring following tokens");
- break;
- }
  hasMoreTokens = stream.incrementToken();
  }

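The truncation check is gone from the inverter loop itself; fieldState.length now counts every token the stream produces, so any per-field cap has to be enforced by the TokenStream feeding this loop. A stream-level sketch, assuming LimitTokenCountFilter, the filter LimitTokenCountAnalyzer applies to its delegate's streams:

    // Sketch: cap a field at 10,000 tokens before it reaches DocInverterPerField.
    TokenStream ts = analyzer.tokenStream("body", new StringReader(text));
    ts = new LimitTokenCountFilter(ts, 10000);
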
View File

@@ -127,7 +127,6 @@ final class DocumentsWriter {
  private boolean aborting; // True if an abort is pending
  PrintStream infoStream;
- int maxFieldLength = IndexWriterConfig.UNLIMITED_FIELD_LENGTH;
  Similarity similarity;
  // max # simultaneous threads; if there are more than
@@ -140,7 +139,6 @@ final class DocumentsWriter {
  static class DocState {
  DocumentsWriter docWriter;
  Analyzer analyzer;
- int maxFieldLength;
  PrintStream infoStream;
  Similarity similarity;
  int docID;
@@ -191,6 +189,7 @@ final class DocumentsWriter {
  /**
  * Allocate bytes used from shared pool.
  */
+ @Override
  protected byte[] newBuffer(int size) {
  assert size == PER_DOC_BLOCK_SIZE;
  return perDocAllocator.getByteBlock();
@@ -358,13 +357,6 @@ final class DocumentsWriter {
  }
  }
- synchronized void setMaxFieldLength(int maxFieldLength) {
- this.maxFieldLength = maxFieldLength;
- for(int i=0;i<threadStates.length;i++) {
- threadStates[i].docState.maxFieldLength = maxFieldLength;
- }
- }
  synchronized void setSimilarity(Similarity similarity) {
  this.similarity = similarity;
  for(int i=0;i<threadStates.length;i++) {

View File

@@ -35,7 +35,6 @@ final class DocumentsWriterThreadState {
  public DocumentsWriterThreadState(DocumentsWriter docWriter) throws IOException {
  this.docWriter = docWriter;
  docState = new DocumentsWriter.DocState();
- docState.maxFieldLength = docWriter.maxFieldLength;
  docState.infoStream = docWriter.infoStream;
  docState.similarity = docWriter.similarity;
  docState.docWriter = docWriter;

View File

@@ -662,9 +662,6 @@ public class IndexWriter implements Closeable {
  * IndexWriter. Additionally, calling {@link #getConfig()} and changing the
  * parameters does not affect that IndexWriter instance.
  * <p>
- * <b>NOTE:</b> by default, {@link IndexWriterConfig#getMaxFieldLength()}
- * returns {@link IndexWriterConfig#UNLIMITED_FIELD_LENGTH}. Pay attention to
- * whether this setting fits your application.
  *
  * @param d
  * the index directory. The index is either created or appended
@@ -689,7 +686,6 @@ public class IndexWriter implements Closeable {
  directory = d;
  analyzer = conf.getAnalyzer();
  infoStream = defaultInfoStream;
- maxFieldLength = conf.getMaxFieldLength();
  termIndexInterval = conf.getTermIndexInterval();
  mergePolicy = conf.getMergePolicy();
  mergePolicy.setIndexWriter(this);
@@ -768,7 +764,6 @@ public class IndexWriter implements Closeable {
  docWriter = new DocumentsWriter(directory, this, conf.getIndexingChain(), conf.getMaxThreadStates(), getCurrentFieldInfos(), bufferedDeletes);
  docWriter.setInfoStream(infoStream);
- docWriter.setMaxFieldLength(maxFieldLength);
  // Default deleter (for backwards compatibility) is
  // KeepOnlyLastCommitDeleter:
@@ -987,6 +982,7 @@ public class IndexWriter implements Closeable {
  * @throws CorruptIndexException if the index is corrupt
  * @throws IOException if there is a low-level IO error
  */
+ @Override
  public void close() throws CorruptIndexException, IOException {
  close(true);
  }
@@ -1177,25 +1173,7 @@ public class IndexWriter implements Closeable {
  }
- /**
- * The maximum number of terms that will be indexed for a single field in a
- * document. This limits the amount of memory required for indexing, so that
- * collections with very large files will not crash the indexing process by
- * running out of memory.<p/>
- * Note that this effectively truncates large documents, excluding from the
- * index terms that occur further in the document. If you know your source
- * documents are large, be sure to set this value high enough to accommodate
- * the expected size. If you set it to Integer.MAX_VALUE, then the only limit
- * is your memory, but you should anticipate an OutOfMemoryError.<p/>
- * By default, no more than 10,000 terms will be indexed for a field.
- *
- * @see MaxFieldLength
- */
- private int maxFieldLength;
  /**
- * Adds a document to this index. If the document contains more than
- * {@link IndexWriterConfig#setMaxFieldLength(int)} terms for a given field,
- * the remainder are discarded.
+ * Adds a document to this index.
  *
  * <p> Note that if an Exception is hit (for example disk full)
  * then the index will be consistent, but this document
@@ -1242,9 +1220,7 @@ public class IndexWriter implements Closeable {
  /**
  * Adds a document to this index, using the provided analyzer instead of the
- * value of {@link #getAnalyzer()}. If the document contains more than
- * {@link IndexWriterConfig#setMaxFieldLength(int)} terms for a given field, the remainder are
- * discarded.
+ * value of {@link #getAnalyzer()}.
  *
  * <p>See {@link #addDocument(Document)} for details on
  * index and IndexWriter state after an Exception, and

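Since addDocument no longer discards terms, a caller who wants the old behavior for a single oversized document can pass a wrapped analyzer to the addDocument(Document, Analyzer) overload documented above. A sketch, again assuming LimitTokenCountAnalyzer:

    // Sketch: limit one large document without touching the writer's analyzer.
    writer.addDocument(bigDoc,
        new LimitTokenCountAnalyzer(writer.getAnalyzer(), 10000));
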
View File

@@ -41,8 +41,6 @@ import org.apache.lucene.util.Version;
  */
  public final class IndexWriterConfig implements Cloneable {
- public static final int UNLIMITED_FIELD_LENGTH = Integer.MAX_VALUE;
  /**
  * Specifies the open mode for {@link IndexWriter}:
  * <ul>
@@ -55,7 +53,7 @@ public final class IndexWriterConfig implements Cloneable {
  public static enum OpenMode { CREATE, APPEND, CREATE_OR_APPEND }
  /** Default value is 32. Change using {@link #setTermIndexInterval(int)}. */
- public static final int DEFAULT_TERM_INDEX_INTERVAL = 32; // TODO: this should be private to the codec, not settable here
+ public static final int DEFAULT_TERM_INDEX_INTERVAL = 32; // TODO: this should be private to the codec, not settable here
  /** Denotes a flush trigger is disabled. */
  public final static int DISABLE_AUTO_FLUSH = -1;
@@ -113,7 +111,6 @@ public final class IndexWriterConfig implements Cloneable {
  private IndexDeletionPolicy delPolicy;
  private IndexCommit commit;
  private OpenMode openMode;
- private int maxFieldLength;
  private Similarity similarity;
  private int termIndexInterval; // TODO: this should be private to the codec, not settable here
  private MergeScheduler mergeScheduler;
@@ -145,7 +142,6 @@ public final class IndexWriterConfig implements Cloneable {
  delPolicy = new KeepOnlyLastCommitDeletionPolicy();
  commit = null;
  openMode = OpenMode.CREATE_OR_APPEND;
- maxFieldLength = UNLIMITED_FIELD_LENGTH;
  similarity = Similarity.getDefault();
  termIndexInterval = DEFAULT_TERM_INDEX_INTERVAL; // TODO: this should be private to the codec, not settable here
  mergeScheduler = new ConcurrentMergeScheduler();
@@ -219,37 +215,6 @@ public final class IndexWriterConfig implements Cloneable {
  return delPolicy;
  }
- /**
- * The maximum number of terms that will be indexed for a single field in a
- * document. This limits the amount of memory required for indexing, so that
- * collections with very large files will not crash the indexing process by
- * running out of memory. This setting refers to the number of running terms,
- * not to the number of different terms.
- * <p>
- * <b>NOTE:</b> this silently truncates large documents, excluding from the
- * index all terms that occur further in the document. If you know your source
- * documents are large, be sure to set this value high enough to accomodate
- * the expected size. If you set it to {@link #UNLIMITED_FIELD_LENGTH}, then
- * the only limit is your memory, but you should anticipate an
- * OutOfMemoryError.
- * <p>
- * By default it is set to {@link #UNLIMITED_FIELD_LENGTH}.
- */
- public IndexWriterConfig setMaxFieldLength(int maxFieldLength) {
- this.maxFieldLength = maxFieldLength;
- return this;
- }
- /**
- * Returns the maximum number of terms that will be indexed for a single field
- * in a document.
- *
- * @see #setMaxFieldLength(int)
- */
- public int getMaxFieldLength() {
- return maxFieldLength;
- }
  /**
  * Expert: allows to open a certain commit point. The default is null which
  * opens the latest commit point.
@@ -611,7 +576,6 @@ public final class IndexWriterConfig implements Cloneable {
  sb.append("delPolicy=").append(delPolicy.getClass().getName()).append("\n");
  sb.append("commit=").append(commit == null ? "null" : commit).append("\n");
  sb.append("openMode=").append(openMode).append("\n");
- sb.append("maxFieldLength=").append(maxFieldLength).append("\n");
  sb.append("similarity=").append(similarity.getClass().getName()).append("\n");
  sb.append("termIndexInterval=").append(termIndexInterval).append("\n"); // TODO: this should be private to the codec, not settable here
  sb.append("mergeScheduler=").append(mergeScheduler.getClass().getName()).append("\n");

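With the setter, getter, constant, and toString() entry all removed, IndexWriterConfig carries no field-length state at all. A sketch of an equivalent post-change configuration, assuming an existing analyzer and the 10,000-term figure the removed IndexWriter javadoc cited as the old default:

    // Sketch: setMaxFieldLength(...) simply drops out of the chain; the cap,
    // if still wanted, moves into the analyzer wrapping.
    IndexWriterConfig conf = new IndexWriterConfig(Version.LUCENE_CURRENT,
        new LimitTokenCountAnalyzer(analyzer, 10000))
        .setOpenMode(OpenMode.CREATE_OR_APPEND);
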
View File

@@ -784,7 +784,7 @@ public class TestIndexWriter extends LuceneTestCase {
  public void testHighFreqTerm() throws IOException {
  MockDirectoryWrapper dir = newDirectory();
  IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(
- TEST_VERSION_CURRENT, new MockAnalyzer()).setMaxFieldLength(100000000).setRAMBufferSizeMB(0.01));
+ TEST_VERSION_CURRENT, new MockAnalyzer()).setRAMBufferSizeMB(0.01));
  // Massive doc that has 128 K a's
  StringBuilder b = new StringBuilder(1024*1024);
  for(int i=0;i<4096;i++) {
@@ -1236,30 +1236,7 @@ public class TestIndexWriter extends LuceneTestCase {
  writer.close();
  dir.close();
  }
- // LUCENE-1084: test user-specified field length
- public void testUserSpecifiedMaxFieldLength() throws IOException {
- Directory dir = newDirectory();
- IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(
- TEST_VERSION_CURRENT, new MockAnalyzer()).setMaxFieldLength(100000));
- Document doc = new Document();
- StringBuilder b = new StringBuilder();
- for(int i=0;i<10000;i++)
- b.append(" a");
- b.append(" x");
- doc.add(newField("field", b.toString(), Field.Store.NO, Field.Index.ANALYZED));
- writer.addDocument(doc);
- writer.close();
- IndexReader reader = IndexReader.open(dir, true);
- Term t = new Term("field", "x");
- assertEquals(1, reader.docFreq(t));
- reader.close();
- dir.close();
- }
  // LUCENE-325: test expungeDeletes, when 2 singular merges
  // are required
  public void testExpungeDeletes() throws IOException {

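The removed LUCENE-1084 test is not dropped outright; it reappears in TestLimitTokenCountAnalyzer below, rewritten around the analyzer-level cap instead of setMaxFieldLength.
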
View File

@@ -17,7 +17,6 @@ package org.apache.lucene.index;
  * limitations under the License.
  */
  import java.io.IOException;
- import java.lang.reflect.Field;
  import java.lang.reflect.Method;
  import java.lang.reflect.Modifier;
@@ -26,7 +25,6 @@ import java.util.Set;
  import org.apache.lucene.analysis.MockAnalyzer;
  import org.apache.lucene.index.DocumentsWriter.IndexingChain;
  import org.apache.lucene.index.IndexWriter.IndexReaderWarmer;
  import org.apache.lucene.index.IndexWriterConfig.OpenMode;
  import org.apache.lucene.search.DefaultSimilarity;
  import org.apache.lucene.search.Similarity;
@@ -49,22 +47,12 @@ public class TestIndexWriterConfig extends LuceneTestCase {
  }
  private static final class MyWarmer extends IndexReaderWarmer {
  // Does not implement anything - used only for type checking on IndexWriterConfig.
+ @Override
  public void warm(IndexReader reader) throws IOException {
  }
  }
  @Test
  public void testDefaults() throws Exception {
  IndexWriterConfig conf = new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer());
  assertEquals(MockAnalyzer.class, conf.getAnalyzer().getClass());
  assertNull(conf.getIndexCommit());
  assertEquals(KeepOnlyLastCommitDeletionPolicy.class, conf.getIndexDeletionPolicy().getClass());
- assertEquals(IndexWriterConfig.UNLIMITED_FIELD_LENGTH, conf.getMaxFieldLength());
  assertEquals(ConcurrentMergeScheduler.class, conf.getMergeScheduler().getClass());
  assertEquals(OpenMode.CREATE_OR_APPEND, conf.getOpenMode());
  assertTrue(Similarity.getDefault() == conf.getSimilarity());
@@ -129,7 +117,6 @@ public class TestIndexWriterConfig extends LuceneTestCase {
  // Tests that the values of the constants does not change
  assertEquals(1000, IndexWriterConfig.WRITE_LOCK_TIMEOUT);
  assertEquals(32, IndexWriterConfig.DEFAULT_TERM_INDEX_INTERVAL);
- assertEquals(Integer.MAX_VALUE, IndexWriterConfig.UNLIMITED_FIELD_LENGTH);
  assertEquals(-1, IndexWriterConfig.DISABLE_AUTO_FLUSH);
  assertEquals(IndexWriterConfig.DISABLE_AUTO_FLUSH, IndexWriterConfig.DEFAULT_MAX_BUFFERED_DELETE_TERMS);
  assertEquals(IndexWriterConfig.DISABLE_AUTO_FLUSH, IndexWriterConfig.DEFAULT_MAX_BUFFERED_DOCS);

View File

@@ -22,8 +22,16 @@ import java.io.StringReader;
  import org.apache.lucene.analysis.Analyzer;
  import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+ import org.apache.lucene.analysis.MockAnalyzer;
  import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
  import org.apache.lucene.analysis.standard.StandardAnalyzer;
+ import org.apache.lucene.document.Document;
+ import org.apache.lucene.document.Field;
+ import org.apache.lucene.index.IndexReader;
+ import org.apache.lucene.index.IndexWriter;
+ import org.apache.lucene.index.IndexWriterConfig;
+ import org.apache.lucene.index.Term;
+ import org.apache.lucene.store.Directory;
  public class TestLimitTokenCountAnalyzer extends BaseTokenStreamTestCase {
@@ -39,4 +47,26 @@ public class TestLimitTokenCountAnalyzer extends BaseTokenStreamTestCase {
  assertTokenStreamContents(a.reusableTokenStream("dummy", new StringReader("1 2 3 4 5")), new String[] { "1", "2" }, new int[] { 0, 2 }, new int[] { 1, 3 }, 3);
  }
+ public void testLimitTokenCountIndexWriter() throws IOException {
+ Directory dir = newDirectory();
+ IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(
+ TEST_VERSION_CURRENT, new LimitTokenCountAnalyzer(new MockAnalyzer(), 100000)));
+ Document doc = new Document();
+ StringBuilder b = new StringBuilder();
+ for(int i=0;i<10000;i++)
+ b.append(" a");
+ b.append(" x");
+ doc.add(newField("field", b.toString(), Field.Store.NO, Field.Index.ANALYZED));
+ writer.addDocument(doc);
+ writer.close();
+ IndexReader reader = IndexReader.open(dir, true);
+ Term t = new Term("field", "x");
+ assertEquals(1, reader.docFreq(t));
+ reader.close();
+ dir.close();
+ }
  }

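This new test is the deleted testUserSpecifiedMaxFieldLength re-expressed through the analysis chain: LimitTokenCountAnalyzer(new MockAnalyzer(), 100000) stands in for setMaxFieldLength(100000), and the assertion is unchanged, since the trailing "x" of the 10,001-token document is still indexed because the cap is never reached.
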
View File

@@ -46,8 +46,7 @@ import java.io.PrintStream;
  * Create an index. <br>
  * Other side effects: index writer object in perfRunData is set. <br>
  * Relevant properties: <code>merge.factor (default 10),
- * max.buffered (default no flush), max.field.length (default
- * 10,000 tokens), max.field.length, compound (default true), ram.flush.mb [default 0],
+ * max.buffered (default no flush), compound (default true), ram.flush.mb [default 0],
  * merge.policy (default org.apache.lucene.index.LogByteSizeMergePolicy),
  * merge.scheduler (default
  * org.apache.lucene.index.ConcurrentMergeScheduler),
@@ -153,7 +152,6 @@ public class CreateIndexTask extends PerfTask {
  logMergePolicy.setMergeFactor(config.get("merge.factor",OpenIndexTask.DEFAULT_MERGE_PFACTOR));
  }
  }
- iwConf.setMaxFieldLength(config.get("max.field.length",OpenIndexTask.DEFAULT_MAX_FIELD_LENGTH));
  final double ramBuffer = config.get("ram.flush.mb",OpenIndexTask.DEFAULT_RAM_FLUSH_MB);
  final int maxBuffered = config.get("max.buffered",OpenIndexTask.DEFAULT_MAX_BUFFERED);
  if (maxBuffered == IndexWriterConfig.DISABLE_AUTO_FLUSH) {

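For benchmark algorithms the max.field.length property is therefore no longer read, and OpenIndexTask.DEFAULT_MAX_FIELD_LENGTH goes away with it in the next file; an .alg that needs truncation would have to arrange it through the analyzer instead.
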
View File

@@ -26,7 +26,6 @@ import org.apache.lucene.index.LogMergePolicy;
  import org.apache.lucene.index.IndexWriterConfig.OpenMode;
  import java.io.IOException;
  /**
  * Open an index writer.
  * <br>Other side effects: index writer object in perfRunData is set.
@@ -41,7 +40,6 @@ import java.io.IOException;
  public class OpenIndexTask extends PerfTask {
  public static final int DEFAULT_MAX_BUFFERED = IndexWriterConfig.DEFAULT_MAX_BUFFERED_DOCS;
- public static final int DEFAULT_MAX_FIELD_LENGTH = IndexWriterConfig.UNLIMITED_FIELD_LENGTH;
  public static final int DEFAULT_MERGE_PFACTOR = LogMergePolicy.DEFAULT_MERGE_FACTOR;
  public static final double DEFAULT_RAM_FLUSH_MB = (int) IndexWriterConfig.DEFAULT_RAM_BUFFER_SIZE_MB;
  private String commitUserData;

View File

@@ -74,7 +74,6 @@ public class FileBasedSpellChecker extends AbstractLuceneSpellChecker {
  return null;
  }
  @SuppressWarnings("unchecked")
  private void loadExternalFileDictionary(SolrCore core) {
  try {
@@ -92,7 +91,6 @@ public class FileBasedSpellChecker extends AbstractLuceneSpellChecker {
  new IndexWriterConfig(core.getSolrConfig().luceneMatchVersion, fieldType.getAnalyzer()).
  setMaxBufferedDocs(150).
  setMergePolicy(mp).
- setMaxFieldLength(IndexWriterConfig.UNLIMITED_FIELD_LENGTH).
  setOpenMode(IndexWriterConfig.OpenMode.CREATE)
  );

View File

@@ -53,7 +53,6 @@ public class SolrIndexConfig {
  maxMergeDocs = -1;
  mergeFactor = -1;
  ramBufferSizeMB = 16;
- maxFieldLength = -1;
  writeLockTimeout = -1;
  commitLockTimeout = -1;
  lockType = null;
@@ -71,7 +70,6 @@ public class SolrIndexConfig {
  public final double ramBufferSizeMB;
- public final int maxFieldLength;
  public final int writeLockTimeout;
  public final int commitLockTimeout;
  public final String lockType;
@@ -95,7 +93,6 @@ public class SolrIndexConfig {
  mergeFactor=solrConfig.getInt(prefix+"/mergeFactor",def.mergeFactor);
  ramBufferSizeMB = solrConfig.getDouble(prefix+"/ramBufferSizeMB", def.ramBufferSizeMB);
- maxFieldLength=solrConfig.getInt(prefix+"/maxFieldLength",def.maxFieldLength);
  writeLockTimeout=solrConfig.getInt(prefix+"/writeLockTimeout", def.writeLockTimeout);
  commitLockTimeout=solrConfig.getInt(prefix+"/commitLockTimeout", def.commitLockTimeout);
  lockType=solrConfig.get(prefix+"/lockType", def.lockType);
@@ -153,9 +150,6 @@ public class SolrIndexConfig {
  if (termIndexInterval != -1)
  iwc.setTermIndexInterval(termIndexInterval);
- if (maxFieldLength != -1)
- iwc.setMaxFieldLength(maxFieldLength);
  if (writeLockTimeout != -1)
  iwc.setWriteLockTimeout(writeLockTimeout);

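On the Solr side, maxFieldLength disappears from SolrIndexConfig entirely: the value, typically configured under indexDefaults or mainIndex in solrconfig.xml, is no longer parsed or applied to the IndexWriterConfig, so documents are indexed in full unless the field analyzer itself limits tokens.
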
View File

@@ -99,8 +99,7 @@ public class TestArbitraryIndexDir extends AbstractSolrTestCase{
  Directory dir = newFSDirectory(newDir);
  IndexWriter iw = new IndexWriter(
  dir,
- new IndexWriterConfig(Version.LUCENE_40, new StandardAnalyzer(Version.LUCENE_40)).
- setMaxFieldLength(1000)
+ new IndexWriterConfig(Version.LUCENE_40, new StandardAnalyzer(Version.LUCENE_40))
  );
  Document doc = new Document();
  doc.add(new Field("id", "2", Field.Store.YES, Field.Index.ANALYZED));

View File

@@ -63,8 +63,7 @@ public class TestSort extends AbstractSolrTestCase {
  IndexWriter iw = new IndexWriter(
  dir,
  new IndexWriterConfig(TEST_VERSION_CURRENT, new SimpleAnalyzer(TEST_VERSION_CURRENT)).
- setOpenMode(IndexWriterConfig.OpenMode.CREATE).
- setMaxFieldLength(IndexWriterConfig.UNLIMITED_FIELD_LENGTH)
+ setOpenMode(IndexWriterConfig.OpenMode.CREATE)
  );
  final MyDoc[] mydocs = new MyDoc[ndocs];

View File

@@ -284,8 +284,7 @@ public class IndexBasedSpellCheckerTest extends SolrTestCaseJ4 {
  Directory dir = newFSDirectory(altIndexDir);
  IndexWriter iw = new IndexWriter(
  dir,
- new IndexWriterConfig(TEST_VERSION_CURRENT, new WhitespaceAnalyzer(TEST_VERSION_CURRENT)).
- setMaxFieldLength(IndexWriterConfig.UNLIMITED_FIELD_LENGTH)
+ new IndexWriterConfig(TEST_VERSION_CURRENT, new WhitespaceAnalyzer(TEST_VERSION_CURRENT))
  );
  for (int i = 0; i < ALT_DOCS.length; i++) {
  Document doc = new Document();