LUCENE-3932: speed up Lucene3X's loading of the terms index by pre-sizing the in-memory PackedInts based on size of the .tii file

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1309866 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Michael McCandless 2012-04-05 14:08:30 +00:00
parent cbddf0809e
commit ea901cb0f8
5 changed files with 47 additions and 15 deletions

View File

@ -690,6 +690,9 @@ Optimizations
* LUCENE-3795: Replace contrib/spatial with modules/spatial. This includes
a basic spatial strategy interface. (David Smiley, Chris Male, ryan)
* LUCENE-3932: Lucene3x codec loads terms index faster, by
pre-allocating the packed ints array based on the .tii file size
(Sean Bridges via Mike McCandless)
Bug fixes

View File

@ -22,6 +22,7 @@ import java.util.Arrays;
import org.apache.lucene.store.BufferedIndexInput;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.util.MathUtil;
/**
* This abstract class reads skip lists with multiple levels.
@ -184,21 +185,9 @@ public abstract class MultiLevelSkipListReader {
}
}
/**
 * Integer logarithm: returns x == 0 ? 0 : Math.floor(Math.log(x) / Math.log(base)).
 * Any x smaller than base (negative values included) also yields 0, because the
 * loop below is never entered.
 *
 * @param x the value whose logarithm is taken
 * @param base the logarithm base; asserted to be at least 2
 * @return the largest exponent e such that base^e <= x, or 0 when x < base
 */
static int log(int x, int base) {
assert base >= 2;
int ret = 0;
long n = base; // needs to be a long to avoid overflow
// n holds base^(ret+1); count how many powers of base fit into x.
while (x >= n) {
n *= base;
ret++;
}
return ret;
}
/** Loads the skip levels */
private void loadSkipLevels() throws IOException {
numberOfSkipLevels = log(docCount, skipInterval[0]);
numberOfSkipLevels = MathUtil.log(docCount, skipInterval[0]);
if (numberOfSkipLevels > maxNumberOfSkipLevels) {
numberOfSkipLevels = maxNumberOfSkipLevels;
}

View File

@ -21,6 +21,7 @@ import java.io.IOException;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.store.RAMOutputStream;
import org.apache.lucene.util.MathUtil;
/**
* This abstract class writes skip lists with multiple levels.
@ -61,7 +62,7 @@ public abstract class MultiLevelSkipListWriter {
this.skipInterval = skipInterval;
// calculate the maximum number of skip levels for this document frequency
numberOfSkipLevels = MultiLevelSkipListReader.log(df, skipInterval);
numberOfSkipLevels = MathUtil.log(df, skipInterval);
// make sure it does not exceed maxSkipLevels
if (numberOfSkipLevels > maxSkipLevels) {

View File

@ -25,6 +25,7 @@ import java.util.List;
import org.apache.lucene.index.Term;
import org.apache.lucene.util.BitUtil;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.MathUtil;
import org.apache.lucene.util.PagedBytes.PagedBytesDataInput;
import org.apache.lucene.util.PagedBytes.PagedBytesDataOutput;
import org.apache.lucene.util.PagedBytes;
@ -72,7 +73,9 @@ class TermInfosReaderIndex {
PagedBytes dataPagedBytes = new PagedBytes(estimatePageBits(initialSize));
PagedBytesDataOutput dataOutput = dataPagedBytes.getDataOutput();
GrowableWriter indexToTerms = new GrowableWriter(4, indexSize, false);
final int bitEstimate = 1+MathUtil.log(tiiFileLength, 2);
GrowableWriter indexToTerms = new GrowableWriter(bitEstimate, indexSize, false);
String currentField = null;
List<String> fieldStrs = new ArrayList<String>();
int fieldCounter = -1;

View File

@ -0,0 +1,36 @@
package org.apache.lucene.util;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
 * Static math utility methods.
 */
public final class MathUtil {

  // No instance:
  private MathUtil() {
  }

  /**
   * Returns {@code x <= 0 ? 0 : Math.floor(Math.log(x) / Math.log(base))},
   * computed with integer division only (no floating point rounding).
   *
   * @param x the value whose logarithm is taken; any value smaller than
   *          {@code base}, including zero and negative values, yields 0
   * @param base the logarithm base; must be greater than 1
   * @return the number of times {@code x} can be divided by {@code base}
   *         before it becomes smaller than {@code base}
   * @throws IllegalArgumentException if {@code base <= 1}
   */
  public static int log(long x, int base) {
    // Checked unconditionally rather than with assert: when assertions are
    // disabled, base == 1 (or a negative base) could make the loop below spin
    // forever, and base == 0 could fail with an ArithmeticException.
    if (base <= 1) {
      throw new IllegalArgumentException("base must be > 1 (got " + base + ")");
    }
    int ret = 0;
    while (x >= base) {
      x /= base;
      ret++;
    }
    return ret;
  }
}