LUCENE-5611: indexing optimizations: don't compute CRC for internal use of RAMOutputStream, and don't do heavy per-term work in the skipper until we actually must buffer skip data

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/lucene5611@1590858 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Robert Muir 2014-04-29 01:49:24 +00:00
parent 48e9fa5e60
commit c4b632ef7f
6 changed files with 58 additions and 19 deletions

View File

@ -88,27 +88,51 @@ final class Lucene41SkipWriter extends MultiLevelSkipListWriter {
this.fieldHasOffsets = fieldHasOffsets;
this.fieldHasPayloads = fieldHasPayloads;
}
// tricky: we only skip data for blocks (terms with more than 128 docs), but re-init'ing the skipper
// is pretty slow for rare terms in large segments as we have to fill O(log #docs in segment) of junk.
// this is the vast majority of terms (worst case: ID field or similar). so in resetSkip() we save
// away the previous pointers, and lazy-init only if we need to buffer skip data for the term.
private boolean initialized;
long lastDocFP;
long lastPosFP;
long lastPayFP;
@Override
public void resetSkip() {
super.resetSkip();
Arrays.fill(lastSkipDoc, 0);
Arrays.fill(lastSkipDocPointer, docOut.getFilePointer());
lastDocFP = docOut.getFilePointer();
if (fieldHasPositions) {
Arrays.fill(lastSkipPosPointer, posOut.getFilePointer());
if (fieldHasPayloads) {
Arrays.fill(lastPayloadByteUpto, 0);
}
lastPosFP = posOut.getFilePointer();
if (fieldHasOffsets || fieldHasPayloads) {
Arrays.fill(lastSkipPayPointer, payOut.getFilePointer());
lastPayFP = payOut.getFilePointer();
}
}
initialized = false;
}
/**
 * Lazily prepares the per-level skip state for the current term.
 * <p>
 * Does nothing if {@code initialized} is already true. Otherwise it runs
 * {@code super.resetSkip()} and fills the "last skip" arrays from the file
 * pointers previously saved in {@code lastDocFP}, {@code lastPosFP} and
 * {@code lastPayFP}, then marks the writer as initialized. {@code resetSkip()}
 * clears the flag again, so this work is repeated at most once per reset.
 */
public void initSkip() {
if (!initialized) {
super.resetSkip();
// every skip level starts from doc 0 and the saved doc file pointer
Arrays.fill(lastSkipDoc, 0);
Arrays.fill(lastSkipDocPointer, lastDocFP);
if (fieldHasPositions) {
Arrays.fill(lastSkipPosPointer, lastPosFP);
if (fieldHasPayloads) {
Arrays.fill(lastPayloadByteUpto, 0);
}
// the pay pointer is only tracked when offsets or payloads are present
if (fieldHasOffsets || fieldHasPayloads) {
Arrays.fill(lastSkipPayPointer, lastPayFP);
}
}
// remember that the arrays are populated so later calls are no-ops
initialized = true;
}
}
/**
* Sets the values for the current skip data.
*/
public void bufferSkip(int doc, int numDocs, long posFP, long payFP, int posBufferUpto, int payloadByteUpto) throws IOException {
initSkip();
this.curDoc = doc;
this.curDocPointer = docOut.getFilePointer();
this.curPosPointer = posFP;

View File

@ -97,7 +97,7 @@ class PrefixCodedTerms implements Iterable<Term> {
/** Builds a PrefixCodedTerms: call add repeatedly, then finish. */
public static class Builder {
private RAMFile buffer = new RAMFile();
private RAMOutputStream output = new RAMOutputStream(buffer);
private RAMOutputStream output = new RAMOutputStream(buffer, false);
private Term lastTerm = new Term("");
/** add a term */

View File

@ -171,7 +171,7 @@ public class RAMDirectory extends BaseDirectory {
existing.directory = null;
}
fileMap.put(name, file);
return new RAMOutputStream(file);
return new RAMOutputStream(file, true);
}
/**

View File

@ -38,20 +38,25 @@ public class RAMOutputStream extends IndexOutput {
private long bufferStart;
private int bufferLength;
private Checksum crc = new BufferedChecksum(new CRC32());
private final Checksum crc;
/** Construct an empty output buffer. */
public RAMOutputStream() {
this(new RAMFile());
this(new RAMFile(), false);
}
public RAMOutputStream(RAMFile f) {
public RAMOutputStream(RAMFile f, boolean checksum) {
file = f;
// make sure that we switch to the
// first needed buffer lazily
currentBufferIndex = -1;
currentBuffer = null;
if (checksum) {
crc = new BufferedChecksum(new CRC32());
} else {
crc = null;
}
}
/** Copy the current contents of this buffer to the named output. */
@ -99,7 +104,9 @@ public class RAMOutputStream extends IndexOutput {
bufferStart = 0;
bufferLength = 0;
file.setLength(0);
crc.reset();
if (crc != null) {
crc.reset();
}
}
@Override
@ -113,14 +120,18 @@ public class RAMOutputStream extends IndexOutput {
currentBufferIndex++;
switchCurrentBuffer();
}
crc.update(b);
if (crc != null) {
crc.update(b);
}
currentBuffer[bufferPosition++] = b;
}
@Override
public void writeBytes(byte[] b, int offset, int len) throws IOException {
assert b != null;
crc.update(b, offset, len);
if (crc != null) {
crc.update(b, offset, len);
}
while (len > 0) {
if (bufferPosition == bufferLength) {
currentBufferIndex++;
@ -171,6 +182,10 @@ public class RAMOutputStream extends IndexOutput {
@Override
public long getChecksum() throws IOException {
return crc.getValue();
if (crc == null) {
throw new IllegalStateException("internal RAMOutputStream created with checksum disabled");
} else {
return crc.getValue();
}
}
}

View File

@ -54,7 +54,7 @@ public class TestHugeRamFile extends LuceneTestCase {
public void testHugeFile() throws IOException {
DenseRAMFile f = new DenseRAMFile();
// output part
RAMOutputStream out = new RAMOutputStream(f);
RAMOutputStream out = new RAMOutputStream(f, true);
byte b1[] = new byte[RAMOutputStream.BUFFER_SIZE];
byte b2[] = new byte[RAMOutputStream.BUFFER_SIZE / 3];
for (int i = 0; i < b1.length; i++) {

View File

@ -582,7 +582,7 @@ public class SortingAtomicReader extends FilterAtomicReader {
file = new RAMFile();
sorter = new DocOffsetSorter(maxDoc);
}
final IndexOutput out = new RAMOutputStream(file);
final IndexOutput out = new RAMOutputStream(file, false);
int doc;
int i = 0;
while ((doc = in.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {