mirror of https://github.com/apache/lucene.git
LUCENE-5611: indexing optimizations, don't compute CRC for internal use of RAMOutputStream, don't do heavy per-term stuff in skipper until we actually must buffer skipdata
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/lucene5611@1590858 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
48e9fa5e60
commit
c4b632ef7f
|
@ -88,27 +88,51 @@ final class Lucene41SkipWriter extends MultiLevelSkipListWriter {
|
|||
this.fieldHasOffsets = fieldHasOffsets;
|
||||
this.fieldHasPayloads = fieldHasPayloads;
|
||||
}
|
||||
|
||||
// tricky: we only skip data for blocks (terms with more than 128 docs), but re-init'ing the skipper
|
||||
// is pretty slow for rare terms in large segments as we have to fill O(log #docs in segment) of junk.
|
||||
// this is the vast majority of terms (worst case: ID field or similar). so in resetSkip() we save
|
||||
// away the previous pointers, and lazy-init only if we need to buffer skip data for the term.
|
||||
private boolean initialized;
|
||||
long lastDocFP;
|
||||
long lastPosFP;
|
||||
long lastPayFP;
|
||||
|
||||
@Override
|
||||
public void resetSkip() {
|
||||
super.resetSkip();
|
||||
Arrays.fill(lastSkipDoc, 0);
|
||||
Arrays.fill(lastSkipDocPointer, docOut.getFilePointer());
|
||||
lastDocFP = docOut.getFilePointer();
|
||||
if (fieldHasPositions) {
|
||||
Arrays.fill(lastSkipPosPointer, posOut.getFilePointer());
|
||||
if (fieldHasPayloads) {
|
||||
Arrays.fill(lastPayloadByteUpto, 0);
|
||||
}
|
||||
lastPosFP = posOut.getFilePointer();
|
||||
if (fieldHasOffsets || fieldHasPayloads) {
|
||||
Arrays.fill(lastSkipPayPointer, payOut.getFilePointer());
|
||||
lastPayFP = payOut.getFilePointer();
|
||||
}
|
||||
}
|
||||
initialized = false;
|
||||
}
|
||||
|
||||
/**
 * Performs the deferred skip-state initialization, at most once per reset.
 *
 * When {@code initialized} is false this calls {@code super.resetSkip()}, zero-fills
 * {@code lastSkipDoc}, and fills the skip pointer arrays from the saved
 * {@code lastDocFP} / {@code lastPosFP} / {@code lastPayFP} values, then sets
 * {@code initialized} to true. When {@code initialized} is already true the call
 * does nothing. {@code bufferSkip} invokes this before recording skip data.
 */
public void initSkip() {
|
||||
// Only do the array fills the first time; the flag is cleared again in resetSkip().
if (!initialized) {
|
||||
super.resetSkip();
|
||||
Arrays.fill(lastSkipDoc, 0);
|
||||
// Seed every entry with the saved doc file pointer.
Arrays.fill(lastSkipDocPointer, lastDocFP);
|
||||
if (fieldHasPositions) {
|
||||
// Seed every entry with the saved position file pointer.
Arrays.fill(lastSkipPosPointer, lastPosFP);
|
||||
if (fieldHasPayloads) {
|
||||
Arrays.fill(lastPayloadByteUpto, 0);
|
||||
}
|
||||
// The pay pointers are filled when the field has offsets or payloads.
if (fieldHasOffsets || fieldHasPayloads) {
|
||||
Arrays.fill(lastSkipPayPointer, lastPayFP);
|
||||
}
|
||||
}
|
||||
initialized = true;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Sets the values for the current skip data.
|
||||
*/
|
||||
public void bufferSkip(int doc, int numDocs, long posFP, long payFP, int posBufferUpto, int payloadByteUpto) throws IOException {
|
||||
initSkip();
|
||||
this.curDoc = doc;
|
||||
this.curDocPointer = docOut.getFilePointer();
|
||||
this.curPosPointer = posFP;
|
||||
|
|
|
@ -97,7 +97,7 @@ class PrefixCodedTerms implements Iterable<Term> {
|
|||
/** Builds a PrefixCodedTerms: call add repeatedly, then finish. */
|
||||
public static class Builder {
|
||||
private RAMFile buffer = new RAMFile();
|
||||
private RAMOutputStream output = new RAMOutputStream(buffer);
|
||||
private RAMOutputStream output = new RAMOutputStream(buffer, false);
|
||||
private Term lastTerm = new Term("");
|
||||
|
||||
/** add a term */
|
||||
|
|
|
@ -171,7 +171,7 @@ public class RAMDirectory extends BaseDirectory {
|
|||
existing.directory = null;
|
||||
}
|
||||
fileMap.put(name, file);
|
||||
return new RAMOutputStream(file);
|
||||
return new RAMOutputStream(file, true);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
@ -38,20 +38,25 @@ public class RAMOutputStream extends IndexOutput {
|
|||
private long bufferStart;
|
||||
private int bufferLength;
|
||||
|
||||
private Checksum crc = new BufferedChecksum(new CRC32());
|
||||
private final Checksum crc;
|
||||
|
||||
/** Construct an empty output buffer. */
|
||||
public RAMOutputStream() {
|
||||
this(new RAMFile());
|
||||
this(new RAMFile(), false);
|
||||
}
|
||||
|
||||
public RAMOutputStream(RAMFile f) {
|
||||
public RAMOutputStream(RAMFile f, boolean checksum) {
|
||||
file = f;
|
||||
|
||||
// make sure that we switch to the
|
||||
// first needed buffer lazily
|
||||
currentBufferIndex = -1;
|
||||
currentBuffer = null;
|
||||
if (checksum) {
|
||||
crc = new BufferedChecksum(new CRC32());
|
||||
} else {
|
||||
crc = null;
|
||||
}
|
||||
}
|
||||
|
||||
/** Copy the current contents of this buffer to the named output. */
|
||||
|
@ -99,7 +104,9 @@ public class RAMOutputStream extends IndexOutput {
|
|||
bufferStart = 0;
|
||||
bufferLength = 0;
|
||||
file.setLength(0);
|
||||
crc.reset();
|
||||
if (crc != null) {
|
||||
crc.reset();
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -113,14 +120,18 @@ public class RAMOutputStream extends IndexOutput {
|
|||
currentBufferIndex++;
|
||||
switchCurrentBuffer();
|
||||
}
|
||||
crc.update(b);
|
||||
if (crc != null) {
|
||||
crc.update(b);
|
||||
}
|
||||
currentBuffer[bufferPosition++] = b;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void writeBytes(byte[] b, int offset, int len) throws IOException {
|
||||
assert b != null;
|
||||
crc.update(b, offset, len);
|
||||
if (crc != null) {
|
||||
crc.update(b, offset, len);
|
||||
}
|
||||
while (len > 0) {
|
||||
if (bufferPosition == bufferLength) {
|
||||
currentBufferIndex++;
|
||||
|
@ -171,6 +182,10 @@ public class RAMOutputStream extends IndexOutput {
|
|||
|
||||
@Override
|
||||
public long getChecksum() throws IOException {
|
||||
return crc.getValue();
|
||||
if (crc == null) {
|
||||
throw new IllegalStateException("internal RAMOutputStream created with checksum disabled");
|
||||
} else {
|
||||
return crc.getValue();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -54,7 +54,7 @@ public class TestHugeRamFile extends LuceneTestCase {
|
|||
public void testHugeFile() throws IOException {
|
||||
DenseRAMFile f = new DenseRAMFile();
|
||||
// output part
|
||||
RAMOutputStream out = new RAMOutputStream(f);
|
||||
RAMOutputStream out = new RAMOutputStream(f, true);
|
||||
byte b1[] = new byte[RAMOutputStream.BUFFER_SIZE];
|
||||
byte b2[] = new byte[RAMOutputStream.BUFFER_SIZE / 3];
|
||||
for (int i = 0; i < b1.length; i++) {
|
||||
|
|
|
@ -582,7 +582,7 @@ public class SortingAtomicReader extends FilterAtomicReader {
|
|||
file = new RAMFile();
|
||||
sorter = new DocOffsetSorter(maxDoc);
|
||||
}
|
||||
final IndexOutput out = new RAMOutputStream(file);
|
||||
final IndexOutput out = new RAMOutputStream(file, false);
|
||||
int doc;
|
||||
int i = 0;
|
||||
while ((doc = in.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
|
||||
|
|
Loading…
Reference in New Issue