From c7f5ce8c6849a3921031540f2b7e4ebdbe5d7f19 Mon Sep 17 00:00:00 2001 From: Han Jiang Date: Sun, 16 Jun 2013 13:06:29 +0000 Subject: [PATCH] LUCENE-5029: merge PendingMetaData into PendingTerm git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/lucene3069@1493508 13f79535-47bb-0310-9956-ffa450edef68 --- .../codecs/temp/TempBlockTermsWriter.java | 134 ++++++++---------- 1 file changed, 60 insertions(+), 74 deletions(-) diff --git a/lucene/core/src/java/org/apache/lucene/codecs/temp/TempBlockTermsWriter.java b/lucene/core/src/java/org/apache/lucene/codecs/temp/TempBlockTermsWriter.java index 88d7ab6ba4f..1cfad3e67a5 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/temp/TempBlockTermsWriter.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/temp/TempBlockTermsWriter.java @@ -363,12 +363,18 @@ public class TempBlockTermsWriter extends FieldsConsumer { private static final class PendingTerm extends PendingEntry { public final BytesRef term; + // stats public final TermStats stats; + // metadata + public long[] longs; + public byte[] bytes; - public PendingTerm(BytesRef term, TermStats stats) { + public PendingTerm(BytesRef term, TermStats stats, long[] longs, byte[] bytes) { super(true); this.term = term; this.stats = stats; + this.longs = longs; + this.bytes = bytes; } @Override @@ -485,15 +491,6 @@ public class TempBlockTermsWriter extends FieldsConsumer { } } - private static final class PendingMetaData { - public long[] longs; - public RAMOutputStream bytesWriter; - public PendingMetaData(int length) { - longs = new long[length]; - bytesWriter = new RAMOutputStream(); - } - } - final RAMOutputStream scratchBytes = new RAMOutputStream(); class TermsWriter extends TermsConsumer { @@ -857,6 +854,11 @@ public class TempBlockTermsWriter extends FieldsConsumer { final List> subIndices; int termCount; + + final int size = postingsWriter.longsSize(); + long[] lastLongs = new long[size]; + Arrays.fill(lastLongs, 0); + if (isLeafBlock) { subIndices = null; for (PendingEntry ent : slice) { @@ -870,15 +872,23 @@ public class TempBlockTermsWriter extends FieldsConsumer { // System.out.println(" write term suffix=" + suffixBytes); // } // For leaf block we write suffix straight - bytesWriter.writeVInt(suffix); - bytesWriter.writeBytes(term.term.bytes, prefixLength, suffix); + suffixWriter.writeVInt(suffix); + suffixWriter.writeBytes(term.term.bytes, prefixLength, suffix); // Write term stats, to separate byte[] blob: - bytesWriter2.writeVInt(term.stats.docFreq); + statsWriter.writeVInt(term.stats.docFreq); if (fieldInfo.getIndexOptions() != IndexOptions.DOCS_ONLY) { assert term.stats.totalTermFreq >= term.stats.docFreq: term.stats.totalTermFreq + " vs " + term.stats.docFreq; - bytesWriter2.writeVLong(term.stats.totalTermFreq - term.stats.docFreq); + statsWriter.writeVLong(term.stats.totalTermFreq - term.stats.docFreq); } + + // Write term meta data + for (int pos = 0; pos < size; pos++) { + assert term.longs[pos] >= 0; + metaWriter.writeVLong(term.longs[pos] - lastLongs[pos]); + } + lastLongs = term.longs; + metaWriter.writeBytes(term.bytes, 0, term.bytes.length); } termCount = length; } else { @@ -896,16 +906,24 @@ public class TempBlockTermsWriter extends FieldsConsumer { // } // For non-leaf block we borrow 1 bit to record // if entry is term or sub-block - bytesWriter.writeVInt(suffix<<1); - bytesWriter.writeBytes(term.term.bytes, prefixLength, suffix); + suffixWriter.writeVInt(suffix<<1); + suffixWriter.writeBytes(term.term.bytes, prefixLength, suffix); // Write term stats, to separate byte[] blob: - bytesWriter2.writeVInt(term.stats.docFreq); + statsWriter.writeVInt(term.stats.docFreq); if (fieldInfo.getIndexOptions() != IndexOptions.DOCS_ONLY) { assert term.stats.totalTermFreq >= term.stats.docFreq; - bytesWriter2.writeVLong(term.stats.totalTermFreq - term.stats.docFreq); + statsWriter.writeVLong(term.stats.totalTermFreq - term.stats.docFreq); } + // Write term meta data + for (int pos = 0; pos < size; pos++) { + assert term.longs[pos] >= 0; + metaWriter.writeVLong(term.longs[pos] - lastLongs[pos]); + } + lastLongs = term.longs; + metaWriter.writeBytes(term.bytes, 0, term.bytes.length); + termCount++; } else { PendingBlock block = (PendingBlock) ent; @@ -915,8 +933,8 @@ public class TempBlockTermsWriter extends FieldsConsumer { // For non-leaf block we borrow 1 bit to record // if entry is term or sub-block - bytesWriter.writeVInt((suffix<<1)|1); - bytesWriter.writeBytes(block.prefix.bytes, prefixLength, suffix); + suffixWriter.writeVInt((suffix<<1)|1); + suffixWriter.writeBytes(block.prefix.bytes, prefixLength, suffix); assert block.fp < startFP; // if (DEBUG) { @@ -926,7 +944,7 @@ public class TempBlockTermsWriter extends FieldsConsumer { // System.out.println(" write sub-block suffix=" + toString(suffixBytes) + " subFP=" + block.fp + " subCode=" + (startFP-block.fp) + " floor=" + block.isFloor); // } - bytesWriter.writeVLong(startFP - block.fp); + suffixWriter.writeVLong(startFP - block.fp); subIndices.add(block.index); } } @@ -939,17 +957,19 @@ public class TempBlockTermsWriter extends FieldsConsumer { // search on lookup // Write suffixes byte[] blob to terms dict output: - out.writeVInt((int) (bytesWriter.getFilePointer() << 1) | (isLeafBlock ? 1:0)); - bytesWriter.writeTo(out); - bytesWriter.reset(); + out.writeVInt((int) (suffixWriter.getFilePointer() << 1) | (isLeafBlock ? 1:0)); + suffixWriter.writeTo(out); + suffixWriter.reset(); // Write term stats byte[] blob - out.writeVInt((int) bytesWriter2.getFilePointer()); - bytesWriter2.writeTo(out); - bytesWriter2.reset(); + out.writeVInt((int) statsWriter.getFilePointer()); + statsWriter.writeTo(out); + statsWriter.reset(); - // Write term metadata block - flushTermsBlock(futureTermCount+termCount, termCount); + // Write term meta data byte[] blob + out.writeVInt((int) metaWriter.getFilePointer()); + metaWriter.writeTo(out); + metaWriter.reset(); // Remove slice replaced by block: slice.clear(); @@ -969,42 +989,6 @@ public class TempBlockTermsWriter extends FieldsConsumer { return new PendingBlock(prefix, startFP, termCount != 0, isFloor, floorLeadByte, subIndices); } - /** Flush count terms starting at start "backwards", as a - * block. start is a negative offset from the end of the - * terms stack, ie bigger start means further back in - * the stack. */ - void flushTermsBlock(int start, int count) throws IOException { - if (count == 0) { - out.writeByte((byte) 0); - return; - } - - assert start <= pendingMetaData.size(); - assert count <= start; - - final int limit = pendingMetaData.size() - start + count; - final int size = postingsWriter.longsSize(); - - long[] lastLongs = new long[size]; - Arrays.fill(lastLongs, 0); - for(int idx=limit-count; idx= 0; - bytesWriter3.writeVLong(meta.longs[pos] - lastLongs[pos]); - } - lastLongs = meta.longs; - meta.bytesWriter.writeTo(bytesWriter3); - } - - out.writeVInt((int) bytesWriter3.getFilePointer()); - bytesWriter3.writeTo(out); - bytesWriter3.reset(); - - // Remove the terms we just wrote: - pendingMetaData.subList(limit-count, limit).clear(); - } - TermsWriter(FieldInfo fieldInfo) { this.fieldInfo = fieldInfo; @@ -1045,9 +1029,6 @@ public class TempBlockTermsWriter extends FieldsConsumer { private final IntsRef scratchIntsRef = new IntsRef(); - private final List pendingMetaData = new ArrayList(); - private final RAMOutputStream bytesWriter3 = new RAMOutputStream(); - @Override public void finishTerm(BytesRef text, TermStats stats) throws IOException { @@ -1055,11 +1036,15 @@ public class TempBlockTermsWriter extends FieldsConsumer { //if (DEBUG) System.out.println("BTTW.finishTerm term=" + fieldInfo.name + ":" + toString(text) + " seg=" + segment + " df=" + stats.docFreq); blockBuilder.add(Util.toIntsRef(text, scratchIntsRef), noOutputs.getNoOutput()); - PendingTerm term = new PendingTerm(BytesRef.deepCopyOf(text), stats); - PendingMetaData meta = new PendingMetaData(postingsWriter.longsSize()); + + long[] longs = new long[postingsWriter.longsSize()]; + postingsWriter.finishTerm(longs, metaWriter, stats); + byte[] bytes = new byte[(int)metaWriter.getFilePointer()]; + metaWriter.writeTo(bytes, 0); + metaWriter.reset(); + + PendingTerm term = new PendingTerm(BytesRef.deepCopyOf(text), stats, longs, bytes); pending.add(term); - postingsWriter.finishTerm(meta.longs, meta.bytesWriter, stats); - pendingMetaData.add(meta); numTerms++; } @@ -1107,8 +1092,9 @@ public class TempBlockTermsWriter extends FieldsConsumer { } } - private final RAMOutputStream bytesWriter = new RAMOutputStream(); - private final RAMOutputStream bytesWriter2 = new RAMOutputStream(); + private final RAMOutputStream suffixWriter = new RAMOutputStream(); + private final RAMOutputStream statsWriter = new RAMOutputStream(); + private final RAMOutputStream metaWriter = new RAMOutputStream(); } @Override