mirror of https://github.com/apache/lucene.git
LUCENE-5675: move BlockTree* under its own package
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/lucene5675@1594991 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
60aa1ae3fb
commit
b1c481d683
|
@ -19,13 +19,13 @@ package org.apache.lucene.codecs.idversion;
|
|||
|
||||
import java.io.IOException;
|
||||
|
||||
import org.apache.lucene.codecs.BlockTreeTermsReader;
|
||||
import org.apache.lucene.codecs.BlockTreeTermsWriter;
|
||||
import org.apache.lucene.codecs.FieldsConsumer;
|
||||
import org.apache.lucene.codecs.FieldsProducer;
|
||||
import org.apache.lucene.codecs.PostingsFormat;
|
||||
import org.apache.lucene.codecs.PostingsReaderBase;
|
||||
import org.apache.lucene.codecs.PostingsWriterBase;
|
||||
import org.apache.lucene.codecs.blocktree.BlockTreeTermsReader;
|
||||
import org.apache.lucene.codecs.blocktree.BlockTreeTermsWriter;
|
||||
import org.apache.lucene.index.SegmentReadState;
|
||||
import org.apache.lucene.index.SegmentWriteState;
|
||||
import org.apache.lucene.util.IOUtils;
|
||||
|
|
|
@ -17,7 +17,7 @@ package org.apache.lucene.codecs.pulsing;
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import org.apache.lucene.codecs.BlockTreeTermsWriter;
|
||||
import org.apache.lucene.codecs.blocktree.BlockTreeTermsWriter;
|
||||
import org.apache.lucene.codecs.lucene41.Lucene41PostingsBaseFormat;
|
||||
import org.apache.lucene.codecs.lucene41.Lucene41PostingsFormat; // javadocs
|
||||
|
||||
|
|
|
@ -19,14 +19,14 @@ package org.apache.lucene.codecs.pulsing;
|
|||
|
||||
import java.io.IOException;
|
||||
|
||||
import org.apache.lucene.codecs.BlockTreeTermsReader;
|
||||
import org.apache.lucene.codecs.BlockTreeTermsWriter;
|
||||
import org.apache.lucene.codecs.FieldsConsumer;
|
||||
import org.apache.lucene.codecs.FieldsProducer;
|
||||
import org.apache.lucene.codecs.PostingsBaseFormat;
|
||||
import org.apache.lucene.codecs.PostingsFormat;
|
||||
import org.apache.lucene.codecs.PostingsReaderBase;
|
||||
import org.apache.lucene.codecs.PostingsWriterBase;
|
||||
import org.apache.lucene.codecs.blocktree.BlockTreeTermsReader;
|
||||
import org.apache.lucene.codecs.blocktree.BlockTreeTermsWriter;
|
||||
import org.apache.lucene.index.SegmentReadState;
|
||||
import org.apache.lucene.index.SegmentWriteState;
|
||||
import org.apache.lucene.util.IOUtils;
|
||||
|
|
|
@ -22,4 +22,5 @@ org.apache.lucene.codecs.memory.FSTPulsing41PostingsFormat
|
|||
org.apache.lucene.codecs.memory.FSTOrdPulsing41PostingsFormat
|
||||
org.apache.lucene.codecs.memory.FSTPostingsFormat
|
||||
org.apache.lucene.codecs.memory.FSTOrdPostingsFormat
|
||||
org.apache.lucene.codecs.idversion.IDVersionPostingsFormat
|
||||
|
||||
#org.apache.lucene.codecs.idversion.IDVersionPostingsFormat
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
package org.apache.lucene.codecs;
|
||||
package org.apache.lucene.codecs.blocktree;
|
||||
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
|
@ -17,15 +17,16 @@ package org.apache.lucene.codecs;
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import java.io.ByteArrayOutputStream;
|
||||
import java.io.IOException;
|
||||
import java.io.PrintStream;
|
||||
import java.io.UnsupportedEncodingException;
|
||||
import java.util.Collections;
|
||||
import java.util.Iterator;
|
||||
import java.util.Locale;
|
||||
import java.util.TreeMap;
|
||||
|
||||
import org.apache.lucene.codecs.BlockTermState;
|
||||
import org.apache.lucene.codecs.CodecUtil;
|
||||
import org.apache.lucene.codecs.FieldsProducer;
|
||||
import org.apache.lucene.codecs.PostingsReaderBase;
|
||||
import org.apache.lucene.index.CorruptIndexException;
|
||||
import org.apache.lucene.index.DocsAndPositionsEnum;
|
||||
import org.apache.lucene.index.DocsEnum;
|
||||
|
@ -290,177 +291,6 @@ public class BlockTreeTermsReader extends FieldsProducer {
|
|||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* BlockTree statistics for a single field
|
||||
* returned by {@link FieldReader#computeStats()}.
|
||||
*/
|
||||
public static class Stats {
|
||||
/** How many nodes in the index FST. */
|
||||
public long indexNodeCount;
|
||||
|
||||
/** How many arcs in the index FST. */
|
||||
public long indexArcCount;
|
||||
|
||||
/** Byte size of the index. */
|
||||
public long indexNumBytes;
|
||||
|
||||
/** Total number of terms in the field. */
|
||||
public long totalTermCount;
|
||||
|
||||
/** Total number of bytes (sum of term lengths) across all terms in the field. */
|
||||
public long totalTermBytes;
|
||||
|
||||
/** The number of normal (non-floor) blocks in the terms file. */
|
||||
public int nonFloorBlockCount;
|
||||
|
||||
/** The number of floor blocks (meta-blocks larger than the
|
||||
* allowed {@code maxItemsPerBlock}) in the terms file. */
|
||||
public int floorBlockCount;
|
||||
|
||||
/** The number of sub-blocks within the floor blocks. */
|
||||
public int floorSubBlockCount;
|
||||
|
||||
/** The number of "internal" blocks (that have both
|
||||
* terms and sub-blocks). */
|
||||
public int mixedBlockCount;
|
||||
|
||||
/** The number of "leaf" blocks (blocks that have only
|
||||
* terms). */
|
||||
public int termsOnlyBlockCount;
|
||||
|
||||
/** The number of "internal" blocks that do not contain
|
||||
* terms (have only sub-blocks). */
|
||||
public int subBlocksOnlyBlockCount;
|
||||
|
||||
/** Total number of blocks. */
|
||||
public int totalBlockCount;
|
||||
|
||||
/** Number of blocks at each prefix depth. */
|
||||
public int[] blockCountByPrefixLen = new int[10];
|
||||
private int startBlockCount;
|
||||
private int endBlockCount;
|
||||
|
||||
/** Total number of bytes used to store term suffixes. */
|
||||
public long totalBlockSuffixBytes;
|
||||
|
||||
/** Total number of bytes used to store term stats (not
|
||||
* including what the {@link PostingsBaseFormat}
|
||||
* stores. */
|
||||
public long totalBlockStatsBytes;
|
||||
|
||||
/** Total bytes stored by the {@link PostingsBaseFormat},
|
||||
* plus the other few vInts stored in the frame. */
|
||||
public long totalBlockOtherBytes;
|
||||
|
||||
/** Segment name. */
|
||||
public final String segment;
|
||||
|
||||
/** Field name. */
|
||||
public final String field;
|
||||
|
||||
Stats(String segment, String field) {
|
||||
this.segment = segment;
|
||||
this.field = field;
|
||||
}
|
||||
|
||||
void startBlock(FieldReader.SegmentTermsEnum.Frame frame, boolean isFloor) {
|
||||
totalBlockCount++;
|
||||
if (isFloor) {
|
||||
if (frame.fp == frame.fpOrig) {
|
||||
floorBlockCount++;
|
||||
}
|
||||
floorSubBlockCount++;
|
||||
} else {
|
||||
nonFloorBlockCount++;
|
||||
}
|
||||
|
||||
if (blockCountByPrefixLen.length <= frame.prefix) {
|
||||
blockCountByPrefixLen = ArrayUtil.grow(blockCountByPrefixLen, 1+frame.prefix);
|
||||
}
|
||||
blockCountByPrefixLen[frame.prefix]++;
|
||||
startBlockCount++;
|
||||
totalBlockSuffixBytes += frame.suffixesReader.length();
|
||||
totalBlockStatsBytes += frame.statsReader.length();
|
||||
}
|
||||
|
||||
void endBlock(FieldReader.SegmentTermsEnum.Frame frame) {
|
||||
final int termCount = frame.isLeafBlock ? frame.entCount : frame.state.termBlockOrd;
|
||||
final int subBlockCount = frame.entCount - termCount;
|
||||
totalTermCount += termCount;
|
||||
if (termCount != 0 && subBlockCount != 0) {
|
||||
mixedBlockCount++;
|
||||
} else if (termCount != 0) {
|
||||
termsOnlyBlockCount++;
|
||||
} else if (subBlockCount != 0) {
|
||||
subBlocksOnlyBlockCount++;
|
||||
} else {
|
||||
throw new IllegalStateException();
|
||||
}
|
||||
endBlockCount++;
|
||||
final long otherBytes = frame.fpEnd - frame.fp - frame.suffixesReader.length() - frame.statsReader.length();
|
||||
assert otherBytes > 0 : "otherBytes=" + otherBytes + " frame.fp=" + frame.fp + " frame.fpEnd=" + frame.fpEnd;
|
||||
totalBlockOtherBytes += otherBytes;
|
||||
}
|
||||
|
||||
void term(BytesRef term) {
|
||||
totalTermBytes += term.length;
|
||||
}
|
||||
|
||||
void finish() {
|
||||
assert startBlockCount == endBlockCount: "startBlockCount=" + startBlockCount + " endBlockCount=" + endBlockCount;
|
||||
assert totalBlockCount == floorSubBlockCount + nonFloorBlockCount: "floorSubBlockCount=" + floorSubBlockCount + " nonFloorBlockCount=" + nonFloorBlockCount + " totalBlockCount=" + totalBlockCount;
|
||||
assert totalBlockCount == mixedBlockCount + termsOnlyBlockCount + subBlocksOnlyBlockCount: "totalBlockCount=" + totalBlockCount + " mixedBlockCount=" + mixedBlockCount + " subBlocksOnlyBlockCount=" + subBlocksOnlyBlockCount + " termsOnlyBlockCount=" + termsOnlyBlockCount;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
final ByteArrayOutputStream bos = new ByteArrayOutputStream(1024);
|
||||
PrintStream out;
|
||||
try {
|
||||
out = new PrintStream(bos, false, IOUtils.UTF_8);
|
||||
} catch (UnsupportedEncodingException bogus) {
|
||||
throw new RuntimeException(bogus);
|
||||
}
|
||||
|
||||
out.println(" index FST:");
|
||||
out.println(" " + indexNodeCount + " nodes");
|
||||
out.println(" " + indexArcCount + " arcs");
|
||||
out.println(" " + indexNumBytes + " bytes");
|
||||
out.println(" terms:");
|
||||
out.println(" " + totalTermCount + " terms");
|
||||
out.println(" " + totalTermBytes + " bytes" + (totalTermCount != 0 ? " (" + String.format(Locale.ROOT, "%.1f", ((double) totalTermBytes)/totalTermCount) + " bytes/term)" : ""));
|
||||
out.println(" blocks:");
|
||||
out.println(" " + totalBlockCount + " blocks");
|
||||
out.println(" " + termsOnlyBlockCount + " terms-only blocks");
|
||||
out.println(" " + subBlocksOnlyBlockCount + " sub-block-only blocks");
|
||||
out.println(" " + mixedBlockCount + " mixed blocks");
|
||||
out.println(" " + floorBlockCount + " floor blocks");
|
||||
out.println(" " + (totalBlockCount-floorSubBlockCount) + " non-floor blocks");
|
||||
out.println(" " + floorSubBlockCount + " floor sub-blocks");
|
||||
out.println(" " + totalBlockSuffixBytes + " term suffix bytes" + (totalBlockCount != 0 ? " (" + String.format(Locale.ROOT, "%.1f", ((double) totalBlockSuffixBytes)/totalBlockCount) + " suffix-bytes/block)" : ""));
|
||||
out.println(" " + totalBlockStatsBytes + " term stats bytes" + (totalBlockCount != 0 ? " (" + String.format(Locale.ROOT, "%.1f", ((double) totalBlockStatsBytes)/totalBlockCount) + " stats-bytes/block)" : ""));
|
||||
out.println(" " + totalBlockOtherBytes + " other bytes" + (totalBlockCount != 0 ? " (" + String.format(Locale.ROOT, "%.1f", ((double) totalBlockOtherBytes)/totalBlockCount) + " other-bytes/block)" : ""));
|
||||
if (totalBlockCount != 0) {
|
||||
out.println(" by prefix length:");
|
||||
int total = 0;
|
||||
for(int prefix=0;prefix<blockCountByPrefixLen.length;prefix++) {
|
||||
final int blockCount = blockCountByPrefixLen[prefix];
|
||||
total += blockCount;
|
||||
if (blockCount != 0) {
|
||||
out.println(" " + String.format(Locale.ROOT, "%2d", prefix) + ": " + blockCount);
|
||||
}
|
||||
}
|
||||
assert totalBlockCount == total;
|
||||
}
|
||||
|
||||
try {
|
||||
return bos.toString(IOUtils.UTF_8);
|
||||
} catch (UnsupportedEncodingException bogus) {
|
||||
throw new RuntimeException(bogus);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
final Outputs<BytesRef> fstOutputs = ByteSequenceOutputs.getSingleton();
|
||||
final BytesRef NO_OUTPUT = fstOutputs.getNoOutput();
|
||||
|
||||
|
@ -1310,7 +1140,7 @@ public class BlockTreeTermsReader extends FieldsProducer {
|
|||
}
|
||||
|
||||
// Iterates through terms in this field
|
||||
private final class SegmentTermsEnum extends TermsEnum {
|
||||
final class SegmentTermsEnum extends TermsEnum {
|
||||
private IndexInput in;
|
||||
|
||||
private Frame[] stack;
|
||||
|
@ -2308,7 +2138,7 @@ public class BlockTreeTermsReader extends FieldsProducer {
|
|||
|
||||
// Not static -- references term, postingsReader,
|
||||
// fieldInfo, in
|
||||
private final class Frame {
|
||||
final class Frame {
|
||||
// Our index in stack[]:
|
||||
final int ord;
|
||||
|
|
@ -1,4 +1,4 @@
|
|||
package org.apache.lucene.codecs;
|
||||
package org.apache.lucene.codecs.blocktree;
|
||||
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
|
@ -21,6 +21,10 @@ import java.io.IOException;
|
|||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
import org.apache.lucene.codecs.BlockTermState;
|
||||
import org.apache.lucene.codecs.CodecUtil;
|
||||
import org.apache.lucene.codecs.FieldsConsumer;
|
||||
import org.apache.lucene.codecs.PostingsWriterBase;
|
||||
import org.apache.lucene.index.FieldInfo.IndexOptions;
|
||||
import org.apache.lucene.index.FieldInfo;
|
||||
import org.apache.lucene.index.FieldInfos;
|
|
@ -0,0 +1,198 @@
|
|||
package org.apache.lucene.codecs.blocktree;
|
||||
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import java.io.ByteArrayOutputStream;
|
||||
import java.io.PrintStream;
|
||||
import java.io.UnsupportedEncodingException;
|
||||
import java.util.Locale;
|
||||
|
||||
import org.apache.lucene.util.ArrayUtil;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.IOUtils;
|
||||
|
||||
/**
|
||||
* BlockTree statistics for a single field
|
||||
* returned by {@link FieldReader#computeStats()}.
|
||||
*/
|
||||
public class Stats {
|
||||
/** How many nodes in the index FST. */
|
||||
public long indexNodeCount;
|
||||
|
||||
/** How many arcs in the index FST. */
|
||||
public long indexArcCount;
|
||||
|
||||
/** Byte size of the index. */
|
||||
public long indexNumBytes;
|
||||
|
||||
/** Total number of terms in the field. */
|
||||
public long totalTermCount;
|
||||
|
||||
/** Total number of bytes (sum of term lengths) across all terms in the field. */
|
||||
public long totalTermBytes;
|
||||
|
||||
/** The number of normal (non-floor) blocks in the terms file. */
|
||||
public int nonFloorBlockCount;
|
||||
|
||||
/** The number of floor blocks (meta-blocks larger than the
|
||||
* allowed {@code maxItemsPerBlock}) in the terms file. */
|
||||
public int floorBlockCount;
|
||||
|
||||
/** The number of sub-blocks within the floor blocks. */
|
||||
public int floorSubBlockCount;
|
||||
|
||||
/** The number of "internal" blocks (that have both
|
||||
* terms and sub-blocks). */
|
||||
public int mixedBlockCount;
|
||||
|
||||
/** The number of "leaf" blocks (blocks that have only
|
||||
* terms). */
|
||||
public int termsOnlyBlockCount;
|
||||
|
||||
/** The number of "internal" blocks that do not contain
|
||||
* terms (have only sub-blocks). */
|
||||
public int subBlocksOnlyBlockCount;
|
||||
|
||||
/** Total number of blocks. */
|
||||
public int totalBlockCount;
|
||||
|
||||
/** Number of blocks at each prefix depth. */
|
||||
public int[] blockCountByPrefixLen = new int[10];
|
||||
private int startBlockCount;
|
||||
private int endBlockCount;
|
||||
|
||||
/** Total number of bytes used to store term suffixes. */
|
||||
public long totalBlockSuffixBytes;
|
||||
|
||||
/** Total number of bytes used to store term stats (not
|
||||
* including what the {@link PostingsBaseFormat}
|
||||
* stores. */
|
||||
public long totalBlockStatsBytes;
|
||||
|
||||
/** Total bytes stored by the {@link PostingsBaseFormat},
|
||||
* plus the other few vInts stored in the frame. */
|
||||
public long totalBlockOtherBytes;
|
||||
|
||||
/** Segment name. */
|
||||
public final String segment;
|
||||
|
||||
/** Field name. */
|
||||
public final String field;
|
||||
|
||||
Stats(String segment, String field) {
|
||||
this.segment = segment;
|
||||
this.field = field;
|
||||
}
|
||||
|
||||
void startBlock(BlockTreeTermsReader.FieldReader.SegmentTermsEnum.Frame frame, boolean isFloor) {
|
||||
totalBlockCount++;
|
||||
if (isFloor) {
|
||||
if (frame.fp == frame.fpOrig) {
|
||||
floorBlockCount++;
|
||||
}
|
||||
floorSubBlockCount++;
|
||||
} else {
|
||||
nonFloorBlockCount++;
|
||||
}
|
||||
|
||||
if (blockCountByPrefixLen.length <= frame.prefix) {
|
||||
blockCountByPrefixLen = ArrayUtil.grow(blockCountByPrefixLen, 1+frame.prefix);
|
||||
}
|
||||
blockCountByPrefixLen[frame.prefix]++;
|
||||
startBlockCount++;
|
||||
totalBlockSuffixBytes += frame.suffixesReader.length();
|
||||
totalBlockStatsBytes += frame.statsReader.length();
|
||||
}
|
||||
|
||||
void endBlock(BlockTreeTermsReader.FieldReader.SegmentTermsEnum.Frame frame) {
|
||||
final int termCount = frame.isLeafBlock ? frame.entCount : frame.state.termBlockOrd;
|
||||
final int subBlockCount = frame.entCount - termCount;
|
||||
totalTermCount += termCount;
|
||||
if (termCount != 0 && subBlockCount != 0) {
|
||||
mixedBlockCount++;
|
||||
} else if (termCount != 0) {
|
||||
termsOnlyBlockCount++;
|
||||
} else if (subBlockCount != 0) {
|
||||
subBlocksOnlyBlockCount++;
|
||||
} else {
|
||||
throw new IllegalStateException();
|
||||
}
|
||||
endBlockCount++;
|
||||
final long otherBytes = frame.fpEnd - frame.fp - frame.suffixesReader.length() - frame.statsReader.length();
|
||||
assert otherBytes > 0 : "otherBytes=" + otherBytes + " frame.fp=" + frame.fp + " frame.fpEnd=" + frame.fpEnd;
|
||||
totalBlockOtherBytes += otherBytes;
|
||||
}
|
||||
|
||||
void term(BytesRef term) {
|
||||
totalTermBytes += term.length;
|
||||
}
|
||||
|
||||
void finish() {
|
||||
assert startBlockCount == endBlockCount: "startBlockCount=" + startBlockCount + " endBlockCount=" + endBlockCount;
|
||||
assert totalBlockCount == floorSubBlockCount + nonFloorBlockCount: "floorSubBlockCount=" + floorSubBlockCount + " nonFloorBlockCount=" + nonFloorBlockCount + " totalBlockCount=" + totalBlockCount;
|
||||
assert totalBlockCount == mixedBlockCount + termsOnlyBlockCount + subBlocksOnlyBlockCount: "totalBlockCount=" + totalBlockCount + " mixedBlockCount=" + mixedBlockCount + " subBlocksOnlyBlockCount=" + subBlocksOnlyBlockCount + " termsOnlyBlockCount=" + termsOnlyBlockCount;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
final ByteArrayOutputStream bos = new ByteArrayOutputStream(1024);
|
||||
PrintStream out;
|
||||
try {
|
||||
out = new PrintStream(bos, false, IOUtils.UTF_8);
|
||||
} catch (UnsupportedEncodingException bogus) {
|
||||
throw new RuntimeException(bogus);
|
||||
}
|
||||
|
||||
out.println(" index FST:");
|
||||
out.println(" " + indexNodeCount + " nodes");
|
||||
out.println(" " + indexArcCount + " arcs");
|
||||
out.println(" " + indexNumBytes + " bytes");
|
||||
out.println(" terms:");
|
||||
out.println(" " + totalTermCount + " terms");
|
||||
out.println(" " + totalTermBytes + " bytes" + (totalTermCount != 0 ? " (" + String.format(Locale.ROOT, "%.1f", ((double) totalTermBytes)/totalTermCount) + " bytes/term)" : ""));
|
||||
out.println(" blocks:");
|
||||
out.println(" " + totalBlockCount + " blocks");
|
||||
out.println(" " + termsOnlyBlockCount + " terms-only blocks");
|
||||
out.println(" " + subBlocksOnlyBlockCount + " sub-block-only blocks");
|
||||
out.println(" " + mixedBlockCount + " mixed blocks");
|
||||
out.println(" " + floorBlockCount + " floor blocks");
|
||||
out.println(" " + (totalBlockCount-floorSubBlockCount) + " non-floor blocks");
|
||||
out.println(" " + floorSubBlockCount + " floor sub-blocks");
|
||||
out.println(" " + totalBlockSuffixBytes + " term suffix bytes" + (totalBlockCount != 0 ? " (" + String.format(Locale.ROOT, "%.1f", ((double) totalBlockSuffixBytes)/totalBlockCount) + " suffix-bytes/block)" : ""));
|
||||
out.println(" " + totalBlockStatsBytes + " term stats bytes" + (totalBlockCount != 0 ? " (" + String.format(Locale.ROOT, "%.1f", ((double) totalBlockStatsBytes)/totalBlockCount) + " stats-bytes/block)" : ""));
|
||||
out.println(" " + totalBlockOtherBytes + " other bytes" + (totalBlockCount != 0 ? " (" + String.format(Locale.ROOT, "%.1f", ((double) totalBlockOtherBytes)/totalBlockCount) + " other-bytes/block)" : ""));
|
||||
if (totalBlockCount != 0) {
|
||||
out.println(" by prefix length:");
|
||||
int total = 0;
|
||||
for(int prefix=0;prefix<blockCountByPrefixLen.length;prefix++) {
|
||||
final int blockCount = blockCountByPrefixLen[prefix];
|
||||
total += blockCount;
|
||||
if (blockCount != 0) {
|
||||
out.println(" " + String.format(Locale.ROOT, "%2d", prefix) + ": " + blockCount);
|
||||
}
|
||||
}
|
||||
assert totalBlockCount == total;
|
||||
}
|
||||
|
||||
try {
|
||||
return bos.toString(IOUtils.UTF_8);
|
||||
} catch (UnsupportedEncodingException bogus) {
|
||||
throw new RuntimeException(bogus);
|
||||
}
|
||||
}
|
||||
}
|
|
@ -19,14 +19,14 @@ package org.apache.lucene.codecs.lucene40;
|
|||
|
||||
import java.io.IOException;
|
||||
|
||||
import org.apache.lucene.codecs.BlockTreeTermsReader;
|
||||
import org.apache.lucene.codecs.BlockTreeTermsWriter;
|
||||
import org.apache.lucene.codecs.CodecUtil;
|
||||
import org.apache.lucene.codecs.FieldsConsumer;
|
||||
import org.apache.lucene.codecs.FieldsProducer;
|
||||
import org.apache.lucene.codecs.PostingsFormat;
|
||||
import org.apache.lucene.codecs.PostingsReaderBase;
|
||||
import org.apache.lucene.codecs.PostingsWriterBase; // javadocs
|
||||
import org.apache.lucene.codecs.blocktree.BlockTreeTermsReader;
|
||||
import org.apache.lucene.codecs.blocktree.BlockTreeTermsWriter;
|
||||
import org.apache.lucene.index.DocsEnum; // javadocs
|
||||
import org.apache.lucene.index.FieldInfo.IndexOptions; // javadocs
|
||||
import org.apache.lucene.index.FieldInfos; // javadocs
|
||||
|
|
|
@ -20,8 +20,6 @@ package org.apache.lucene.codecs.lucene41;
|
|||
|
||||
import java.io.IOException;
|
||||
|
||||
import org.apache.lucene.codecs.BlockTreeTermsReader;
|
||||
import org.apache.lucene.codecs.BlockTreeTermsWriter;
|
||||
import org.apache.lucene.codecs.CodecUtil;
|
||||
import org.apache.lucene.codecs.FieldsConsumer;
|
||||
import org.apache.lucene.codecs.FieldsProducer;
|
||||
|
@ -29,6 +27,8 @@ import org.apache.lucene.codecs.MultiLevelSkipListWriter;
|
|||
import org.apache.lucene.codecs.PostingsFormat;
|
||||
import org.apache.lucene.codecs.PostingsReaderBase;
|
||||
import org.apache.lucene.codecs.PostingsWriterBase;
|
||||
import org.apache.lucene.codecs.blocktree.BlockTreeTermsReader;
|
||||
import org.apache.lucene.codecs.blocktree.BlockTreeTermsWriter;
|
||||
import org.apache.lucene.index.DocsEnum;
|
||||
import org.apache.lucene.index.FieldInfo.IndexOptions;
|
||||
import org.apache.lucene.index.SegmentReadState;
|
||||
|
|
|
@ -28,9 +28,10 @@ import java.util.List;
|
|||
import java.util.Locale;
|
||||
import java.util.Map;
|
||||
|
||||
import org.apache.lucene.codecs.BlockTreeTermsReader;
|
||||
import org.apache.lucene.codecs.Codec;
|
||||
import org.apache.lucene.codecs.PostingsFormat;
|
||||
import org.apache.lucene.codecs.blocktree.BlockTreeTermsReader;
|
||||
import org.apache.lucene.codecs.blocktree.Stats;
|
||||
import org.apache.lucene.index.CheckIndex.Status.DocValuesStatus;
|
||||
import org.apache.lucene.index.FieldInfo.IndexOptions;
|
||||
import org.apache.lucene.search.DocIdSetIterator;
|
||||
|
@ -45,6 +46,7 @@ import org.apache.lucene.util.FixedBitSet;
|
|||
import org.apache.lucene.util.LongBitSet;
|
||||
import org.apache.lucene.util.StringHelper;
|
||||
|
||||
|
||||
/**
|
||||
* Basic tool and API to check the health of an index and
|
||||
* write a new segments file that removes reference to
|
||||
|
@ -237,7 +239,7 @@ public class CheckIndex {
|
|||
* tree terms dictionary (this is only set if the
|
||||
* {@link PostingsFormat} for this segment uses block
|
||||
* tree. */
|
||||
public Map<String,BlockTreeTermsReader.Stats> blockTreeStats = null;
|
||||
public Map<String,Stats> blockTreeStats = null;
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -1116,7 +1118,7 @@ public class CheckIndex {
|
|||
|
||||
} else {
|
||||
if (fieldTerms instanceof BlockTreeTermsReader.FieldReader) {
|
||||
final BlockTreeTermsReader.Stats stats = ((BlockTreeTermsReader.FieldReader) fieldTerms).computeStats();
|
||||
final Stats stats = ((BlockTreeTermsReader.FieldReader) fieldTerms).computeStats();
|
||||
assert stats != null;
|
||||
if (status.blockTreeStats == null) {
|
||||
status.blockTreeStats = new HashMap<>();
|
||||
|
@ -1249,7 +1251,7 @@ public class CheckIndex {
|
|||
}
|
||||
|
||||
if (verbose && status.blockTreeStats != null && infoStream != null && status.termCount > 0) {
|
||||
for(Map.Entry<String,BlockTreeTermsReader.Stats> ent : status.blockTreeStats.entrySet()) {
|
||||
for(Map.Entry<String,Stats> ent : status.blockTreeStats.entrySet()) {
|
||||
infoStream.println(" field \"" + ent.getKey() + "\":");
|
||||
infoStream.println(" " + ent.getValue().toString().replace("\n", "\n "));
|
||||
}
|
||||
|
|
|
@ -19,9 +19,9 @@ package org.apache.lucene.codecs.lucene40;
|
|||
|
||||
import java.io.IOException;
|
||||
|
||||
import org.apache.lucene.codecs.BlockTreeTermsWriter;
|
||||
import org.apache.lucene.codecs.FieldsConsumer;
|
||||
import org.apache.lucene.codecs.PostingsWriterBase;
|
||||
import org.apache.lucene.codecs.blocktree.BlockTreeTermsWriter;
|
||||
import org.apache.lucene.index.SegmentWriteState;
|
||||
import org.apache.lucene.util.LuceneTestCase;
|
||||
|
||||
|
|
|
@ -20,8 +20,6 @@ package org.apache.lucene.codecs.mockrandom;
|
|||
import java.io.IOException;
|
||||
import java.util.Random;
|
||||
|
||||
import org.apache.lucene.codecs.BlockTreeTermsReader;
|
||||
import org.apache.lucene.codecs.BlockTreeTermsWriter;
|
||||
import org.apache.lucene.codecs.FieldsConsumer;
|
||||
import org.apache.lucene.codecs.FieldsProducer;
|
||||
import org.apache.lucene.codecs.PostingsFormat;
|
||||
|
@ -36,6 +34,8 @@ import org.apache.lucene.codecs.blockterms.TermsIndexReaderBase;
|
|||
import org.apache.lucene.codecs.blockterms.TermsIndexWriterBase;
|
||||
import org.apache.lucene.codecs.blockterms.VariableGapTermsIndexReader;
|
||||
import org.apache.lucene.codecs.blockterms.VariableGapTermsIndexWriter;
|
||||
import org.apache.lucene.codecs.blocktree.BlockTreeTermsReader;
|
||||
import org.apache.lucene.codecs.blocktree.BlockTreeTermsWriter;
|
||||
import org.apache.lucene.codecs.lucene41.Lucene41PostingsReader;
|
||||
import org.apache.lucene.codecs.lucene41.Lucene41PostingsWriter;
|
||||
import org.apache.lucene.codecs.memory.FSTOrdTermsReader;
|
||||
|
|
|
@ -19,13 +19,13 @@ package org.apache.lucene.codecs.nestedpulsing;
|
|||
|
||||
import java.io.IOException;
|
||||
|
||||
import org.apache.lucene.codecs.BlockTreeTermsReader;
|
||||
import org.apache.lucene.codecs.BlockTreeTermsWriter;
|
||||
import org.apache.lucene.codecs.FieldsConsumer;
|
||||
import org.apache.lucene.codecs.FieldsProducer;
|
||||
import org.apache.lucene.codecs.PostingsFormat;
|
||||
import org.apache.lucene.codecs.PostingsReaderBase;
|
||||
import org.apache.lucene.codecs.PostingsWriterBase;
|
||||
import org.apache.lucene.codecs.blocktree.BlockTreeTermsReader;
|
||||
import org.apache.lucene.codecs.blocktree.BlockTreeTermsWriter;
|
||||
import org.apache.lucene.codecs.lucene41.Lucene41PostingsReader;
|
||||
import org.apache.lucene.codecs.lucene41.Lucene41PostingsWriter;
|
||||
import org.apache.lucene.codecs.pulsing.PulsingPostingsReader;
|
||||
|
|
Loading…
Reference in New Issue