LCUENE-5675: testBasic still fails but closer

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/lucene5675@1595072 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Michael McCandless 2014-05-15 23:17:37 +00:00
parent f49e67456b
commit 83332c046b
9 changed files with 50 additions and 40 deletions

View File

@ -62,10 +62,10 @@ public class IDVersionPostingsFormat extends PostingsFormat {
PostingsWriterBase postingsWriter = new IDVersionPostingsWriter();
boolean success = false;
try {
FieldsConsumer ret = new BlockTreeTermsWriter(state,
postingsWriter,
minTermsInBlock,
maxTermsInBlock);
FieldsConsumer ret = new VersionBlockTreeTermsWriter(state,
postingsWriter,
minTermsInBlock,
maxTermsInBlock);
success = true;
return ret;
} finally {
@ -80,12 +80,12 @@ public class IDVersionPostingsFormat extends PostingsFormat {
PostingsReaderBase postingsReader = new IDVersionPostingsReader();
boolean success = false;
try {
FieldsProducer ret = new BlockTreeTermsReader(state.directory,
state.fieldInfos,
state.segmentInfo,
postingsReader,
state.context,
state.segmentSuffix);
FieldsProducer ret = new VersionBlockTreeTermsReader(state.directory,
state.fieldInfos,
state.segmentInfo,
postingsReader,
state.context,
state.segmentSuffix);
success = true;
return ret;
} finally {

View File

@ -42,9 +42,6 @@ import org.apache.lucene.util.fst.Util;
/** Iterates through terms in this field */
public final class IDVersionSegmentTermsEnum extends TermsEnum {
final static Outputs<Pair<BytesRef,Long>> fstOutputs = VersionBlockTreeTermsWriter.getFSTOutputs();
final static Pair<BytesRef,Long> NO_OUTPUT = fstOutputs.getNoOutput();
// Lazy init:
IndexInput in;
@ -154,6 +151,7 @@ public final class IDVersionSegmentTermsEnum extends TermsEnum {
final long code = scratchReader.readVLong();
final long fpSeek = code >>> VersionBlockTreeTermsWriter.OUTPUT_FLAGS_NUM_BITS;
final IDVersionSegmentTermsEnumFrame f = getFrame(1+currentFrame.ord);
f.maxIDVersion = Long.MAX_VALUE - frameData.output2;
f.hasTerms = (code & VersionBlockTreeTermsWriter.OUTPUT_FLAG_HAS_TERMS) != 0;
f.hasTermsOrig = f.hasTerms;
f.isFloor = (code & VersionBlockTreeTermsWriter.OUTPUT_FLAG_IS_FLOOR) != 0;
@ -222,6 +220,7 @@ public final class IDVersionSegmentTermsEnum extends TermsEnum {
if (fr.index == null) {
throw new IllegalStateException("terms index was not loaded");
}
System.out.println("seekExact target=" + target + " minIDVersion=" + minIDVersion);
if (term.bytes.length <= target.length) {
term.bytes = ArrayUtil.grow(term.bytes, 1+target.length);
@ -282,8 +281,8 @@ public final class IDVersionSegmentTermsEnum extends TermsEnum {
//System.out.println("FAIL: arc.label=" + (char) arc.label + " targetLabel=" + (char) (target.bytes[target.offset + targetUpto] & 0xFF));
//}
assert arc.label == (target.bytes[target.offset + targetUpto] & 0xFF): "arc.label=" + (char) arc.label + " targetLabel=" + (char) (target.bytes[target.offset + targetUpto] & 0xFF);
if (arc.output != NO_OUTPUT) {
output = fstOutputs.add(output, arc.output);
if (arc.output != VersionBlockTreeTermsWriter.NO_OUTPUT) {
output = VersionBlockTreeTermsWriter.FST_OUTPUTS.add(output, arc.output);
}
if (arc.isFinal()) {
lastFrame = stack[1+lastFrame.ord];
@ -358,6 +357,7 @@ public final class IDVersionSegmentTermsEnum extends TermsEnum {
targetBeforeCurrentLength = -1;
arc = fr.index.getFirstArc(arcs[0]);
System.out.println("first arc=" + arc);
// Empty string prefix must have an output (block) in the index!
assert arc.isFinal();
@ -373,7 +373,7 @@ public final class IDVersionSegmentTermsEnum extends TermsEnum {
//term.length = 0;
targetUpto = 0;
currentFrame = pushFrame(arc, fstOutputs.add(output, arc.nextFinalOutput), 0);
currentFrame = pushFrame(arc, VersionBlockTreeTermsWriter.FST_OUTPUTS.add(output, arc.nextFinalOutput), 0);
}
// if (DEBUG) {
@ -407,8 +407,9 @@ public final class IDVersionSegmentTermsEnum extends TermsEnum {
// }
return false;
}
System.out.println(" check output=" +((output.output2)));
if ((Long.MAX_VALUE-output.output2) < minIDVersion) {
if (currentFrame.maxIDVersion < minIDVersion) {
// The max version for all terms in this block is lower than the minVersion
return false;
}
@ -439,8 +440,8 @@ public final class IDVersionSegmentTermsEnum extends TermsEnum {
term.bytes[targetUpto] = (byte) targetLabel;
// Aggregate output as we go:
assert arc.output != null;
if (arc.output != NO_OUTPUT) {
output = fstOutputs.add(output, arc.output);
if (arc.output != VersionBlockTreeTermsWriter.NO_OUTPUT) {
output = VersionBlockTreeTermsWriter.FST_OUTPUTS.add(output, arc.output);
}
// if (DEBUG) {
@ -450,7 +451,7 @@ public final class IDVersionSegmentTermsEnum extends TermsEnum {
if (arc.isFinal()) {
//if (DEBUG) System.out.println(" arc is final!");
currentFrame = pushFrame(arc, fstOutputs.add(output, arc.nextFinalOutput), targetUpto);
currentFrame = pushFrame(arc, VersionBlockTreeTermsWriter.FST_OUTPUTS.add(output, arc.nextFinalOutput), targetUpto);
//if (DEBUG) System.out.println(" curFrame.ord=" + currentFrame.ord + " hasTerms=" + currentFrame.hasTerms);
}
}
@ -555,8 +556,8 @@ public final class IDVersionSegmentTermsEnum extends TermsEnum {
// seek; but, often the FST doesn't have any
// shared bytes (but this could change if we
// reverse vLong byte order)
if (arc.output != NO_OUTPUT) {
output = fstOutputs.add(output, arc.output);
if (arc.output != VersionBlockTreeTermsWriter.NO_OUTPUT) {
output = VersionBlockTreeTermsWriter.FST_OUTPUTS.add(output, arc.output);
}
if (arc.isFinal()) {
lastFrame = stack[1+lastFrame.ord];
@ -641,7 +642,7 @@ public final class IDVersionSegmentTermsEnum extends TermsEnum {
//term.length = 0;
targetUpto = 0;
currentFrame = pushFrame(arc, fstOutputs.add(output, arc.nextFinalOutput), 0);
currentFrame = pushFrame(arc, VersionBlockTreeTermsWriter.FST_OUTPUTS.add(output, arc.nextFinalOutput), 0);
}
//if (DEBUG) {
@ -696,8 +697,8 @@ public final class IDVersionSegmentTermsEnum extends TermsEnum {
arc = nextArc;
// Aggregate output as we go:
assert arc.output != null;
if (arc.output != NO_OUTPUT) {
output = fstOutputs.add(output, arc.output);
if (arc.output != VersionBlockTreeTermsWriter.NO_OUTPUT) {
output = VersionBlockTreeTermsWriter.FST_OUTPUTS.add(output, arc.output);
}
//if (DEBUG) {
@ -707,7 +708,7 @@ public final class IDVersionSegmentTermsEnum extends TermsEnum {
if (arc.isFinal()) {
//if (DEBUG) System.out.println(" arc is final!");
currentFrame = pushFrame(arc, fstOutputs.add(output, arc.nextFinalOutput), targetUpto);
currentFrame = pushFrame(arc, VersionBlockTreeTermsWriter.FST_OUTPUTS.add(output, arc.nextFinalOutput), targetUpto);
//if (DEBUG) System.out.println(" curFrame.ord=" + currentFrame.ord + " hasTerms=" + currentFrame.hasTerms);
}
}

View File

@ -36,6 +36,8 @@ final class IDVersionSegmentTermsEnumFrame {
boolean hasTermsOrig;
boolean isFloor;
long maxIDVersion;
FST.Arc<Pair<BytesRef,Long>> arc;
// File pointer where this block was loaded from

View File

@ -152,8 +152,6 @@ final class VersionBlockTreeTermsReader extends FieldsProducer {
throw new CorruptIndexException("invalid numFields: " + numFields + " (resource=" + in + ")");
}
PairOutputs<BytesRef,Long> fstOutputs = VersionBlockTreeTermsWriter.getFSTOutputs();
for(int i=0;i<numFields;i++) {
final int field = in.readVInt();
final long numTerms = in.readVLong();
@ -163,7 +161,8 @@ final class VersionBlockTreeTermsReader extends FieldsProducer {
in.readBytes(code.bytes, 0, numBytes);
code.length = numBytes;
final long version = in.readVLong();
final Pair<BytesRef,Long> rootCode = fstOutputs.newPair(code, version);
System.out.println(" read code=" +code + " version=" + version);
final Pair<BytesRef,Long> rootCode = VersionBlockTreeTermsWriter.FST_OUTPUTS.newPair(code, version);
final FieldInfo fieldInfo = fieldInfos.fieldInfo(field);
assert fieldInfo != null: "field=" + field;
final long sumTotalTermFreq = fieldInfo.getIndexOptions() == IndexOptions.DOCS_ONLY ? -1 : in.readVLong();

View File

@ -194,6 +194,11 @@ import org.apache.lucene.util.packed.PackedInts;
// nocommit fix jdocs
final class VersionBlockTreeTermsWriter extends FieldsConsumer {
public static final PairOutputs<BytesRef,Long> FST_OUTPUTS = new PairOutputs<>(ByteSequenceOutputs.getSingleton(),
PositiveIntOutputs.getSingleton());
public static final Pair<BytesRef,Long> NO_OUTPUT = FST_OUTPUTS.getNoOutput();
/** Suggested default value for the {@code
* minItemsInBlock} parameter to {@link
* #BlockTreeTermsWriter(SegmentWriteState,PostingsWriterBase,int,int)}. */
@ -476,10 +481,9 @@ final class VersionBlockTreeTermsWriter extends FieldsConsumer {
}
}
final PairOutputs<BytesRef,Long> outputs = getFSTOutputs();
final Builder<Pair<BytesRef,Long>> indexBuilder = new Builder<>(FST.INPUT_TYPE.BYTE1,
0, 0, true, false, Integer.MAX_VALUE,
outputs, null, false,
FST_OUTPUTS, null, false,
PackedInts.COMPACT, true, 15);
//if (DEBUG) {
// System.out.println(" compile index for prefix=" + prefix);
@ -488,7 +492,7 @@ final class VersionBlockTreeTermsWriter extends FieldsConsumer {
final byte[] bytes = new byte[(int) scratchBytes.getFilePointer()];
assert bytes.length > 0;
scratchBytes.writeTo(bytes, 0);
indexBuilder.add(Util.toIntsRef(prefix, scratchIntsRef), outputs.newPair(new BytesRef(bytes, 0, bytes.length), Long.MAX_VALUE - maxVersionIndex));
indexBuilder.add(Util.toIntsRef(prefix, scratchIntsRef), FST_OUTPUTS.newPair(new BytesRef(bytes, 0, bytes.length), Long.MAX_VALUE - maxVersionIndex));
scratchBytes.reset();
// Copy over index for all sub-blocks
@ -536,11 +540,6 @@ final class VersionBlockTreeTermsWriter extends FieldsConsumer {
}
}
static PairOutputs<BytesRef,Long> getFSTOutputs() {
return new PairOutputs<>(ByteSequenceOutputs.getSingleton(),
PositiveIntOutputs.getSingleton());
}
final RAMOutputStream scratchBytes = new RAMOutputStream();
class TermsWriter {

View File

@ -75,7 +75,7 @@ final class VersionFieldReader extends Terms {
final IndexInput clone = indexIn.clone();
//System.out.println("start=" + indexStartFP + " field=" + fieldInfo.name);
clone.seek(indexStartFP);
index = new FST<>(clone, VersionBlockTreeTermsWriter.getFSTOutputs());
index = new FST<>(clone, VersionBlockTreeTermsWriter.FST_OUTPUTS);
/*
if (false) {

View File

@ -23,4 +23,4 @@ org.apache.lucene.codecs.memory.FSTOrdPulsing41PostingsFormat
org.apache.lucene.codecs.memory.FSTPostingsFormat
org.apache.lucene.codecs.memory.FSTOrdPostingsFormat
#org.apache.lucene.codecs.idversion.IDVersionPostingsFormat
org.apache.lucene.codecs.idversion.IDVersionPostingsFormat

View File

@ -48,7 +48,7 @@ public class TestIDVersionPostingsFormat extends LuceneTestCase {
doc.add(makeIDField("id0", 100));
w.addDocument(doc);
IndexReader r = w.getReader();
IDVersionSegmentTermsEnum termsEnum = (IDVersionSegmentTermsEnum) MultiFields.getTerms(r, "id").iterator(null);
IDVersionSegmentTermsEnum termsEnum = (IDVersionSegmentTermsEnum) r.leaves().get(0).reader().fields().terms("id").iterator(null);
assertTrue(termsEnum.seekExact(new BytesRef("id0"), 50));
assertFalse(termsEnum.seekExact(new BytesRef("id0"), 101));
r.close();
@ -63,6 +63,7 @@ public class TestIDVersionPostingsFormat extends LuceneTestCase {
Field field = newTextField("id", "", Field.Store.NO);
Token token = new Token(id, 0, id.length());
BytesRef payload = new BytesRef(8);
payload.length = 8;
IDVersionPostingsFormat.longToBytes(100, payload);
token.setPayload(payload);
field.setTokenStream(new CannedTokenStream(token));

View File

@ -61,6 +61,11 @@ public class PairOutputs<A,B> extends Outputs<PairOutputs.Pair<A,B>> {
public int hashCode() {
return output1.hashCode() + output2.hashCode();
}
@Override
public String toString() {
return "Pair(" + output1 + "," + output2 + ")";
}
};
public PairOutputs(Outputs<A> outputs1, Outputs<B> outputs2) {
@ -93,15 +98,18 @@ public class PairOutputs<A,B> extends Outputs<PairOutputs.Pair<A,B>> {
final boolean noOutput2 = pair.output2.equals(outputs2.getNoOutput());
if (noOutput1 && pair.output1 != outputs1.getNoOutput()) {
System.out.println("no1");
return false;
}
if (noOutput2 && pair.output2 != outputs2.getNoOutput()) {
System.out.println("no2");
return false;
}
if (noOutput1 && noOutput2) {
if (pair != NO_OUTPUT) {
System.out.println("no3");
return false;
} else {
return true;