mirror of https://github.com/apache/lucene.git
LCUENE-5675: testBasic still fails but closer
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/lucene5675@1595072 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
f49e67456b
commit
83332c046b
|
@ -62,10 +62,10 @@ public class IDVersionPostingsFormat extends PostingsFormat {
|
|||
PostingsWriterBase postingsWriter = new IDVersionPostingsWriter();
|
||||
boolean success = false;
|
||||
try {
|
||||
FieldsConsumer ret = new BlockTreeTermsWriter(state,
|
||||
postingsWriter,
|
||||
minTermsInBlock,
|
||||
maxTermsInBlock);
|
||||
FieldsConsumer ret = new VersionBlockTreeTermsWriter(state,
|
||||
postingsWriter,
|
||||
minTermsInBlock,
|
||||
maxTermsInBlock);
|
||||
success = true;
|
||||
return ret;
|
||||
} finally {
|
||||
|
@ -80,12 +80,12 @@ public class IDVersionPostingsFormat extends PostingsFormat {
|
|||
PostingsReaderBase postingsReader = new IDVersionPostingsReader();
|
||||
boolean success = false;
|
||||
try {
|
||||
FieldsProducer ret = new BlockTreeTermsReader(state.directory,
|
||||
state.fieldInfos,
|
||||
state.segmentInfo,
|
||||
postingsReader,
|
||||
state.context,
|
||||
state.segmentSuffix);
|
||||
FieldsProducer ret = new VersionBlockTreeTermsReader(state.directory,
|
||||
state.fieldInfos,
|
||||
state.segmentInfo,
|
||||
postingsReader,
|
||||
state.context,
|
||||
state.segmentSuffix);
|
||||
success = true;
|
||||
return ret;
|
||||
} finally {
|
||||
|
|
|
@ -42,9 +42,6 @@ import org.apache.lucene.util.fst.Util;
|
|||
/** Iterates through terms in this field */
|
||||
public final class IDVersionSegmentTermsEnum extends TermsEnum {
|
||||
|
||||
final static Outputs<Pair<BytesRef,Long>> fstOutputs = VersionBlockTreeTermsWriter.getFSTOutputs();
|
||||
final static Pair<BytesRef,Long> NO_OUTPUT = fstOutputs.getNoOutput();
|
||||
|
||||
// Lazy init:
|
||||
IndexInput in;
|
||||
|
||||
|
@ -154,6 +151,7 @@ public final class IDVersionSegmentTermsEnum extends TermsEnum {
|
|||
final long code = scratchReader.readVLong();
|
||||
final long fpSeek = code >>> VersionBlockTreeTermsWriter.OUTPUT_FLAGS_NUM_BITS;
|
||||
final IDVersionSegmentTermsEnumFrame f = getFrame(1+currentFrame.ord);
|
||||
f.maxIDVersion = Long.MAX_VALUE - frameData.output2;
|
||||
f.hasTerms = (code & VersionBlockTreeTermsWriter.OUTPUT_FLAG_HAS_TERMS) != 0;
|
||||
f.hasTermsOrig = f.hasTerms;
|
||||
f.isFloor = (code & VersionBlockTreeTermsWriter.OUTPUT_FLAG_IS_FLOOR) != 0;
|
||||
|
@ -222,6 +220,7 @@ public final class IDVersionSegmentTermsEnum extends TermsEnum {
|
|||
if (fr.index == null) {
|
||||
throw new IllegalStateException("terms index was not loaded");
|
||||
}
|
||||
System.out.println("seekExact target=" + target + " minIDVersion=" + minIDVersion);
|
||||
|
||||
if (term.bytes.length <= target.length) {
|
||||
term.bytes = ArrayUtil.grow(term.bytes, 1+target.length);
|
||||
|
@ -282,8 +281,8 @@ public final class IDVersionSegmentTermsEnum extends TermsEnum {
|
|||
//System.out.println("FAIL: arc.label=" + (char) arc.label + " targetLabel=" + (char) (target.bytes[target.offset + targetUpto] & 0xFF));
|
||||
//}
|
||||
assert arc.label == (target.bytes[target.offset + targetUpto] & 0xFF): "arc.label=" + (char) arc.label + " targetLabel=" + (char) (target.bytes[target.offset + targetUpto] & 0xFF);
|
||||
if (arc.output != NO_OUTPUT) {
|
||||
output = fstOutputs.add(output, arc.output);
|
||||
if (arc.output != VersionBlockTreeTermsWriter.NO_OUTPUT) {
|
||||
output = VersionBlockTreeTermsWriter.FST_OUTPUTS.add(output, arc.output);
|
||||
}
|
||||
if (arc.isFinal()) {
|
||||
lastFrame = stack[1+lastFrame.ord];
|
||||
|
@ -358,6 +357,7 @@ public final class IDVersionSegmentTermsEnum extends TermsEnum {
|
|||
|
||||
targetBeforeCurrentLength = -1;
|
||||
arc = fr.index.getFirstArc(arcs[0]);
|
||||
System.out.println("first arc=" + arc);
|
||||
|
||||
// Empty string prefix must have an output (block) in the index!
|
||||
assert arc.isFinal();
|
||||
|
@ -373,7 +373,7 @@ public final class IDVersionSegmentTermsEnum extends TermsEnum {
|
|||
|
||||
//term.length = 0;
|
||||
targetUpto = 0;
|
||||
currentFrame = pushFrame(arc, fstOutputs.add(output, arc.nextFinalOutput), 0);
|
||||
currentFrame = pushFrame(arc, VersionBlockTreeTermsWriter.FST_OUTPUTS.add(output, arc.nextFinalOutput), 0);
|
||||
}
|
||||
|
||||
// if (DEBUG) {
|
||||
|
@ -407,8 +407,9 @@ public final class IDVersionSegmentTermsEnum extends TermsEnum {
|
|||
// }
|
||||
return false;
|
||||
}
|
||||
System.out.println(" check output=" +((output.output2)));
|
||||
|
||||
if ((Long.MAX_VALUE-output.output2) < minIDVersion) {
|
||||
if (currentFrame.maxIDVersion < minIDVersion) {
|
||||
// The max version for all terms in this block is lower than the minVersion
|
||||
return false;
|
||||
}
|
||||
|
@ -439,8 +440,8 @@ public final class IDVersionSegmentTermsEnum extends TermsEnum {
|
|||
term.bytes[targetUpto] = (byte) targetLabel;
|
||||
// Aggregate output as we go:
|
||||
assert arc.output != null;
|
||||
if (arc.output != NO_OUTPUT) {
|
||||
output = fstOutputs.add(output, arc.output);
|
||||
if (arc.output != VersionBlockTreeTermsWriter.NO_OUTPUT) {
|
||||
output = VersionBlockTreeTermsWriter.FST_OUTPUTS.add(output, arc.output);
|
||||
}
|
||||
|
||||
// if (DEBUG) {
|
||||
|
@ -450,7 +451,7 @@ public final class IDVersionSegmentTermsEnum extends TermsEnum {
|
|||
|
||||
if (arc.isFinal()) {
|
||||
//if (DEBUG) System.out.println(" arc is final!");
|
||||
currentFrame = pushFrame(arc, fstOutputs.add(output, arc.nextFinalOutput), targetUpto);
|
||||
currentFrame = pushFrame(arc, VersionBlockTreeTermsWriter.FST_OUTPUTS.add(output, arc.nextFinalOutput), targetUpto);
|
||||
//if (DEBUG) System.out.println(" curFrame.ord=" + currentFrame.ord + " hasTerms=" + currentFrame.hasTerms);
|
||||
}
|
||||
}
|
||||
|
@ -555,8 +556,8 @@ public final class IDVersionSegmentTermsEnum extends TermsEnum {
|
|||
// seek; but, often the FST doesn't have any
|
||||
// shared bytes (but this could change if we
|
||||
// reverse vLong byte order)
|
||||
if (arc.output != NO_OUTPUT) {
|
||||
output = fstOutputs.add(output, arc.output);
|
||||
if (arc.output != VersionBlockTreeTermsWriter.NO_OUTPUT) {
|
||||
output = VersionBlockTreeTermsWriter.FST_OUTPUTS.add(output, arc.output);
|
||||
}
|
||||
if (arc.isFinal()) {
|
||||
lastFrame = stack[1+lastFrame.ord];
|
||||
|
@ -641,7 +642,7 @@ public final class IDVersionSegmentTermsEnum extends TermsEnum {
|
|||
|
||||
//term.length = 0;
|
||||
targetUpto = 0;
|
||||
currentFrame = pushFrame(arc, fstOutputs.add(output, arc.nextFinalOutput), 0);
|
||||
currentFrame = pushFrame(arc, VersionBlockTreeTermsWriter.FST_OUTPUTS.add(output, arc.nextFinalOutput), 0);
|
||||
}
|
||||
|
||||
//if (DEBUG) {
|
||||
|
@ -696,8 +697,8 @@ public final class IDVersionSegmentTermsEnum extends TermsEnum {
|
|||
arc = nextArc;
|
||||
// Aggregate output as we go:
|
||||
assert arc.output != null;
|
||||
if (arc.output != NO_OUTPUT) {
|
||||
output = fstOutputs.add(output, arc.output);
|
||||
if (arc.output != VersionBlockTreeTermsWriter.NO_OUTPUT) {
|
||||
output = VersionBlockTreeTermsWriter.FST_OUTPUTS.add(output, arc.output);
|
||||
}
|
||||
|
||||
//if (DEBUG) {
|
||||
|
@ -707,7 +708,7 @@ public final class IDVersionSegmentTermsEnum extends TermsEnum {
|
|||
|
||||
if (arc.isFinal()) {
|
||||
//if (DEBUG) System.out.println(" arc is final!");
|
||||
currentFrame = pushFrame(arc, fstOutputs.add(output, arc.nextFinalOutput), targetUpto);
|
||||
currentFrame = pushFrame(arc, VersionBlockTreeTermsWriter.FST_OUTPUTS.add(output, arc.nextFinalOutput), targetUpto);
|
||||
//if (DEBUG) System.out.println(" curFrame.ord=" + currentFrame.ord + " hasTerms=" + currentFrame.hasTerms);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -36,6 +36,8 @@ final class IDVersionSegmentTermsEnumFrame {
|
|||
boolean hasTermsOrig;
|
||||
boolean isFloor;
|
||||
|
||||
long maxIDVersion;
|
||||
|
||||
FST.Arc<Pair<BytesRef,Long>> arc;
|
||||
|
||||
// File pointer where this block was loaded from
|
||||
|
|
|
@ -152,8 +152,6 @@ final class VersionBlockTreeTermsReader extends FieldsProducer {
|
|||
throw new CorruptIndexException("invalid numFields: " + numFields + " (resource=" + in + ")");
|
||||
}
|
||||
|
||||
PairOutputs<BytesRef,Long> fstOutputs = VersionBlockTreeTermsWriter.getFSTOutputs();
|
||||
|
||||
for(int i=0;i<numFields;i++) {
|
||||
final int field = in.readVInt();
|
||||
final long numTerms = in.readVLong();
|
||||
|
@ -163,7 +161,8 @@ final class VersionBlockTreeTermsReader extends FieldsProducer {
|
|||
in.readBytes(code.bytes, 0, numBytes);
|
||||
code.length = numBytes;
|
||||
final long version = in.readVLong();
|
||||
final Pair<BytesRef,Long> rootCode = fstOutputs.newPair(code, version);
|
||||
System.out.println(" read code=" +code + " version=" + version);
|
||||
final Pair<BytesRef,Long> rootCode = VersionBlockTreeTermsWriter.FST_OUTPUTS.newPair(code, version);
|
||||
final FieldInfo fieldInfo = fieldInfos.fieldInfo(field);
|
||||
assert fieldInfo != null: "field=" + field;
|
||||
final long sumTotalTermFreq = fieldInfo.getIndexOptions() == IndexOptions.DOCS_ONLY ? -1 : in.readVLong();
|
||||
|
|
|
@ -194,6 +194,11 @@ import org.apache.lucene.util.packed.PackedInts;
|
|||
// nocommit fix jdocs
|
||||
final class VersionBlockTreeTermsWriter extends FieldsConsumer {
|
||||
|
||||
public static final PairOutputs<BytesRef,Long> FST_OUTPUTS = new PairOutputs<>(ByteSequenceOutputs.getSingleton(),
|
||||
PositiveIntOutputs.getSingleton());
|
||||
|
||||
public static final Pair<BytesRef,Long> NO_OUTPUT = FST_OUTPUTS.getNoOutput();
|
||||
|
||||
/** Suggested default value for the {@code
|
||||
* minItemsInBlock} parameter to {@link
|
||||
* #BlockTreeTermsWriter(SegmentWriteState,PostingsWriterBase,int,int)}. */
|
||||
|
@ -476,10 +481,9 @@ final class VersionBlockTreeTermsWriter extends FieldsConsumer {
|
|||
}
|
||||
}
|
||||
|
||||
final PairOutputs<BytesRef,Long> outputs = getFSTOutputs();
|
||||
final Builder<Pair<BytesRef,Long>> indexBuilder = new Builder<>(FST.INPUT_TYPE.BYTE1,
|
||||
0, 0, true, false, Integer.MAX_VALUE,
|
||||
outputs, null, false,
|
||||
FST_OUTPUTS, null, false,
|
||||
PackedInts.COMPACT, true, 15);
|
||||
//if (DEBUG) {
|
||||
// System.out.println(" compile index for prefix=" + prefix);
|
||||
|
@ -488,7 +492,7 @@ final class VersionBlockTreeTermsWriter extends FieldsConsumer {
|
|||
final byte[] bytes = new byte[(int) scratchBytes.getFilePointer()];
|
||||
assert bytes.length > 0;
|
||||
scratchBytes.writeTo(bytes, 0);
|
||||
indexBuilder.add(Util.toIntsRef(prefix, scratchIntsRef), outputs.newPair(new BytesRef(bytes, 0, bytes.length), Long.MAX_VALUE - maxVersionIndex));
|
||||
indexBuilder.add(Util.toIntsRef(prefix, scratchIntsRef), FST_OUTPUTS.newPair(new BytesRef(bytes, 0, bytes.length), Long.MAX_VALUE - maxVersionIndex));
|
||||
scratchBytes.reset();
|
||||
|
||||
// Copy over index for all sub-blocks
|
||||
|
@ -536,11 +540,6 @@ final class VersionBlockTreeTermsWriter extends FieldsConsumer {
|
|||
}
|
||||
}
|
||||
|
||||
static PairOutputs<BytesRef,Long> getFSTOutputs() {
|
||||
return new PairOutputs<>(ByteSequenceOutputs.getSingleton(),
|
||||
PositiveIntOutputs.getSingleton());
|
||||
}
|
||||
|
||||
final RAMOutputStream scratchBytes = new RAMOutputStream();
|
||||
|
||||
class TermsWriter {
|
||||
|
|
|
@ -75,7 +75,7 @@ final class VersionFieldReader extends Terms {
|
|||
final IndexInput clone = indexIn.clone();
|
||||
//System.out.println("start=" + indexStartFP + " field=" + fieldInfo.name);
|
||||
clone.seek(indexStartFP);
|
||||
index = new FST<>(clone, VersionBlockTreeTermsWriter.getFSTOutputs());
|
||||
index = new FST<>(clone, VersionBlockTreeTermsWriter.FST_OUTPUTS);
|
||||
|
||||
/*
|
||||
if (false) {
|
||||
|
|
|
@ -23,4 +23,4 @@ org.apache.lucene.codecs.memory.FSTOrdPulsing41PostingsFormat
|
|||
org.apache.lucene.codecs.memory.FSTPostingsFormat
|
||||
org.apache.lucene.codecs.memory.FSTOrdPostingsFormat
|
||||
|
||||
#org.apache.lucene.codecs.idversion.IDVersionPostingsFormat
|
||||
org.apache.lucene.codecs.idversion.IDVersionPostingsFormat
|
||||
|
|
|
@ -48,7 +48,7 @@ public class TestIDVersionPostingsFormat extends LuceneTestCase {
|
|||
doc.add(makeIDField("id0", 100));
|
||||
w.addDocument(doc);
|
||||
IndexReader r = w.getReader();
|
||||
IDVersionSegmentTermsEnum termsEnum = (IDVersionSegmentTermsEnum) MultiFields.getTerms(r, "id").iterator(null);
|
||||
IDVersionSegmentTermsEnum termsEnum = (IDVersionSegmentTermsEnum) r.leaves().get(0).reader().fields().terms("id").iterator(null);
|
||||
assertTrue(termsEnum.seekExact(new BytesRef("id0"), 50));
|
||||
assertFalse(termsEnum.seekExact(new BytesRef("id0"), 101));
|
||||
r.close();
|
||||
|
@ -63,6 +63,7 @@ public class TestIDVersionPostingsFormat extends LuceneTestCase {
|
|||
Field field = newTextField("id", "", Field.Store.NO);
|
||||
Token token = new Token(id, 0, id.length());
|
||||
BytesRef payload = new BytesRef(8);
|
||||
payload.length = 8;
|
||||
IDVersionPostingsFormat.longToBytes(100, payload);
|
||||
token.setPayload(payload);
|
||||
field.setTokenStream(new CannedTokenStream(token));
|
||||
|
|
|
@ -61,6 +61,11 @@ public class PairOutputs<A,B> extends Outputs<PairOutputs.Pair<A,B>> {
|
|||
public int hashCode() {
|
||||
return output1.hashCode() + output2.hashCode();
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return "Pair(" + output1 + "," + output2 + ")";
|
||||
}
|
||||
};
|
||||
|
||||
public PairOutputs(Outputs<A> outputs1, Outputs<B> outputs2) {
|
||||
|
@ -93,15 +98,18 @@ public class PairOutputs<A,B> extends Outputs<PairOutputs.Pair<A,B>> {
|
|||
final boolean noOutput2 = pair.output2.equals(outputs2.getNoOutput());
|
||||
|
||||
if (noOutput1 && pair.output1 != outputs1.getNoOutput()) {
|
||||
System.out.println("no1");
|
||||
return false;
|
||||
}
|
||||
|
||||
if (noOutput2 && pair.output2 != outputs2.getNoOutput()) {
|
||||
System.out.println("no2");
|
||||
return false;
|
||||
}
|
||||
|
||||
if (noOutput1 && noOutput2) {
|
||||
if (pair != NO_OUTPUT) {
|
||||
System.out.println("no3");
|
||||
return false;
|
||||
} else {
|
||||
return true;
|
||||
|
|
Loading…
Reference in New Issue