mirror of https://github.com/apache/lucene.git
LUCENE-3069: steal bit to encode TTF
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/lucene3069@1502152 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
9181dbc0eb
commit
3c82f0a0d2
|
@ -20,6 +20,8 @@ package org.apache.lucene.codecs.temp;
|
|||
import java.io.IOException;
|
||||
import java.io.PrintWriter;
|
||||
import java.io.File;
|
||||
import java.util.ArrayList;
|
||||
import java.util.BitSet;
|
||||
import java.util.Collections;
|
||||
import java.util.Comparator;
|
||||
import java.util.Iterator;
|
||||
|
@ -164,7 +166,7 @@ public class TempFSTTermsReader extends FieldsProducer {
|
|||
this.docCount = docCount;
|
||||
this.longsSize = longsSize;
|
||||
this.dict = new FST<TempTermOutputs.TempMetaData>(in, new TempTermOutputs(fieldInfo, longsSize));
|
||||
//PrintWriter pw = new PrintWriter(new File("../graphs/ohohoh."+tmpname+".xxx.txt"));
|
||||
//PrintWriter pw = new PrintWriter(new File("ohohoh."+tmpname+".xxx.txt"));
|
||||
//Util.toDot(dict, pw, false, false);
|
||||
//pw.close();
|
||||
}
|
||||
|
@ -296,7 +298,6 @@ public class TempFSTTermsReader extends FieldsProducer {
|
|||
seekPending = false;
|
||||
}
|
||||
|
||||
// nocommit: reuse?
|
||||
@Override
|
||||
public DocsEnum docs(Bits liveDocs, DocsEnum reuse, int flags) throws IOException {
|
||||
decodeMetaData();
|
||||
|
@ -363,4 +364,28 @@ public class TempFSTTermsReader extends FieldsProducer {
|
|||
}
|
||||
}
|
||||
}
|
||||
static<T> void walk(FST<T> fst) throws IOException {
|
||||
final ArrayList<FST.Arc<T>> queue = new ArrayList<FST.Arc<T>>();
|
||||
final BitSet seen = new BitSet();
|
||||
final FST.BytesReader reader = fst.getBytesReader();
|
||||
final FST.Arc<T> startArc = fst.getFirstArc(new FST.Arc<T>());
|
||||
queue.add(startArc);
|
||||
while (!queue.isEmpty()) {
|
||||
final FST.Arc<T> arc = queue.remove(0);
|
||||
final long node = arc.target;
|
||||
//System.out.println(arc);
|
||||
if (FST.targetHasArcs(arc) && !seen.get((int) node)) {
|
||||
//seen.set((int) node);
|
||||
fst.readFirstRealTargetArc(node, arc, reader);
|
||||
while (true) {
|
||||
queue.add(new FST.Arc<T>().copyFrom(arc));
|
||||
if (arc.isLast()) {
|
||||
break;
|
||||
} else {
|
||||
fst.readNextRealArc(arc, reader);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -246,9 +246,15 @@ public class TempTermOutputs extends Outputs<TempTermOutputs.TempMetaData> {
|
|||
out.writeBytes(data.bytes, 0, data.bytes.length);
|
||||
}
|
||||
if (bit2 > 0) { // stats exist
|
||||
out.writeVInt(data.docFreq);
|
||||
if (hasPos) {
|
||||
out.writeVLong(data.totalTermFreq - data.docFreq);
|
||||
if (data.docFreq == data.totalTermFreq) {
|
||||
out.writeVInt((data.docFreq << 1) | 1);
|
||||
} else {
|
||||
out.writeVInt((data.docFreq << 1));
|
||||
out.writeVLong(data.totalTermFreq - data.docFreq);
|
||||
}
|
||||
} else {
|
||||
out.writeVInt(data.docFreq);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -277,9 +283,14 @@ public class TempTermOutputs extends Outputs<TempTermOutputs.TempMetaData> {
|
|||
in.readBytes(bytes, 0, bytesSize);
|
||||
}
|
||||
if (bit2 > 0) { // stats exist
|
||||
docFreq = in.readVInt();
|
||||
int code = in.readVInt();
|
||||
if (hasPos) {
|
||||
totalTermFreq = docFreq + in.readVLong();
|
||||
totalTermFreq = docFreq = code >>> 1;
|
||||
if ((code & 1) == 0) {
|
||||
totalTermFreq += in.readVLong();
|
||||
}
|
||||
} else {
|
||||
docFreq = code;
|
||||
}
|
||||
}
|
||||
return new TempMetaData(longs, bytes, docFreq, totalTermFreq);
|
||||
|
|
Loading…
Reference in New Issue