LUCENE-3069: steal bit to encode TTF

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/lucene3069@1502152 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Han Jiang 2013-07-11 08:35:35 +00:00
parent 9181dbc0eb
commit 3c82f0a0d2
2 changed files with 42 additions and 6 deletions

View File

@ -20,6 +20,8 @@ package org.apache.lucene.codecs.temp;
import java.io.IOException;
import java.io.PrintWriter;
import java.io.File;
import java.util.ArrayList;
import java.util.BitSet;
import java.util.Collections;
import java.util.Comparator;
import java.util.Iterator;
@ -164,7 +166,7 @@ public class TempFSTTermsReader extends FieldsProducer {
this.docCount = docCount;
this.longsSize = longsSize;
this.dict = new FST<TempTermOutputs.TempMetaData>(in, new TempTermOutputs(fieldInfo, longsSize));
//PrintWriter pw = new PrintWriter(new File("../graphs/ohohoh."+tmpname+".xxx.txt"));
//PrintWriter pw = new PrintWriter(new File("ohohoh."+tmpname+".xxx.txt"));
//Util.toDot(dict, pw, false, false);
//pw.close();
}
@ -296,7 +298,6 @@ public class TempFSTTermsReader extends FieldsProducer {
seekPending = false;
}
// nocommit: reuse?
@Override
public DocsEnum docs(Bits liveDocs, DocsEnum reuse, int flags) throws IOException {
decodeMetaData();
@ -363,4 +364,28 @@ public class TempFSTTermsReader extends FieldsProducer {
}
}
}
static<T> void walk(FST<T> fst) throws IOException {
final ArrayList<FST.Arc<T>> queue = new ArrayList<FST.Arc<T>>();
final BitSet seen = new BitSet();
final FST.BytesReader reader = fst.getBytesReader();
final FST.Arc<T> startArc = fst.getFirstArc(new FST.Arc<T>());
queue.add(startArc);
while (!queue.isEmpty()) {
final FST.Arc<T> arc = queue.remove(0);
final long node = arc.target;
//System.out.println(arc);
if (FST.targetHasArcs(arc) && !seen.get((int) node)) {
//seen.set((int) node);
fst.readFirstRealTargetArc(node, arc, reader);
while (true) {
queue.add(new FST.Arc<T>().copyFrom(arc));
if (arc.isLast()) {
break;
} else {
fst.readNextRealArc(arc, reader);
}
}
}
}
}
}

View File

@ -246,9 +246,15 @@ public class TempTermOutputs extends Outputs<TempTermOutputs.TempMetaData> {
out.writeBytes(data.bytes, 0, data.bytes.length);
}
if (bit2 > 0) { // stats exist
out.writeVInt(data.docFreq);
if (hasPos) {
out.writeVLong(data.totalTermFreq - data.docFreq);
if (data.docFreq == data.totalTermFreq) {
out.writeVInt((data.docFreq << 1) | 1);
} else {
out.writeVInt((data.docFreq << 1));
out.writeVLong(data.totalTermFreq - data.docFreq);
}
} else {
out.writeVInt(data.docFreq);
}
}
}
@ -277,9 +283,14 @@ public class TempTermOutputs extends Outputs<TempTermOutputs.TempMetaData> {
in.readBytes(bytes, 0, bytesSize);
}
if (bit2 > 0) { // stats exist
docFreq = in.readVInt();
int code = in.readVInt();
if (hasPos) {
totalTermFreq = docFreq + in.readVLong();
totalTermFreq = docFreq = code >>> 1;
if ((code & 1) == 0) {
totalTermFreq += in.readVLong();
}
} else {
docFreq = code;
}
}
return new TempMetaData(longs, bytes, docFreq, totalTermFreq);