LUCENE-5028: remove doShare from FST's PositiveIntOutputs

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1488987 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Michael McCandless 2013-06-03 13:33:39 +00:00
parent e38d831c4f
commit 77e064a334
15 changed files with 48 additions and 71 deletions

View File

@ -123,6 +123,9 @@ Bug Fixes
some readers did not have the requested numeric DV field.
(Rob Audenaerde, Shai Erera)
* LUCENE-5028: Remove pointless and confusing doShare option in FST's
PositiveIntOutputs (Han Jiang via Mike McCandless)
Optimizations
* LUCENE-4936: Improve numeric doc values compression in case all values share

View File

@ -44,7 +44,7 @@ public final class TokenInfoDictionary extends BinaryDictionary {
try {
is = getResource(FST_FILENAME_SUFFIX);
is = new BufferedInputStream(is);
fst = new FST<Long>(new InputStreamDataInput(is), PositiveIntOutputs.getSingleton(true));
fst = new FST<Long>(new InputStreamDataInput(is), PositiveIntOutputs.getSingleton());
} catch (IOException ioe) {
priorE = ioe;
} finally {

View File

@ -88,7 +88,7 @@ public final class UserDictionary implements Dictionary {
List<String> data = new ArrayList<String>(featureEntries.size());
List<int[]> segmentations = new ArrayList<int[]>(featureEntries.size());
PositiveIntOutputs fstOutput = PositiveIntOutputs.getSingleton(true);
PositiveIntOutputs fstOutput = PositiveIntOutputs.getSingleton();
Builder<Long> fstBuilder = new Builder<Long>(FST.INPUT_TYPE.BYTE2, fstOutput);
IntsRef scratch = new IntsRef();
long ord = 0;

View File

@ -131,7 +131,7 @@ public class TokenInfoDictionaryBuilder {
System.out.println(" encode...");
PositiveIntOutputs fstOutput = PositiveIntOutputs.getSingleton(true);
PositiveIntOutputs fstOutput = PositiveIntOutputs.getSingleton();
Builder<Long> fstBuilder = new Builder<Long>(FST.INPUT_TYPE.BYTE2, 0, 0, true, true, Integer.MAX_VALUE, fstOutput, null, true, PackedInts.DEFAULT, true, 15);
IntsRef scratch = new IntsRef();
long ord = -1; // first ord will be 0

View File

@ -44,7 +44,7 @@ import org.apache.lucene.util.fst.Util; // for toDot
* @lucene.experimental */
public class VariableGapTermsIndexReader extends TermsIndexReaderBase {
private final PositiveIntOutputs fstOutputs = PositiveIntOutputs.getSingleton(true);
private final PositiveIntOutputs fstOutputs = PositiveIntOutputs.getSingleton();
private int indexDivisor;
// Closed if indexLoaded is true:
@ -199,7 +199,7 @@ public class VariableGapTermsIndexReader extends TermsIndexReaderBase {
if (indexDivisor > 1) {
// subsample
final IntsRef scratchIntsRef = new IntsRef();
final PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton(true);
final PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton();
final Builder<Long> builder = new Builder<Long>(FST.INPUT_TYPE.BYTE1, outputs);
final BytesRefFSTEnum<Long> fstEnum = new BytesRefFSTEnum<Long>(fst);
BytesRefFSTEnum.InputOutput<Long> result;

View File

@ -235,7 +235,7 @@ public class VariableGapTermsIndexWriter extends TermsIndexWriterBase {
public FSTFieldWriter(FieldInfo fieldInfo, long termsFilePointer) throws IOException {
this.fieldInfo = fieldInfo;
fstOutputs = PositiveIntOutputs.getSingleton(true);
fstOutputs = PositiveIntOutputs.getSingleton();
fstBuilder = new Builder<Long>(FST.INPUT_TYPE.BYTE1, fstOutputs);
indexStart = out.getFilePointer();
////System.out.println("VGW: field=" + fieldInfo.name);

View File

@ -513,7 +513,7 @@ class SimpleTextFieldsReader extends FieldsProducer {
}
private void loadTerms() throws IOException {
PositiveIntOutputs posIntOutputs = PositiveIntOutputs.getSingleton(false);
PositiveIntOutputs posIntOutputs = PositiveIntOutputs.getSingleton();
final Builder<PairOutputs.Pair<Long,PairOutputs.Pair<Long,Long>>> b;
final PairOutputs<Long,Long> outputsInner = new PairOutputs<Long,Long>(posIntOutputs, posIntOutputs);
final PairOutputs<Long,PairOutputs.Pair<Long,Long>> outputs = new PairOutputs<Long,PairOutputs.Pair<Long,Long>>(posIntOutputs,

View File

@ -245,7 +245,7 @@ class Lucene42DocValuesConsumer extends DocValuesConsumer {
meta.writeVInt(field.number);
meta.writeByte(FST);
meta.writeLong(data.getFilePointer());
PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton(true);
PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton();
Builder<Long> builder = new Builder<Long>(INPUT_TYPE.BYTE1, outputs);
IntsRef scratch = new IntsRef();
long ord = 0;

View File

@ -278,7 +278,7 @@ class Lucene42DocValuesProducer extends DocValuesProducer {
instance = fstInstances.get(field.number);
if (instance == null) {
data.seek(entry.offset);
instance = new FST<Long>(data, PositiveIntOutputs.getSingleton(true));
instance = new FST<Long>(data, PositiveIntOutputs.getSingleton());
fstInstances.put(field.number, instance);
}
}
@ -352,7 +352,7 @@ class Lucene42DocValuesProducer extends DocValuesProducer {
instance = fstInstances.get(field.number);
if (instance == null) {
data.seek(entry.offset);
instance = new FST<Long>(data, PositiveIntOutputs.getSingleton(true));
instance = new FST<Long>(data, PositiveIntOutputs.getSingleton());
fstInstances.put(field.number, instance);
}
}

View File

@ -33,26 +33,13 @@ public final class PositiveIntOutputs extends Outputs<Long> {
private final static Long NO_OUTPUT = new Long(0);
private final boolean doShare;
private final static PositiveIntOutputs singleton = new PositiveIntOutputs();
private final static PositiveIntOutputs singletonShare = new PositiveIntOutputs(true);
private final static PositiveIntOutputs singletonNoShare = new PositiveIntOutputs(false);
private PositiveIntOutputs(boolean doShare) {
this.doShare = doShare;
private PositiveIntOutputs() {
}
/** Returns the instance of PositiveIntOutputs. */
public static PositiveIntOutputs getSingleton() {
return getSingleton(true);
}
/** Expert: pass doShare=false to disable output sharing.
* In some cases this may result in a smaller FST,
* however it will also break methods like {@link
* Util#getByOutput} and {@link Util#shortestPaths}. */
public static PositiveIntOutputs getSingleton(boolean doShare) {
return doShare ? singletonShare : singletonNoShare;
return singleton;
}
@Override
@ -61,14 +48,10 @@ public final class PositiveIntOutputs extends Outputs<Long> {
assert valid(output2);
if (output1 == NO_OUTPUT || output2 == NO_OUTPUT) {
return NO_OUTPUT;
} else if (doShare) {
} else {
assert output1 > 0;
assert output2 > 0;
return Math.min(output1, output2);
} else if (output1.equals(output2)) {
return output1;
} else {
return NO_OUTPUT;
}
}
@ -134,6 +117,6 @@ public final class PositiveIntOutputs extends Outputs<Long> {
@Override
public String toString() {
return "PositiveIntOutputs(doShare=" + doShare + ")";
return "PositiveIntOutputs";
}
}

View File

@ -93,9 +93,7 @@ public final class Util {
*
* <p>NOTE: this only works with {@code FST<Long>}, only
* works when the outputs are ascending in order with
* the inputs and only works when you shared
* the outputs (pass doShare=true to {@link
* PositiveIntOutputs#getSingleton}).
* the inputs.
* For example, simple ordinals (0, 1,
* 2, ...), or file offsets (when appending to a file)
* fit this. */
@ -517,11 +515,7 @@ public final class Util {
}
/** Starting from node, find the top N min cost
* completions to a final node.
*
* <p>NOTE: you must share the outputs when you build the
* FST (pass doShare=true to {@link
* PositiveIntOutputs#getSingleton}). */
* completions to a final node. */
public static <T> MinResult<T>[] shortestPaths(FST<T> fst, FST.Arc<T> fromNode, T startOutput, Comparator<T> comparator, int topN,
boolean allowEmptyString) throws IOException {

View File

@ -43,7 +43,7 @@ FST Construction example:
String inputValues[] = {"cat", "dog", "dogs"};
long outputValues[] = {5, 7, 12};
PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton(true);
PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton();
Builder&lt;Long&gt; builder = new Builder&lt;Long&gt;(INPUT_TYPE.BYTE1, outputs);
BytesRef scratchBytes = new BytesRef();
IntsRef scratchInts = new IntsRef();
@ -60,8 +60,7 @@ Retrieval by key:
</pre>
Retrieval by value:
<pre class="prettyprint">
// Only works because outputs are also in sorted order, and
// we passed 'true' for sharing to PositiveIntOutputs.getSingleton
// Only works because outputs are also in sorted order
IntsRef key = Util.getByOutput(fst, 12);
System.out.println(Util.toBytesRef(key, scratchBytes).utf8ToString()); // dogs
</pre>
@ -77,7 +76,6 @@ Iterate over key-value pairs in sorted order:
</pre>
N-shortest paths by weight:
<pre class="prettyprint">
// Only works because we passed 'true' for sharing to PositiveIntOutputs.getSingleton
Comparator&lt;Long&gt; comparator = new Comparator&lt;Long&gt;() {
public int compare(Long left, Long right) {
return left.compareTo(right);

View File

@ -126,7 +126,7 @@ public class TestFSTs extends LuceneTestCase {
// FST ord pos int
{
final PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton(true);
final PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton();
final List<FSTTester.InputOutput<Long>> pairs = new ArrayList<FSTTester.InputOutput<Long>>(terms2.length);
for(int idx=0;idx<terms2.length;idx++) {
pairs.add(new FSTTester.InputOutput<Long>(terms2[idx], (long) idx));
@ -171,7 +171,7 @@ public class TestFSTs extends LuceneTestCase {
// PositiveIntOutput (ord)
{
final PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton(true);
final PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton();
final List<FSTTester.InputOutput<Long>> pairs = new ArrayList<FSTTester.InputOutput<Long>>(terms.length);
for(int idx=0;idx<terms.length;idx++) {
pairs.add(new FSTTester.InputOutput<Long>(terms[idx], (long) idx));
@ -181,8 +181,7 @@ public class TestFSTs extends LuceneTestCase {
// PositiveIntOutput (random monotonically increasing positive number)
{
final boolean doShare = random().nextBoolean();
final PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton(doShare);
final PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton();
final List<FSTTester.InputOutput<Long>> pairs = new ArrayList<FSTTester.InputOutput<Long>>(terms.length);
long lastOutput = 0;
for(int idx=0;idx<terms.length;idx++) {
@ -190,12 +189,12 @@ public class TestFSTs extends LuceneTestCase {
lastOutput = value;
pairs.add(new FSTTester.InputOutput<Long>(terms[idx], value));
}
new FSTTester<Long>(random(), dir, inputMode, pairs, outputs, doShare).doTest(true);
new FSTTester<Long>(random(), dir, inputMode, pairs, outputs, true).doTest(true);
}
// PositiveIntOutput (random positive number)
{
final PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton(random().nextBoolean());
final PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton();
final List<FSTTester.InputOutput<Long>> pairs = new ArrayList<FSTTester.InputOutput<Long>>(terms.length);
for(int idx=0;idx<terms.length;idx++) {
pairs.add(new FSTTester.InputOutput<Long>(terms[idx], _TestUtil.nextLong(random(), 0, Long.MAX_VALUE)));
@ -205,8 +204,8 @@ public class TestFSTs extends LuceneTestCase {
// Pair<ord, (random monotonically increasing positive number)>
{
final PositiveIntOutputs o1 = PositiveIntOutputs.getSingleton(random().nextBoolean());
final PositiveIntOutputs o2 = PositiveIntOutputs.getSingleton(random().nextBoolean());
final PositiveIntOutputs o1 = PositiveIntOutputs.getSingleton();
final PositiveIntOutputs o2 = PositiveIntOutputs.getSingleton();
final PairOutputs<Long,Long> outputs = new PairOutputs<Long,Long>(o1, o2);
final List<FSTTester.InputOutput<PairOutputs.Pair<Long,Long>>> pairs = new ArrayList<FSTTester.InputOutput<PairOutputs.Pair<Long,Long>>>(terms.length);
long lastOutput = 0;
@ -306,7 +305,7 @@ public class TestFSTs extends LuceneTestCase {
}
IndexReader r = DirectoryReader.open(writer, true);
writer.close();
final PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton(random().nextBoolean());
final PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton();
final boolean doRewrite = random().nextBoolean();
@ -653,8 +652,8 @@ public class TestFSTs extends LuceneTestCase {
if (storeOrds && storeDocFreqs) {
// Store both ord & docFreq:
final PositiveIntOutputs o1 = PositiveIntOutputs.getSingleton(true);
final PositiveIntOutputs o2 = PositiveIntOutputs.getSingleton(false);
final PositiveIntOutputs o1 = PositiveIntOutputs.getSingleton();
final PositiveIntOutputs o2 = PositiveIntOutputs.getSingleton();
final PairOutputs<Long,Long> outputs = new PairOutputs<Long,Long>(o1, o2);
new VisitTerms<PairOutputs.Pair<Long,Long>>(dirOut, wordsFileIn, inputMode, prune, outputs, doPack, noArcArrays) {
Random rand;
@ -669,7 +668,7 @@ public class TestFSTs extends LuceneTestCase {
}.run(limit, verify, false);
} else if (storeOrds) {
// Store only ords
final PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton(true);
final PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton();
new VisitTerms<Long>(dirOut, wordsFileIn, inputMode, prune, outputs, doPack, noArcArrays) {
@Override
public Long getOutput(IntsRef input, int ord) {
@ -678,7 +677,7 @@ public class TestFSTs extends LuceneTestCase {
}.run(limit, verify, true);
} else if (storeDocFreqs) {
// Store only docFreq
final PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton(false);
final PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton();
new VisitTerms<Long>(dirOut, wordsFileIn, inputMode, prune, outputs, doPack, noArcArrays) {
Random rand;
@Override
@ -781,7 +780,7 @@ public class TestFSTs extends LuceneTestCase {
// smaller FST if the outputs grow monotonically. But
// if numbers are "random", false should give smaller
// final size:
final PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton(true);
final PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton();
// Build an FST mapping BytesRef -> Long
final Builder<Long> builder = new Builder<Long>(FST.INPUT_TYPE.BYTE1, outputs);
@ -1100,7 +1099,7 @@ public class TestFSTs extends LuceneTestCase {
}
public void testFinalOutputOnEndState() throws Exception {
final PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton(true);
final PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton();
final Builder<Long> builder = new Builder<Long>(FST.INPUT_TYPE.BYTE4, 2, 0, true, true, Integer.MAX_VALUE, outputs, null, random().nextBoolean(), PackedInts.DEFAULT, true, 15);
builder.add(Util.toUTF32("stat", new IntsRef()), 17L);
@ -1115,7 +1114,7 @@ public class TestFSTs extends LuceneTestCase {
}
public void testInternalFinalState() throws Exception {
final PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton(true);
final PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton();
final boolean willRewrite = random().nextBoolean();
final Builder<Long> builder = new Builder<Long>(FST.INPUT_TYPE.BYTE1, 0, 0, true, true, Integer.MAX_VALUE, outputs, null, willRewrite, PackedInts.DEFAULT, true, 15);
builder.add(Util.toIntsRef(new BytesRef("stat"), new IntsRef()), outputs.getNoOutput());
@ -1136,7 +1135,7 @@ public class TestFSTs extends LuceneTestCase {
// Make sure raw FST can differentiate between final vs
// non-final end nodes
public void testNonFinalStopNode() throws Exception {
final PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton(true);
final PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton();
final Long nothing = outputs.getNoOutput();
final Builder<Long> b = new Builder<Long>(FST.INPUT_TYPE.BYTE1, outputs);
@ -1216,7 +1215,7 @@ public class TestFSTs extends LuceneTestCase {
};
public void testShortestPaths() throws Exception {
final PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton(true);
final PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton();
final Builder<Long> builder = new Builder<Long>(FST.INPUT_TYPE.BYTE1, outputs);
final IntsRef scratch = new IntsRef();
@ -1258,8 +1257,8 @@ public class TestFSTs extends LuceneTestCase {
public void testShortestPathsWFST() throws Exception {
PairOutputs<Long,Long> outputs = new PairOutputs<Long,Long>(
PositiveIntOutputs.getSingleton(true), // weight
PositiveIntOutputs.getSingleton(true) // output
PositiveIntOutputs.getSingleton(), // weight
PositiveIntOutputs.getSingleton() // output
);
final Builder<Pair<Long,Long>> builder = new Builder<Pair<Long,Long>>(FST.INPUT_TYPE.BYTE1, outputs);
@ -1301,7 +1300,7 @@ public class TestFSTs extends LuceneTestCase {
final TreeMap<String,Long> slowCompletor = new TreeMap<String,Long>();
final TreeSet<String> allPrefixes = new TreeSet<String>();
final PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton(true);
final PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton();
final Builder<Long> builder = new Builder<Long>(FST.INPUT_TYPE.BYTE1, outputs);
final IntsRef scratch = new IntsRef();
@ -1416,8 +1415,8 @@ public class TestFSTs extends LuceneTestCase {
final TreeSet<String> allPrefixes = new TreeSet<String>();
PairOutputs<Long,Long> outputs = new PairOutputs<Long,Long>(
PositiveIntOutputs.getSingleton(true), // weight
PositiveIntOutputs.getSingleton(true) // output
PositiveIntOutputs.getSingleton(), // weight
PositiveIntOutputs.getSingleton() // output
);
final Builder<Pair<Long,Long>> builder = new Builder<Pair<Long,Long>>(FST.INPUT_TYPE.BYTE1, outputs);
final IntsRef scratch = new IntsRef();

View File

@ -512,7 +512,7 @@ public class AnalyzingSuggester extends Lookup {
reader = new Sort.ByteSequencesReader(tempSorted);
PairOutputs<Long,BytesRef> outputs = new PairOutputs<Long,BytesRef>(PositiveIntOutputs.getSingleton(true), ByteSequenceOutputs.getSingleton());
PairOutputs<Long,BytesRef> outputs = new PairOutputs<Long,BytesRef>(PositiveIntOutputs.getSingleton(), ByteSequenceOutputs.getSingleton());
Builder<Pair<Long,BytesRef>> builder = new Builder<Pair<Long,BytesRef>>(FST.INPUT_TYPE.BYTE1, outputs);
// Build FST:
@ -634,7 +634,7 @@ public class AnalyzingSuggester extends Lookup {
public boolean load(InputStream input) throws IOException {
DataInput dataIn = new InputStreamDataInput(input);
try {
this.fst = new FST<Pair<Long,BytesRef>>(dataIn, new PairOutputs<Long,BytesRef>(PositiveIntOutputs.getSingleton(true), ByteSequenceOutputs.getSingleton()));
this.fst = new FST<Pair<Long,BytesRef>>(dataIn, new PairOutputs<Long,BytesRef>(PositiveIntOutputs.getSingleton(), ByteSequenceOutputs.getSingleton()));
maxAnalyzedPathsForOneInput = dataIn.readVInt();
hasPayloads = dataIn.readByte() == 1;
} finally {

View File

@ -101,7 +101,7 @@ public class WFSTCompletionLookup extends Lookup {
TermFreqIterator iter = new WFSTTermFreqIteratorWrapper(iterator);
IntsRef scratchInts = new IntsRef();
BytesRef previous = null;
PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton(true);
PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton();
Builder<Long> builder = new Builder<Long>(FST.INPUT_TYPE.BYTE1, outputs);
while ((scratch = iter.next()) != null) {
long cost = iter.weight();
@ -136,7 +136,7 @@ public class WFSTCompletionLookup extends Lookup {
@Override
public boolean load(InputStream input) throws IOException {
try {
this.fst = new FST<Long>(new InputStreamDataInput(input), PositiveIntOutputs.getSingleton(true));
this.fst = new FST<Long>(new InputStreamDataInput(input), PositiveIntOutputs.getSingleton());
} finally {
IOUtils.close(input);
}