mirror of https://github.com/apache/lucene.git
javadocs
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1332646 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
841c9346ef
commit
981c324e58
|
@ -304,7 +304,12 @@ public class Builder<T> {
|
|||
|
||||
/** It's OK to add the same input twice in a row with
|
||||
* different outputs, as long as outputs impls the merge
|
||||
* method. */
|
||||
* method. Note that input is fully consumed after this
|
||||
* method is returned (so caller is free to reuse), but
|
||||
* output is not. So if your outputs are changeable (eg
|
||||
* {@link ByteSequenceOutputs} or {@link
|
||||
* IntSequenceOutputs}) then you cannot reuse across
|
||||
* calls. */
|
||||
public void add(IntsRef input, T output) throws IOException {
|
||||
/*
|
||||
if (DEBUG) {
|
||||
|
|
|
@ -25,32 +25,33 @@ Finite state transducers
|
|||
This package implements <a href="http://en.wikipedia.org/wiki/Finite_state_transducer">
|
||||
Finite State Transducers</a> with the following characteristics:
|
||||
<ul>
|
||||
<li>Fast construction of the minimal FST
|
||||
<li>Fast and low memory overhead construction of the minimal FST
|
||||
(but inputs must be provided in sorted order)</li>
|
||||
<li>Low object overhead and quick deserialization (byte[] representation)</li>
|
||||
<li>Optional compression: {@link org.apache.lucene.util.fst.FST#pack FST.pack()}</li>
|
||||
<li>Optional two-pass compression: {@link org.apache.lucene.util.fst.FST#pack FST.pack()}</li>
|
||||
<li>{@link org.apache.lucene.util.fst.Util#getByOutput Lookup-by-output} when the
|
||||
outputs are in sorted order (ordinals or file pointers)</li>
|
||||
outputs are in sorted order (e.g., ordinals or file pointers)</li>
|
||||
<li>Pluggable {@link org.apache.lucene.util.fst.Outputs Outputs} representation</li>
|
||||
<li>{@link org.apache.lucene.util.fst.Util#shortestPaths N-shortest-paths} search by
|
||||
weight</li>
|
||||
<li>Enumerators ({@link org.apache.lucene.util.fst.IntsRefFSTEnum IntsRef} and {@link org.apache.lucene.util.fst.BytesRefFSTEnum BytesRef}) that behave like {@link java.util.SortedMap SortedMap} iterators
|
||||
</ul>
|
||||
<p>
|
||||
FST Construction example:
|
||||
<pre class="prettyprint">
|
||||
// input values (keys). These must be provided to Builder in sorted order!
|
||||
String inputValues[] = { "cat", "dog", "dogs" };
|
||||
long outputValues[] = { 5, 7, 12 };
|
||||
// Input values (keys). These must be provided to Builder in Unicode sorted order!
|
||||
String inputValues[] = {"cat", "dog", "dogs"};
|
||||
long outputValues[] = {5, 7, 12};
|
||||
|
||||
PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton(true);
|
||||
Builder<Long> builder = new Builder<Long>(INPUT_TYPE.BYTE1, outputs);
|
||||
Builder<Long> builder = new Builder<Long>(INPUT_TYPE.BYTE1, outputs);
|
||||
BytesRef scratchBytes = new BytesRef();
|
||||
IntsRef scratchInts = new IntsRef();
|
||||
for (int i = 0; i < inputValues.length; i++) {
|
||||
for (int i = 0; i < inputValues.length; i++) {
|
||||
scratchBytes.copyChars(inputValues[i]);
|
||||
builder.add(Util.toIntsRef(scratchBytes, scratchInts), outputValues[i]);
|
||||
}
|
||||
FST<Long> fst = builder.finish();
|
||||
FST<Long> fst = builder.finish();
|
||||
</pre>
|
||||
Retrieval by key:
|
||||
<pre class="prettyprint">
|
||||
|
@ -59,30 +60,31 @@ Retrieval by key:
|
|||
</pre>
|
||||
Retrieval by value:
|
||||
<pre class="prettyprint">
|
||||
// Only works because outputs are in sorted order
|
||||
// Only works because outputs are also in sorted order, and
|
||||
// we passed 'true' for sharing to PositiveIntOutputs.getSingleton
|
||||
IntsRef key = Util.getByOutput(fst, 12);
|
||||
System.out.println(Util.toBytesRef(key, scratchBytes).utf8ToString()); // dogs
|
||||
</pre>
|
||||
Iterate over key-value pairs in sorted order:
|
||||
<pre class="prettyprint">
|
||||
// Like TermsEnum, this also supports seeking (advance)
|
||||
BytesRefFSTEnum<Long> iterator = new BytesRefFSTEnum<Long>(fst);
|
||||
BytesRefFSTEnum<Long> iterator = new BytesRefFSTEnum<Long>(fst);
|
||||
while (iterator.next() != null) {
|
||||
InputOutput<Long> mapEntry = iterator.current();
|
||||
InputOutput<Long> mapEntry = iterator.current();
|
||||
System.out.println(mapEntry.input.utf8ToString());
|
||||
System.out.println(mapEntry.output);
|
||||
}
|
||||
</pre>
|
||||
N-shortest paths by weight:
|
||||
<pre class="prettyprint">
|
||||
// Only works because we passed 'true' for sharing to getSingleton
|
||||
Comparator<Long> comparator = new Comparator<Long>() {
|
||||
// Only works because we passed 'true' for sharing to PositiveIntOutputs.getSingleton
|
||||
Comparator<Long> comparator = new Comparator<Long>() {
|
||||
public int compare(Long left, Long right) {
|
||||
return left.compareTo(right);
|
||||
}
|
||||
};
|
||||
Arc<Long> firstArc = fst.getFirstArc(new Arc<Long>());
|
||||
MinResult<Long> paths[] = Util.shortestPaths(fst, firstArc, comparator, 2);
|
||||
Arc<Long> firstArc = fst.getFirstArc(new Arc<Long>());
|
||||
MinResult<Long> paths[] = Util.shortestPaths(fst, firstArc, comparator, 2);
|
||||
System.out.println(Util.toBytesRef(paths[0].input, scratchBytes).utf8ToString()); // cat
|
||||
System.out.println(paths[0].output); // 5
|
||||
System.out.println(Util.toBytesRef(paths[1].input, scratchBytes).utf8ToString()); // dog
|
||||
|
|
Loading…
Reference in New Issue