mirror of https://github.com/apache/lucene.git
disperse the old FST TODO file
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1457549 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
8cde21a2c2
commit
fe6c8d3ea8
|
@ -25,6 +25,9 @@ import org.apache.lucene.util.RamUsageEstimator;
|
|||
import org.apache.lucene.util.fst.FST.INPUT_TYPE; // javadoc
|
||||
import org.apache.lucene.util.packed.PackedInts;
|
||||
|
||||
// TODO: could we somehow stream an FST to disk while we
|
||||
// build it?
|
||||
|
||||
/**
|
||||
* Builds a minimal FST (maps an IntsRef term to an arbitrary
|
||||
* output) from pre-sorted terms with outputs. The FST
|
||||
|
|
|
@ -1093,6 +1093,9 @@ public final class FST<T> {
|
|||
return arc;
|
||||
}
|
||||
|
||||
// TODO: could we somehow [partially] tableize arc lookups
|
||||
// look automaton?
|
||||
|
||||
/** Finds an arc leaving the incoming arc, replacing the arc in place.
|
||||
* This returns null if the arc was not found, else the incoming arc. */
|
||||
public Arc<T> findTargetArc(int labelToMatch, Arc<T> follow, Arc<T> arc, BytesReader in) throws IOException {
|
||||
|
|
|
@ -1,39 +0,0 @@
|
|||
is threadlocal.get costly? if so maybe make an FSTReader? would hold this "relative" pos, and each thread'd use it for reading, instead of PosRef
|
||||
|
||||
maybe changed Outputs class to "reuse" stuff? eg this new BytesRef in ByteSequenceOutputs..
|
||||
|
||||
do i even "need" both non_final_end_state and final_end_state?
|
||||
|
||||
hmm -- can I get weights working here?
|
||||
|
||||
can FST be used to index all internal substrings, mapping to term?
|
||||
- maybe put back ability to add multiple outputs per input...?
|
||||
|
||||
make this work w/ char...?
|
||||
- then FSTCharFilter/FSTTokenFilter
|
||||
- syn filter?
|
||||
|
||||
experiment: try reversing terms before compressing -- how much smaller?
|
||||
|
||||
maybe seprate out a 'writable/growing fst' from a read-only one?
|
||||
|
||||
can we somehow [partially] tableize lookups like oal.util.automaton?
|
||||
|
||||
make an FST terms index option for codecs...?
|
||||
|
||||
make an FSTCharsMap?
|
||||
|
||||
need a benchmark testing FST traversal -- just fix the static main to rewind & visit all terms
|
||||
|
||||
thread state
|
||||
|
||||
when writing FST to disk:
|
||||
- Sequentially writing (would save memory in codec during indexing). We are now using DataOutput, which could also go directly to disk
|
||||
- problem: size of BytesRef must be known before
|
||||
|
||||
later
|
||||
- maybe don't require FSTEnum.advance to be forward only?
|
||||
- should i make a posIntOutputs separate from posLongOutputs?
|
||||
- mv randomAccpetedWord / run / etc. from test into FST?
|
||||
- hmm get multi-outputs working again? do we ever need this?
|
||||
|
|
@ -587,6 +587,12 @@ public class TestFSTs extends LuceneTestCase {
|
|||
}
|
||||
}
|
||||
|
||||
// TODO: try experiment: reverse terms before
|
||||
// compressing -- how much smaller?
|
||||
|
||||
// TODO: can FST be used to index all internal substrings,
|
||||
// mapping to term?
|
||||
|
||||
// java -cp ../build/codecs/classes/java:../test-framework/lib/randomizedtesting-runner-2.0.8.jar:../build/core/classes/test:../build/core/classes/test-framework:../build/core/classes/java:../build/test-framework/classes/java:../test-framework/lib/junit-4.10.jar org.apache.lucene.util.fst.TestFSTs /xold/tmp/allTerms3.txt out
|
||||
public static void main(String[] args) throws IOException {
|
||||
int prune = 0;
|
||||
|
|
Loading…
Reference in New Issue