From 3d8bfad40b0217a9a2faa193e3086e876caab041 Mon Sep 17 00:00:00 2001 From: Michael McCandless Date: Sat, 4 Jul 2015 19:47:35 +0000 Subject: [PATCH] LUCENE-6365: switch to iterator API to get all finite strings from an Automaton git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1689192 13f79535-47bb-0310-9956-ffa450edef68 --- lucene/CHANGES.txt | 4 + .../util/automaton/FiniteStringsIterator.java | 216 +++++++++++++++++ .../LimitedFiniteStringsIterator.java | 83 +++++++ .../lucene/util/automaton/Operations.java | 150 ------------ .../automaton/FiniteStringsIteratorTest.java | 217 ++++++++++++++++++ .../LimitedFiniteStringsIteratorTest.java | 101 ++++++++ .../lucene/util/automaton/TestAutomaton.java | 16 +- .../lucene/util/automaton/TestOperations.java | 202 +++------------- .../util/automaton/TestUTF32ToUTF8.java | 2 +- .../suggest/analyzing/AnalyzingSuggester.java | 28 +-- .../suggest/analyzing/FuzzySuggester.java | 31 ++- .../document/CompletionTokenStream.java | 53 ++--- .../document/FuzzyCompletionQuery.java | 45 ++-- .../suggest/analyzing/FuzzySuggesterTest.java | 8 +- 14 files changed, 737 insertions(+), 419 deletions(-) create mode 100644 lucene/core/src/java/org/apache/lucene/util/automaton/FiniteStringsIterator.java create mode 100644 lucene/core/src/java/org/apache/lucene/util/automaton/LimitedFiniteStringsIterator.java create mode 100644 lucene/core/src/test/org/apache/lucene/util/automaton/FiniteStringsIteratorTest.java create mode 100644 lucene/core/src/test/org/apache/lucene/util/automaton/LimitedFiniteStringsIteratorTest.java diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index 0b2509bb623..603c986e73a 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -126,6 +126,10 @@ New Features * LUCENE-6365: Add Operations.topoSort, to run topological sort of the states in an Automaton (Markus Heiden via Mike McCandless) +* LUCENE-6365: Replace Operations.getFiniteStrings with a + more scalable iterator API (FiniteStringsIterator) 
(Markus Heiden + via Mike McCandless) + API Changes * LUCENE-6508: Simplify Lock api, there is now just diff --git a/lucene/core/src/java/org/apache/lucene/util/automaton/FiniteStringsIterator.java b/lucene/core/src/java/org/apache/lucene/util/automaton/FiniteStringsIterator.java new file mode 100644 index 00000000000..ee332952379 --- /dev/null +++ b/lucene/core/src/java/org/apache/lucene/util/automaton/FiniteStringsIterator.java @@ -0,0 +1,216 @@ +package org.apache.lucene.util.automaton; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import org.apache.lucene.util.ArrayUtil; +import org.apache.lucene.util.IntsRef; +import org.apache.lucene.util.IntsRefBuilder; +import org.apache.lucene.util.RamUsageEstimator; + +import java.util.BitSet; + +/** + * Iterates all accepted strings. + * + *

<p>If the {@link Automaton} has cycles then this iterator may throw an {@code + * IllegalArgumentException}, but this is not guaranteed! + * + *

Be aware that the iteration order is implementation dependent + * and may change across releases. + * + * @lucene.experimental + */ +public class FiniteStringsIterator { + /** + * Empty string. + */ + private static final IntsRef EMPTY = new IntsRef(); + + /** + * Automaton to create finite string from. + */ + private final Automaton a; + + /** + * Tracks which states are in the current path, for cycle detection. + */ + private final BitSet pathStates; + + /** + * Builder for current finite string. + */ + private final IntsRefBuilder string; + + /** + * Stack to hold our current state in the recursion/iteration. + */ + private PathNode[] nodes; + + /** + * Emit empty string?. + */ + private boolean emitEmptyString; + + /** + * Constructor. + * + * @param a Automaton to create finite string from. + */ + public FiniteStringsIterator(Automaton a) { + this.a = a; + this.nodes = new PathNode[16]; + for (int i = 0, end = nodes.length; i < end; i++) { + nodes[i] = new PathNode(); + } + this.string = new IntsRefBuilder(); + this.pathStates = new BitSet(a.getNumStates()); + this.string.setLength(0); + this.emitEmptyString = a.isAccept(0); + + // Start iteration with node 0. + if (a.getNumTransitions(0) > 0) { + pathStates.set(0); + nodes[0].resetState(a, 0); + string.append(0); + } + } + + /** + * Generate next finite string. + * The return value is just valid until the next call of this method! + * + * @return Finite string or null, if no more finite strings are available. 
+ */ + public IntsRef next() { + // Special case the empty string, as usual: + if (emitEmptyString) { + emitEmptyString = false; + return EMPTY; + } + + for (int depth = string.length(); depth > 0;) { + PathNode node = nodes[depth-1]; + + // Get next label leaving the current node: + int label = node.nextLabel(a); + if (label != -1) { + string.setIntAt(depth - 1, label); + + int to = node.to; + if (a.getNumTransitions(to) != 0) { + // Now recurse: the destination of this transition has outgoing transitions: + if (pathStates.get(to)) { + throw new IllegalArgumentException("automaton has cycles"); + } + pathStates.set(to); + + // Push node onto stack: + growStack(depth); + nodes[depth].resetState(a, to); + depth++; + string.setLength(depth); + string.grow(depth); + } else if (a.isAccept(to)) { + // This transition leads to an accept state, so we save the current string: + return string.get(); + } + } else { + // No more transitions leaving this state, pop/return back to previous state: + int state = node.state; + assert pathStates.get(state); + pathStates.clear(state); + depth--; + string.setLength(depth); + + if (a.isAccept(state)) { + // This transition leads to an accept state, so we save the current string: + return string.get(); + } + } + } + + // Finished iteration. + return null; + } + + /** + * Grow path stack, if required. + */ + private void growStack(int depth) { + if (nodes.length == depth) { + PathNode[] newNodes = new PathNode[ArrayUtil.oversize(nodes.length + 1, RamUsageEstimator.NUM_BYTES_OBJECT_REF)]; + System.arraycopy(nodes, 0, newNodes, 0, nodes.length); + for (int i = depth, end = newNodes.length; i < end; i++) { + newNodes[i] = new PathNode(); + } + nodes = newNodes; + } + } + + /** + * Nodes for path stack. + */ + private static class PathNode { + + /** Which state the path node ends on, whose + * transitions we are enumerating. */ + public int state; + + /** Which state the current transition leads to. 
*/ + public int to; + + /** Which transition we are on. */ + public int transition; + + /** Which label we are on, in the min-max range of the + * current Transition */ + public int label; + + private final Transition t = new Transition(); + + public void resetState(Automaton a, int state) { + assert a.getNumTransitions(state) != 0; + this.state = state; + transition = 0; + a.getTransition(state, 0, t); + label = t.min; + to = t.dest; + } + + /** Returns next label of current transition, or + * advances to next transition and returns its first + * label, if current one is exhausted. If there are + * no more transitions, returns -1. */ + public int nextLabel(Automaton a) { + if (label > t.max) { + // We've exhaused the current transition's labels; + // move to next transitions: + transition++; + if (transition >= a.getNumTransitions(state)) { + // We're done iterating transitions leaving this state + label = -1; + return -1; + } + a.getTransition(state, transition, t); + label = t.min; + to = t.dest; + } + return label++; + } + } +} diff --git a/lucene/core/src/java/org/apache/lucene/util/automaton/LimitedFiniteStringsIterator.java b/lucene/core/src/java/org/apache/lucene/util/automaton/LimitedFiniteStringsIterator.java new file mode 100644 index 00000000000..90107e4629b --- /dev/null +++ b/lucene/core/src/java/org/apache/lucene/util/automaton/LimitedFiniteStringsIterator.java @@ -0,0 +1,83 @@ +package org.apache.lucene.util.automaton; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import org.apache.lucene.util.IntsRef; + +/** + * {@link FiniteStringsIterator} which limits the number of iterated accepted strings. + * If more than limit strings are accepted, + * the first limit strings found are returned. + * + *

<p>If the {@link Automaton} has cycles then this iterator may throw an {@code + * IllegalArgumentException}, but this is not guaranteed! + * + *

Be aware that the iteration order is implementation dependent + * and may change across releases. + * + * @lucene.experimental + */ +public class LimitedFiniteStringsIterator extends FiniteStringsIterator { + /** + * Maximum number of finite strings to create. + */ + private int limit = Integer.MAX_VALUE; + + /** + * Number of generated finite strings. + */ + private int count = 0; + + /** + * Constructor. + * + * @param a Automaton to create finite string from. + * @param limit Maximum number of finite strings to create, or -1 for infinite. + */ + public LimitedFiniteStringsIterator(Automaton a, int limit) { + super(a); + + if (limit != -1 && limit <= 0) { + throw new IllegalArgumentException("limit must be -1 (which means no limit), or > 0; got: " + limit); + } + + this.limit = limit > 0? limit : Integer.MAX_VALUE; + } + + @Override + public IntsRef next() { + if (count >= limit) { + // Abort on limit. + return null; + } + + IntsRef result = super.next(); + if (result != null) { + count++; + } + + return result; + } + + /** + * Number of iterated finite strings. + */ + public int size() { + return count; + } +} diff --git a/lucene/core/src/java/org/apache/lucene/util/automaton/Operations.java b/lucene/core/src/java/org/apache/lucene/util/automaton/Operations.java index ef11d87ba7c..eedb5336624 100644 --- a/lucene/core/src/java/org/apache/lucene/util/automaton/Operations.java +++ b/lucene/core/src/java/org/apache/lucene/util/automaton/Operations.java @@ -1232,156 +1232,6 @@ final public class Operations { return result; } - private static class PathNode { - - /** Which state the path node ends on, whose - * transitions we are enumerating. */ - public int state; - - /** Which state the current transition leads to. */ - public int to; - - /** Which transition we are on. 
*/ - public int transition; - - /** Which label we are on, in the min-max range of the - * current Transition */ - public int label; - - private final Transition t = new Transition(); - - public void resetState(Automaton a, int state) { - assert a.getNumTransitions(state) != 0; - this.state = state; - transition = 0; - a.getTransition(state, 0, t); - label = t.min; - to = t.dest; - } - - /** Returns next label of current transition, or - * advances to next transition and returns its first - * label, if current one is exhausted. If there are - * no more transitions, returns -1. */ - public int nextLabel(Automaton a) { - if (label > t.max) { - // We've exhaused the current transition's labels; - // move to next transitions: - transition++; - if (transition >= a.getNumTransitions(state)) { - // We're done iterating transitions leaving this state - return -1; - } - a.getTransition(state, transition, t); - label = t.min; - to = t.dest; - } - return label++; - } - } - - private static PathNode getNode(PathNode[] nodes, int index) { - assert index < nodes.length; - if (nodes[index] == null) { - nodes[index] = new PathNode(); - } - return nodes[index]; - } - - // TODO: this is a dangerous method ... Automaton could be - // huge ... and it's better in general for caller to - // enumerate & process in a single walk: - - /** Returns the set of accepted strings, up to at most - * limit strings. If more than limit - * strings are accepted, the first limit strings found are returned. If limit == -1, then - * the limit is infinite. If the {@link Automaton} has - * cycles then this method might throw {@code - * IllegalArgumentException} but that is not guaranteed - * when the limit is set. 
*/ - public static Set getFiniteStrings(Automaton a, int limit) { - Set results = new HashSet<>(); - - if (limit == -1 || limit > 0) { - // OK - } else { - throw new IllegalArgumentException("limit must be -1 (which means no limit), or > 0; got: " + limit); - } - - if (a.isAccept(0)) { - // Special case the empty string, as usual: - results.add(new IntsRef()); - } - - if (a.getNumTransitions(0) > 0 && (limit == -1 || results.size() < limit)) { - - int numStates = a.getNumStates(); - - // Tracks which states are in the current path, for - // cycle detection: - BitSet pathStates = new BitSet(numStates); - - // Stack to hold our current state in the - // recursion/iteration: - PathNode[] nodes = new PathNode[4]; - - pathStates.set(0); - PathNode root = getNode(nodes, 0); - root.resetState(a, 0); - - IntsRefBuilder string = new IntsRefBuilder(); - string.append(0); - - while (string.length() > 0) { - - PathNode node = nodes[string.length()-1]; - - // Get next label leaving the current node: - int label = node.nextLabel(a); - - if (label != -1) { - string.setIntAt(string.length()-1, label); - - if (a.isAccept(node.to)) { - // This transition leads to an accept state, - // so we save the current string: - results.add(string.toIntsRef()); - if (results.size() == limit) { - break; - } - } - - if (a.getNumTransitions(node.to) != 0) { - // Now recurse: the destination of this transition has - // outgoing transitions: - if (pathStates.get(node.to)) { - throw new IllegalArgumentException("automaton has cycles"); - } - pathStates.set(node.to); - - // Push node onto stack: - if (nodes.length == string.length()) { - PathNode[] newNodes = new PathNode[ArrayUtil.oversize(nodes.length+1, RamUsageEstimator.NUM_BYTES_OBJECT_REF)]; - System.arraycopy(nodes, 0, newNodes, 0, nodes.length); - nodes = newNodes; - } - getNode(nodes, string.length()).resetState(a, node.to); - string.setLength(string.length() + 1); - string.grow(string.length()); - } - } else { - // No more transitions 
leaving this state, - // pop/return back to previous state: - assert pathStates.get(node.state); - pathStates.clear(node.state); - string.setLength(string.length() - 1); - } - } - } - - return results; - } - /** Returns a new automaton accepting the same language with added * transitions to a dead state so that from every state and every label * there is a transition. */ diff --git a/lucene/core/src/test/org/apache/lucene/util/automaton/FiniteStringsIteratorTest.java b/lucene/core/src/test/org/apache/lucene/util/automaton/FiniteStringsIteratorTest.java new file mode 100644 index 00000000000..f79e5884953 --- /dev/null +++ b/lucene/core/src/test/org/apache/lucene/util/automaton/FiniteStringsIteratorTest.java @@ -0,0 +1,217 @@ +package org.apache.lucene.util.automaton; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.IntsRef; +import org.apache.lucene.util.IntsRefBuilder; +import org.apache.lucene.util.LuceneTestCase; +import org.apache.lucene.util.TestUtil; +import org.apache.lucene.util.fst.Util; + +import java.util.ArrayList; +import java.util.Collections; +import java.util.HashSet; +import java.util.List; +import java.util.Set; + +import static org.apache.lucene.util.automaton.Operations.DEFAULT_MAX_DETERMINIZED_STATES; + +/** + * Test for {@link FiniteStringsIterator}. + */ +public class FiniteStringsIteratorTest extends LuceneTestCase { + public void testRandomFiniteStrings1() { + int numStrings = atLeast(100); + if (VERBOSE) { + System.out.println("TEST: numStrings=" + numStrings); + } + + Set strings = new HashSet<>(); + List automata = new ArrayList<>(); + IntsRefBuilder scratch = new IntsRefBuilder(); + for(int i=0;i actual = getFiniteStrings(iterator); + assertFiniteStringsRecursive(a, actual); + + if (!strings.equals(new HashSet<>(actual))) { + System.out.println("strings.size()=" + strings.size() + " actual.size=" + actual.size()); + List x = new ArrayList<>(strings); + Collections.sort(x); + List y = new ArrayList<>(actual); + Collections.sort(y); + int end = Math.min(x.size(), y.size()); + for(int i=0;i actual = getFiniteStrings(iterator); + assertFiniteStringsRecursive(a, actual); + assertEquals(2, actual.size()); + IntsRefBuilder dog = new IntsRefBuilder(); + Util.toIntsRef(new BytesRef("dog"), dog); + assertTrue(actual.contains(dog.get())); + IntsRefBuilder duck = new IntsRefBuilder(); + Util.toIntsRef(new BytesRef("duck"), duck); + assertTrue(actual.contains(duck.get())); + } + + public void testFiniteStringsEatsStack() { + char[] chars = new char[50000]; + TestUtil.randomFixedLengthUnicodeString(random(), chars, 0, chars.length); + String bigString1 = new String(chars); + TestUtil.randomFixedLengthUnicodeString(random(), chars, 0, chars.length); + String bigString2 = new 
String(chars); + Automaton a = Operations.union(Automata.makeString(bigString1), Automata.makeString(bigString2)); + FiniteStringsIterator iterator = new FiniteStringsIterator(a); + List actual = getFiniteStrings(iterator); + assertEquals(2, actual.size()); + IntsRefBuilder scratch = new IntsRefBuilder(); + Util.toUTF32(bigString1.toCharArray(), 0, bigString1.length(), scratch); + assertTrue(actual.contains(scratch.get())); + Util.toUTF32(bigString2.toCharArray(), 0, bigString2.length(), scratch); + assertTrue(actual.contains(scratch.get())); + } + + + public void testWithCycle() throws Exception { + try { + Automaton a = new RegExp("abc.*", RegExp.NONE).toAutomaton(); + FiniteStringsIterator iterator = new FiniteStringsIterator(a); + getFiniteStrings(iterator); + fail("did not hit exception"); + } catch (IllegalArgumentException iae) { + // expected + } + } + + public void testSingletonNoLimit() { + Automaton a = Automata.makeString("foobar"); + FiniteStringsIterator iterator = new FiniteStringsIterator(a); + List actual = getFiniteStrings(iterator); + assertEquals(1, actual.size()); + IntsRefBuilder scratch = new IntsRefBuilder(); + Util.toUTF32("foobar".toCharArray(), 0, 6, scratch); + assertTrue(actual.contains(scratch.get())); + } + + public void testShortAccept() { + Automaton a = Operations.union(Automata.makeString("x"), Automata.makeString("xy")); + a = MinimizationOperations.minimize(a, DEFAULT_MAX_DETERMINIZED_STATES); + FiniteStringsIterator iterator = new FiniteStringsIterator(a); + List actual = getFiniteStrings(iterator); + assertEquals(2, actual.size()); + IntsRefBuilder x = new IntsRefBuilder(); + Util.toIntsRef(new BytesRef("x"), x); + assertTrue(actual.contains(x.get())); + IntsRefBuilder xy = new IntsRefBuilder(); + Util.toIntsRef(new BytesRef("xy"), xy); + assertTrue(actual.contains(xy.get())); + } + + public void testSingleString() { + Automaton a = new Automaton(); + int start = a.createState(); + int end = a.createState(); + a.setAccept(end, 
true); + a.addTransition(start, end, 'a', 'a'); + a.finishState(); + Set accepted = TestOperations.getFiniteStrings(a); + assertEquals(1, accepted.size()); + IntsRefBuilder intsRef = new IntsRefBuilder(); + intsRef.append('a'); + assertTrue(accepted.contains(intsRef.toIntsRef())); + } + + /** + * All strings generated by the iterator. + */ + static List getFiniteStrings(FiniteStringsIterator iterator) { + List result = new ArrayList<>(); + for (IntsRef finiteString; (finiteString = iterator.next()) != null;) { + result.add(IntsRef.deepCopyOf(finiteString)); + } + + return result; + } + + /** + * Check that strings the automaton returns are as expected. + * + * @param automaton Automaton. + * @param actual Strings generated by automaton. + */ + private void assertFiniteStringsRecursive(Automaton automaton, List actual) { + Set expected = AutomatonTestUtil.getFiniteStringsRecursive(automaton, -1); + // Check that no string is emitted twice. + assertEquals(expected.size(), actual.size()); + assertEquals(expected, new HashSet<>(actual)); + } + + // ascii only! 
+ private static String toString(IntsRef ints) { + BytesRef br = new BytesRef(ints.length); + for(int i=0;i actual = getFiniteStrings(new LimitedFiniteStringsIterator(a, 1)); + assertEquals(1, actual.size()); + IntsRefBuilder scratch = new IntsRefBuilder(); + Util.toUTF32("foobar".toCharArray(), 0, 6, scratch); + assertTrue(actual.contains(scratch.get())); + } + + public void testLimit() { + Automaton a = Operations.union(Automata.makeString("foo"), Automata.makeString("bar")); + + // Test without limit + FiniteStringsIterator withoutLimit = new LimitedFiniteStringsIterator(a, -1); + assertEquals(2, getFiniteStrings(withoutLimit).size()); + + // Test with limit + FiniteStringsIterator withLimit = new LimitedFiniteStringsIterator(a, 1); + assertEquals(1, getFiniteStrings(withLimit).size()); + } + + public void testSize() { + Automaton a = Operations.union(Automata.makeString("foo"), Automata.makeString("bar")); + LimitedFiniteStringsIterator iterator = new LimitedFiniteStringsIterator(a, -1); + List actual = getFiniteStrings(iterator); + assertEquals(2, actual.size()); + assertEquals(2, iterator.size()); + } +} diff --git a/lucene/core/src/test/org/apache/lucene/util/automaton/TestAutomaton.java b/lucene/core/src/test/org/apache/lucene/util/automaton/TestAutomaton.java index 5a164f5ac19..d0bfae247b6 100644 --- a/lucene/core/src/test/org/apache/lucene/util/automaton/TestAutomaton.java +++ b/lucene/core/src/test/org/apache/lucene/util/automaton/TestAutomaton.java @@ -505,8 +505,8 @@ public class TestAutomaton extends LuceneTestCase { expected.add(Util.toUTF32(s, ints)); } - assertEquals(expected, Operations.getFiniteStrings(Operations.determinize(a, - DEFAULT_MAX_DETERMINIZED_STATES), -1)); + assertEquals(expected, TestOperations.getFiniteStrings( + Operations.determinize(a, DEFAULT_MAX_DETERMINIZED_STATES))); } public void testConcatenatePreservesDet() throws Exception { @@ -552,11 +552,11 @@ public class TestAutomaton extends LuceneTestCase { // If you concat empty 
automaton to anything the result should still be empty: Automaton a = Operations.concatenate(Automata.makeEmpty(), Automata.makeString("foo")); - assertEquals(new HashSet(), Operations.getFiniteStrings(a, -1)); + assertEquals(new HashSet(), TestOperations.getFiniteStrings(a)); a = Operations.concatenate(Automata.makeString("foo"), Automata.makeEmpty()); - assertEquals(new HashSet(), Operations.getFiniteStrings(a, -1)); + assertEquals(new HashSet(), TestOperations.getFiniteStrings(a)); } public void testSeemsNonEmptyButIsNot1() throws Exception { @@ -1097,7 +1097,7 @@ public class TestAutomaton extends LuceneTestCase { Util.toUTF32(term.utf8ToString(), intsRef); expected.add(intsRef.toIntsRef()); } - Set actual = Operations.getFiniteStrings(a, -1); + Set actual = TestOperations.getFiniteStrings(a); if (expected.equals(actual) == false) { System.out.println("FAILED:"); @@ -1129,7 +1129,7 @@ public class TestAutomaton extends LuceneTestCase { Util.toIntsRef(term, intsRef); expected2.add(intsRef.toIntsRef()); } - assertEquals(expected2, Operations.getFiniteStrings(utf8, -1)); + assertEquals(expected2, TestOperations.getFiniteStrings(utf8)); } catch (AssertionError ae) { System.out.println("TEST: FAILED: not same"); System.out.println(" terms (count=" + terms.size() + "):"); @@ -1259,7 +1259,7 @@ public class TestAutomaton extends LuceneTestCase { continue; } else { // Enumerate all finite strings and verify the count matches what we expect: - assertEquals(expectedCount, Operations.getFiniteStrings(a, expectedCount).size()); + assertEquals(expectedCount, TestOperations.getFiniteStrings(a, expectedCount).size()); } b = new BytesRefBuilder(); @@ -1339,7 +1339,7 @@ public class TestAutomaton extends LuceneTestCase { Automaton a = Automata.makeBinaryInterval(new BytesRef("bar"), true, new BytesRef("bar"), true); assertTrue(Operations.run(a, intsRef("bar"))); assertTrue(Operations.isFinite(a)); - assertEquals(1, Operations.getFiniteStrings(a, 10).size()); + assertEquals(1, 
TestOperations.getFiniteStrings(a).size()); } public void testMakeBinaryIntervalCommonPrefix() throws Exception { diff --git a/lucene/core/src/test/org/apache/lucene/util/automaton/TestOperations.java b/lucene/core/src/test/org/apache/lucene/util/automaton/TestOperations.java index bdcccd7bbd7..926f4892bcd 100644 --- a/lucene/core/src/test/org/apache/lucene/util/automaton/TestOperations.java +++ b/lucene/core/src/test/org/apache/lucene/util/automaton/TestOperations.java @@ -20,7 +20,6 @@ package org.apache.lucene.util.automaton; import java.util.*; import org.apache.lucene.util.*; -import org.apache.lucene.util.fst.Util; import com.carrotsearch.randomizedtesting.generators.RandomInts; @@ -125,176 +124,39 @@ public class TestOperations extends LuceneTestCase { } } - /** Pass false for testRecursive if the expected strings - * may be too long */ - private Set getFiniteStrings(Automaton a, int limit, boolean testRecursive) { - Set result = Operations.getFiniteStrings(a, limit); - if (testRecursive) { - assertEquals(AutomatonTestUtil.getFiniteStringsRecursive(a, limit), result); + /** + * Returns the set of all accepted strings. + * + * This method exist just to ease testing. + * For production code directly use {@link FiniteStringsIterator} instead. + * + * @see FiniteStringsIterator + */ + public static Set getFiniteStrings(Automaton a) { + return getFiniteStrings(new FiniteStringsIterator(a)); + } + + /** + * Returns the set of accepted strings, up to at most limit strings. + * + * This method exist just to ease testing. + * For production code directly use {@link LimitedFiniteStringsIterator} instead. + * + * @see LimitedFiniteStringsIterator + */ + public static Set getFiniteStrings(Automaton a, int limit) { + return getFiniteStrings(new LimitedFiniteStringsIterator(a, limit)); + } + + /** + * Get all finite strings of an iterator. 
+ */ + private static Set getFiniteStrings(FiniteStringsIterator iterator) { + Set result = new HashSet<>(); + for (IntsRef finiteString; (finiteString = iterator.next()) != null;) { + result.add(IntsRef.deepCopyOf(finiteString)); } + return result; } - - /** - * Basic test for getFiniteStrings - */ - public void testFiniteStringsBasic() { - Automaton a = Operations.union(Automata.makeString("dog"), Automata.makeString("duck")); - a = MinimizationOperations.minimize(a, DEFAULT_MAX_DETERMINIZED_STATES); - Set strings = getFiniteStrings(a, -1, true); - assertEquals(2, strings.size()); - IntsRefBuilder dog = new IntsRefBuilder(); - Util.toIntsRef(new BytesRef("dog"), dog); - assertTrue(strings.contains(dog.get())); - IntsRefBuilder duck = new IntsRefBuilder(); - Util.toIntsRef(new BytesRef("duck"), duck); - assertTrue(strings.contains(duck.get())); - } - - public void testFiniteStringsEatsStack() { - char[] chars = new char[50000]; - TestUtil.randomFixedLengthUnicodeString(random(), chars, 0, chars.length); - String bigString1 = new String(chars); - TestUtil.randomFixedLengthUnicodeString(random(), chars, 0, chars.length); - String bigString2 = new String(chars); - Automaton a = Operations.union(Automata.makeString(bigString1), Automata.makeString(bigString2)); - Set strings = getFiniteStrings(a, -1, false); - assertEquals(2, strings.size()); - IntsRefBuilder scratch = new IntsRefBuilder(); - Util.toUTF32(bigString1.toCharArray(), 0, bigString1.length(), scratch); - assertTrue(strings.contains(scratch.get())); - Util.toUTF32(bigString2.toCharArray(), 0, bigString2.length(), scratch); - assertTrue(strings.contains(scratch.get())); - } - - public void testRandomFiniteStrings1() { - - int numStrings = atLeast(100); - if (VERBOSE) { - System.out.println("TEST: numStrings=" + numStrings); - } - - Set strings = new HashSet(); - List automata = new ArrayList<>(); - IntsRefBuilder scratch = new IntsRefBuilder(); - for(int i=0;i actual = getFiniteStrings(a, -1, true); - if 
(strings.equals(actual) == false) { - System.out.println("strings.size()=" + strings.size() + " actual.size=" + actual.size()); - List x = new ArrayList<>(strings); - Collections.sort(x); - List y = new ArrayList<>(actual); - Collections.sort(y); - int end = Math.min(x.size(), y.size()); - for(int i=0;i result = Operations.getFiniteStrings(Automata.makeString("foobar"), -1); - assertEquals(1, result.size()); - IntsRefBuilder scratch = new IntsRefBuilder(); - Util.toUTF32("foobar".toCharArray(), 0, 6, scratch); - assertTrue(result.contains(scratch.get())); - } - - public void testSingletonLimit1() { - Set result = Operations.getFiniteStrings(Automata.makeString("foobar"), 1); - assertEquals(1, result.size()); - IntsRefBuilder scratch = new IntsRefBuilder(); - Util.toUTF32("foobar".toCharArray(), 0, 6, scratch); - assertTrue(result.contains(scratch.get())); - } } diff --git a/lucene/core/src/test/org/apache/lucene/util/automaton/TestUTF32ToUTF8.java b/lucene/core/src/test/org/apache/lucene/util/automaton/TestUTF32ToUTF8.java index d1c35119d04..d823b3aa300 100644 --- a/lucene/core/src/test/org/apache/lucene/util/automaton/TestUTF32ToUTF8.java +++ b/lucene/core/src/test/org/apache/lucene/util/automaton/TestUTF32ToUTF8.java @@ -218,7 +218,7 @@ public class TestUTF32ToUTF8 extends LuceneTestCase { Util.toIntsRef(new BytesRef(s), ints); Set set = new HashSet<>(); set.add(ints.get()); - assertEquals(set, Operations.getFiniteStrings(utf8, -1)); + assertEquals(set, TestOperations.getFiniteStrings(utf8)); } } diff --git a/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/AnalyzingSuggester.java b/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/AnalyzingSuggester.java index 7e31e6cd771..11c9a91aba0 100644 --- a/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/AnalyzingSuggester.java +++ b/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/AnalyzingSuggester.java @@ -50,6 +50,7 @@ import 
org.apache.lucene.util.IntsRef; import org.apache.lucene.util.IntsRefBuilder; import org.apache.lucene.util.OfflineSorter; import org.apache.lucene.util.automaton.Automaton; +import org.apache.lucene.util.automaton.LimitedFiniteStringsIterator; import org.apache.lucene.util.automaton.Operations; import org.apache.lucene.util.automaton.Transition; import org.apache.lucene.util.fst.Builder; @@ -413,16 +414,13 @@ public class AnalyzingSuggester extends Lookup implements Accountable { byte buffer[] = new byte[8]; try { ByteArrayDataOutput output = new ByteArrayDataOutput(buffer); - BytesRef surfaceForm; - while ((surfaceForm = iterator.next()) != null) { - Set paths = toFiniteStrings(surfaceForm, ts2a); - - maxAnalyzedPathsForOneInput = Math.max(maxAnalyzedPathsForOneInput, paths.size()); + for (BytesRef surfaceForm; (surfaceForm = iterator.next()) != null;) { + LimitedFiniteStringsIterator finiteStrings = + new LimitedFiniteStringsIterator(toAutomaton(surfaceForm, ts2a), maxGraphExpansions); - for (IntsRef path : paths) { - - Util.toBytesRef(path, scratch); + for (IntsRef string; (string = finiteStrings.next()) != null; count++) { + Util.toBytesRef(string, scratch); // length of the analyzed text (FST input) if (scratch.length() > Short.MAX_VALUE-2) { @@ -473,7 +471,8 @@ public class AnalyzingSuggester extends Lookup implements Accountable { assert output.getPosition() == requiredLength: output.getPosition() + " vs " + requiredLength; writer.write(buffer, 0, output.getPosition()); } - count++; + + maxAnalyzedPathsForOneInput = Math.max(maxAnalyzedPathsForOneInput, finiteStrings.size()); } writer.close(); @@ -833,9 +832,9 @@ public class AnalyzingSuggester extends Lookup implements Accountable { return prefixPaths; } - final Set toFiniteStrings(final BytesRef surfaceForm, final TokenStreamToAutomaton ts2a) throws IOException { + final Automaton toAutomaton(final BytesRef surfaceForm, final TokenStreamToAutomaton ts2a) throws IOException { // Analyze surface form: - 
Automaton automaton = null; + Automaton automaton; try (TokenStream ts = indexAnalyzer.tokenStream("", surfaceForm.utf8ToString())) { // Create corresponding automaton: labels are bytes @@ -853,12 +852,7 @@ public class AnalyzingSuggester extends Lookup implements Accountable { // Get all paths from the automaton (there can be // more than one path, eg if the analyzer created a // graph using SynFilter or WDF): - - // TODO: we could walk & add simultaneously, so we - // don't have to alloc [possibly biggish] - // intermediate HashSet in RAM: - - return Operations.getFiniteStrings(automaton, maxGraphExpansions); + return automaton; } final Automaton toLookupAutomaton(final CharSequence key) throws IOException { diff --git a/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/FuzzySuggester.java b/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/FuzzySuggester.java index bd207eb6749..2e14e8f850f 100644 --- a/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/FuzzySuggester.java +++ b/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/FuzzySuggester.java @@ -17,9 +17,8 @@ package org.apache.lucene.search.suggest.analyzing; */ import java.io.IOException; -import java.util.Arrays; +import java.util.ArrayList; import java.util.List; -import java.util.Set; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.TokenStream; @@ -30,6 +29,7 @@ import org.apache.lucene.util.IntsRef; import org.apache.lucene.util.UnicodeUtil; import org.apache.lucene.util.automaton.Automata; import org.apache.lucene.util.automaton.Automaton; +import org.apache.lucene.util.automaton.FiniteStringsIterator; import org.apache.lucene.util.automaton.LevenshteinAutomata; import org.apache.lucene.util.automaton.Operations; import org.apache.lucene.util.automaton.UTF32ToUTF8; @@ -221,37 +221,34 @@ public final class FuzzySuggester extends AnalyzingSuggester { } Automaton toLevenshteinAutomata(Automaton automaton) { 
- final Set ref = Operations.getFiniteStrings(automaton, -1); - Automaton subs[] = new Automaton[ref.size()]; - int upto = 0; - for (IntsRef path : ref) { - if (path.length <= nonFuzzyPrefix || path.length < minFuzzyLength) { - subs[upto] = Automata.makeString(path.ints, path.offset, path.length); - upto++; + List subs = new ArrayList<>(); + FiniteStringsIterator finiteStrings = new FiniteStringsIterator(automaton); + for (IntsRef string; (string = finiteStrings.next()) != null;) { + if (string.length <= nonFuzzyPrefix || string.length < minFuzzyLength) { + subs.add(Automata.makeString(string.ints, string.offset, string.length)); } else { - int ints[] = new int[path.length-nonFuzzyPrefix]; - System.arraycopy(path.ints, path.offset+nonFuzzyPrefix, ints, 0, ints.length); + int ints[] = new int[string.length-nonFuzzyPrefix]; + System.arraycopy(string.ints, string.offset+nonFuzzyPrefix, ints, 0, ints.length); // TODO: maybe add alphaMin to LevenshteinAutomata, // and pass 1 instead of 0? We probably don't want // to allow the trailing dedup bytes to be // edited... but then 0 byte is "in general" allowed // on input (but not in UTF8). LevenshteinAutomata lev = new LevenshteinAutomata(ints, unicodeAware ? Character.MAX_CODE_POINT : 255, transpositions); - subs[upto] = lev.toAutomaton(maxEdits, UnicodeUtil.newString(path.ints, path.offset, nonFuzzyPrefix)); - upto++; + subs.add(lev.toAutomaton(maxEdits, UnicodeUtil.newString(string.ints, string.offset, nonFuzzyPrefix))); } } - if (subs.length == 0) { + if (subs.isEmpty()) { // automaton is empty, there is no accepted paths through it return Automata.makeEmpty(); // matches nothing - } else if (subs.length == 1) { + } else if (subs.size() == 1) { // no synonyms or anything: just a single path through the tokenstream - return subs[0]; + return subs.get(0); } else { // multiple paths: this is really scary! is it slow? // maybe we should not do this and throw UOE? 
- Automaton a = Operations.union(Arrays.asList(subs)); + Automaton a = Operations.union(subs); // TODO: we could call toLevenshteinAutomata() before det? // this only happens if you have multiple paths anyway (e.g. synonyms) return Operations.determinize(a, DEFAULT_MAX_DETERMINIZED_STATES); diff --git a/lucene/suggest/src/java/org/apache/lucene/search/suggest/document/CompletionTokenStream.java b/lucene/suggest/src/java/org/apache/lucene/search/suggest/document/CompletionTokenStream.java index 38728812271..d27653e59f9 100644 --- a/lucene/suggest/src/java/org/apache/lucene/search/suggest/document/CompletionTokenStream.java +++ b/lucene/suggest/src/java/org/apache/lucene/search/suggest/document/CompletionTokenStream.java @@ -18,14 +18,11 @@ package org.apache.lucene.search.suggest.document; */ import java.io.IOException; -import java.util.Iterator; -import java.util.Set; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.TokenStreamToAutomaton; import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.analysis.tokenattributes.PayloadAttribute; -import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; import org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute; import org.apache.lucene.util.AttributeImpl; import org.apache.lucene.util.AttributeReflector; @@ -35,6 +32,8 @@ import org.apache.lucene.util.CharsRefBuilder; import org.apache.lucene.util.IOUtils; import org.apache.lucene.util.IntsRef; import org.apache.lucene.util.automaton.Automaton; +import org.apache.lucene.util.automaton.FiniteStringsIterator; +import org.apache.lucene.util.automaton.LimitedFiniteStringsIterator; import org.apache.lucene.util.automaton.Operations; import org.apache.lucene.util.automaton.Transition; import org.apache.lucene.util.fst.Util; @@ -56,7 +55,6 @@ import static org.apache.lucene.search.suggest.document.CompletionAnalyzer.SEP_L public final class CompletionTokenStream extends 
TokenStream { private final PayloadAttribute payloadAttr = addAttribute(PayloadAttribute.class); - private final PositionIncrementAttribute posAttr = addAttribute(PositionIncrementAttribute.class); private final BytesRefBuilderTermAttribute bytesAtt = addAttribute(BytesRefBuilderTermAttribute.class); private final TokenStream input; @@ -64,9 +62,8 @@ public final class CompletionTokenStream extends TokenStream { final boolean preservePositionIncrements; final int maxGraphExpansions; + private FiniteStringsIterator finiteStrings; private BytesRef payload; - private Iterator finiteStrings; - private int posInc = -1; private CharTermAttribute charTermAttribute; /** @@ -100,45 +97,38 @@ public final class CompletionTokenStream extends TokenStream { public boolean incrementToken() throws IOException { clearAttributes(); if (finiteStrings == null) { - //TODO: make this return a Iterator instead? Automaton automaton = toAutomaton(); - Set strings = Operations.getFiniteStrings(automaton, maxGraphExpansions); - - posInc = strings.size(); - finiteStrings = strings.iterator(); - } - if (finiteStrings.hasNext()) { - posAttr.setPositionIncrement(posInc); - /* - * this posInc encodes the number of paths that this surface form - * produced. 
Multi Fields have the same surface form and therefore sum up - */ - posInc = 0; - Util.toBytesRef(finiteStrings.next(), bytesAtt.builder()); // now we have UTF-8 - if (charTermAttribute != null) { - charTermAttribute.setLength(0); - charTermAttribute.append(bytesAtt.toUTF16()); - } - if (payload != null) { - payloadAttr.setPayload(this.payload); - } - return true; + finiteStrings = new LimitedFiniteStringsIterator(automaton, maxGraphExpansions); } - return false; + IntsRef string = finiteStrings.next(); + if (string == null) { + return false; + } + + Util.toBytesRef(string, bytesAtt.builder()); // now we have UTF-8 + if (charTermAttribute != null) { + charTermAttribute.setLength(0); + charTermAttribute.append(bytesAtt.toUTF16()); + } + if (payload != null) { + payloadAttr.setPayload(this.payload); + } + + return true; } @Override public void end() throws IOException { super.end(); - if (posInc == -1) { + if (finiteStrings == null) { input.end(); } } @Override public void close() throws IOException { - if (posInc == -1) { + if (finiteStrings == null) { input.close(); } } @@ -151,7 +141,6 @@ public final class CompletionTokenStream extends TokenStream { charTermAttribute = getAttribute(CharTermAttribute.class); } finiteStrings = null; - posInc = -1; } /** diff --git a/lucene/suggest/src/java/org/apache/lucene/search/suggest/document/FuzzyCompletionQuery.java b/lucene/suggest/src/java/org/apache/lucene/search/suggest/document/FuzzyCompletionQuery.java index 0a3819657ce..85c5d6ecea5 100644 --- a/lucene/suggest/src/java/org/apache/lucene/search/suggest/document/FuzzyCompletionQuery.java +++ b/lucene/suggest/src/java/org/apache/lucene/search/suggest/document/FuzzyCompletionQuery.java @@ -18,7 +18,9 @@ package org.apache.lucene.search.suggest.document; */ import java.io.IOException; -import java.util.Arrays; +import java.util.ArrayList; +import java.util.HashSet; +import java.util.List; import java.util.Set; import org.apache.lucene.analysis.Analyzer; @@ -30,6 +32,7 @@ 
import org.apache.lucene.util.IntsRef; import org.apache.lucene.util.UnicodeUtil; import org.apache.lucene.util.automaton.Automata; import org.apache.lucene.util.automaton.Automaton; +import org.apache.lucene.util.automaton.FiniteStringsIterator; import org.apache.lucene.util.automaton.LevenshteinAutomata; import org.apache.lucene.util.automaton.Operations; import org.apache.lucene.util.automaton.UTF32ToUTF8; @@ -142,28 +145,29 @@ public class FuzzyCompletionQuery extends PrefixCompletionQuery { @Override public Weight createWeight(IndexSearcher searcher, boolean needsScores) throws IOException { CompletionTokenStream stream = (CompletionTokenStream) analyzer.tokenStream(getField(), getTerm().text()); - Automaton a = stream.toAutomaton(unicodeAware); - final Set refs = Operations.getFiniteStrings(a, -1); - assert refs.size() > 0; - Automaton automaton = toLevenshteinAutomata(refs); + Set refs = new HashSet<>(); + Automaton automaton = toLevenshteinAutomata(stream.toAutomaton(unicodeAware), refs); if (unicodeAware) { Automaton utf8automaton = new UTF32ToUTF8().convert(automaton); utf8automaton = Operations.determinize(utf8automaton, maxDeterminizedStates); automaton = utf8automaton; } + // TODO Accumulating all refs is bad, because the resulting set may be very big. + // TODO Better iterate over automaton again inside FuzzyCompletionWeight? 
return new FuzzyCompletionWeight(this, automaton, refs); } - private Automaton toLevenshteinAutomata(Set ref) { - Automaton subs[] = new Automaton[ref.size()]; - int upto = 0; - for (IntsRef path : ref) { - if (path.length <= nonFuzzyPrefix || path.length < minFuzzyLength) { - subs[upto] = Automata.makeString(path.ints, path.offset, path.length); - upto++; + private Automaton toLevenshteinAutomata(Automaton automaton, Set refs) { + List subs = new ArrayList<>(); + FiniteStringsIterator finiteStrings = new FiniteStringsIterator(automaton); + for (IntsRef string; (string = finiteStrings.next()) != null;) { + refs.add(IntsRef.deepCopyOf(string)); + + if (string.length <= nonFuzzyPrefix || string.length < minFuzzyLength) { + subs.add(Automata.makeString(string.ints, string.offset, string.length)); } else { - int ints[] = new int[path.length - nonFuzzyPrefix]; - System.arraycopy(path.ints, path.offset + nonFuzzyPrefix, ints, 0, ints.length); + int ints[] = new int[string.length - nonFuzzyPrefix]; + System.arraycopy(string.ints, string.offset + nonFuzzyPrefix, ints, 0, ints.length); // TODO: maybe add alphaMin to LevenshteinAutomata, // and pass 1 instead of 0? We probably don't want // to allow the trailing dedup bytes to be @@ -172,22 +176,21 @@ public class FuzzyCompletionQuery extends PrefixCompletionQuery { LevenshteinAutomata lev = new LevenshteinAutomata(ints, unicodeAware ? 
Character.MAX_CODE_POINT : 255, transpositions); - subs[upto] = lev.toAutomaton(maxEdits, - UnicodeUtil.newString(path.ints, path.offset, nonFuzzyPrefix)); - upto++; + subs.add(lev.toAutomaton(maxEdits, + UnicodeUtil.newString(string.ints, string.offset, nonFuzzyPrefix))); } } - if (subs.length == 0) { + if (subs.isEmpty()) { // automaton is empty, there is no accepted paths through it return Automata.makeEmpty(); // matches nothing - } else if (subs.length == 1) { + } else if (subs.size() == 1) { // no synonyms or anything: just a single path through the tokenstream - return subs[0]; + return subs.get(0); } else { // multiple paths: this is really scary! is it slow? // maybe we should not do this and throw UOE? - Automaton a = Operations.union(Arrays.asList(subs)); + Automaton a = Operations.union(subs); // TODO: we could call toLevenshteinAutomata() before det? // this only happens if you have multiple paths anyway (e.g. synonyms) return Operations.determinize(a, maxDeterminizedStates); diff --git a/lucene/suggest/src/test/org/apache/lucene/search/suggest/analyzing/FuzzySuggesterTest.java b/lucene/suggest/src/test/org/apache/lucene/search/suggest/analyzing/FuzzySuggesterTest.java index dad44081837..03e35346fb9 100644 --- a/lucene/suggest/src/test/org/apache/lucene/search/suggest/analyzing/FuzzySuggesterTest.java +++ b/lucene/suggest/src/test/org/apache/lucene/search/suggest/analyzing/FuzzySuggesterTest.java @@ -49,6 +49,7 @@ import org.apache.lucene.util.IntsRef; import org.apache.lucene.util.LuceneTestCase; import org.apache.lucene.util.TestUtil; import org.apache.lucene.util.automaton.Automaton; +import org.apache.lucene.util.automaton.FiniteStringsIterator; import org.apache.lucene.util.fst.Util; public class FuzzySuggesterTest extends LuceneTestCase { @@ -773,10 +774,11 @@ public class FuzzySuggesterTest extends LuceneTestCase { BytesRefBuilder spare = new BytesRefBuilder(); for (TermFreqPayload2 e : slowCompletor) { spare.copyChars(e.analyzedForm); - Set 
finiteStrings = suggester.toFiniteStrings(spare.get(), tokenStreamToAutomaton); - for (IntsRef intsRef : finiteStrings) { + FiniteStringsIterator finiteStrings = + new FiniteStringsIterator(suggester.toAutomaton(spare.get(), tokenStreamToAutomaton)); + for (IntsRef string; (string = finiteStrings.next()) != null;) { int p = 0; - BytesRef ref = Util.toBytesRef(intsRef, spare); + BytesRef ref = Util.toBytesRef(string, spare); boolean added = false; for (int i = ref.offset; i < ref.length; i++) { int q = automaton.step(p, ref.bytes[i] & 0xff);