mirror of https://github.com/apache/lucene.git
LUCENE-8429: Avoid stack overflows in DaciukMihovAutomatonBuilder.
This commit is contained in:
parent
534204890a
commit
d78feb2236
|
@ -189,6 +189,9 @@ Bug Fixes:
|
|||
|
||||
* LUCENE-8398: TieredMergePolicy.getMaxMergedSegmentMB has rounding error (Erick Erickson)
|
||||
|
||||
* LUCENE-8429: DaciukMihovAutomatonBuilder now enforces a maximum term length
|
||||
(1,000 characters) in order to avoid stack overflows. (Adrien Grand)
|
||||
|
||||
Changes in Runtime Behavior:
|
||||
|
||||
* LUCENE-7976: TieredMergePolicy now respects maxSegmentSizeMB by default when executing
|
||||
|
|
|
@ -33,7 +33,14 @@ import org.apache.lucene.util.UnicodeUtil;
|
|||
* @see Automata#makeStringUnion(Collection)
|
||||
*/
|
||||
public final class DaciukMihovAutomatonBuilder {
|
||||
|
||||
|
||||
/**
|
||||
* This builder rejects terms that are more than 1k chars long, because it
|
||||
* uses recursion whose depth is based on the length of the string, which
|
||||
* might cause stack overflows for longer terms.
|
||||
*/
|
||||
static final int MAX_TERM_LENGTH = 1_000;
|
||||
|
||||
/**
|
||||
* The default constructor is private. Use static methods directly.
|
||||
*/
|
||||
|
@ -220,6 +227,9 @@ public final class DaciukMihovAutomatonBuilder {
|
|||
* to this automaton (the input must be sorted).
|
||||
*/
|
||||
public void add(CharsRef current) {
|
||||
if (current.length > MAX_TERM_LENGTH) {
|
||||
throw new IllegalArgumentException("This builder doesn't allow terms that are larger than 1,000 characters, got " + current);
|
||||
}
|
||||
assert stateRegistry != null : "Automaton already built.";
|
||||
assert previous == null
|
||||
|| comparator.compare(previous, current) <= 0 : "Input must be in sorted UTF-8 order: "
|
||||
|
|
|
@ -0,0 +1,39 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.lucene.util.automaton;
|
||||
|
||||
import java.util.Arrays;
|
||||
import java.util.Collections;
|
||||
|
||||
import org.apache.lucene.util.ArrayUtil;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.LuceneTestCase;
|
||||
|
||||
public class TestDaciukMihovAutomatonBuilder extends LuceneTestCase {
|
||||
|
||||
public void testLargeTerms() {
|
||||
byte[] b10k = new byte[10_000];
|
||||
Arrays.fill(b10k, (byte) 'a');
|
||||
IllegalArgumentException e = expectThrows(IllegalArgumentException.class,
|
||||
() -> DaciukMihovAutomatonBuilder.build(Collections.singleton(new BytesRef(b10k))));
|
||||
assertTrue(e.getMessage().startsWith("This builder doesn't allow terms that are larger than 1,000 characters"));
|
||||
|
||||
byte[] b1k = ArrayUtil.copyOfSubArray(b10k, 0, 1000);
|
||||
DaciukMihovAutomatonBuilder.build(Collections.singleton(new BytesRef(b1k))); // no exception
|
||||
}
|
||||
|
||||
}
|
Loading…
Reference in New Issue