mirror of https://github.com/apache/druid.git
Fix two id-over-maxId errors in StringDimensionIndexer. (#10245)
1) lookupId could return IDs beyond maxId if called with a recently added value. 2) getRow could return an ID for null beyond maxId, if null was recently encountered in a dimension that initially didn't appear at all. (In this case, the dictionary ID for null can be > 0). Also add a comment explaining how this stuff is supposed to work.
This commit is contained in:
parent
6baea0b4d5
commit
e273264332
|
@ -51,6 +51,7 @@ import org.apache.druid.segment.filter.BooleanValueMatcher;
|
|||
import org.apache.druid.segment.incremental.IncrementalIndex;
|
||||
import org.apache.druid.segment.incremental.IncrementalIndexRow;
|
||||
import org.apache.druid.segment.incremental.IncrementalIndexRowHolder;
|
||||
import org.checkerframework.checker.nullness.qual.MonotonicNonNull;
|
||||
|
||||
import javax.annotation.Nullable;
|
||||
import java.util.ArrayList;
|
||||
|
@ -87,7 +88,7 @@ public class StringDimensionIndexer implements DimensionIndexer<Integer, int[],
|
|||
public DimensionDictionary()
|
||||
{
|
||||
this.lock = new ReentrantReadWriteLock();
|
||||
valueToId.defaultReturnValue(-1);
|
||||
valueToId.defaultReturnValue(ABSENT_VALUE_ID);
|
||||
}
|
||||
|
||||
public int getId(@Nullable String value)
|
||||
|
@ -513,11 +514,21 @@ public class StringDimensionIndexer implements DimensionIndexer<Integer, int[],
|
|||
final ExtractionFn extractionFn = spec.getExtractionFn();
|
||||
|
||||
final int dimIndex = desc.getIndex();
|
||||
|
||||
// maxId is used in concert with getLastRowIndex() in IncrementalIndex to ensure that callers do not encounter
|
||||
// rows that contain IDs over the initially-reported cardinality. The main idea is that IncrementalIndex establishes
|
||||
// a watermark at the time a cursor is created, and doesn't allow the cursor to walk past that watermark.
|
||||
//
|
||||
// Additionally, this selector explicitly blocks knowledge of IDs past maxId that may occur from other causes
|
||||
// (for example: nulls getting generated for empty arrays, or calls to lookupId).
|
||||
final int maxId = getCardinality();
|
||||
|
||||
class IndexerDimensionSelector implements DimensionSelector, IdLookup
|
||||
{
|
||||
private final ArrayBasedIndexedInts indexedInts = new ArrayBasedIndexedInts();
|
||||
|
||||
@Nullable
|
||||
@MonotonicNonNull
|
||||
private int[] nullIdIntArray;
|
||||
|
||||
@Override
|
||||
|
@ -542,14 +553,15 @@ public class StringDimensionIndexer implements DimensionIndexer<Integer, int[],
|
|||
rowSize = 0;
|
||||
} else {
|
||||
final int nullId = getEncodedValue(null, false);
|
||||
if (nullId > -1) {
|
||||
if (nullId >= 0 && nullId < maxId) {
|
||||
// null was added to the dictionary before this selector was created; return its ID.
|
||||
if (nullIdIntArray == null) {
|
||||
nullIdIntArray = new int[]{nullId};
|
||||
}
|
||||
row = nullIdIntArray;
|
||||
rowSize = 1;
|
||||
} else {
|
||||
// doesn't contain nullId, then empty array is used
|
||||
// null doesn't exist in the dictionary; return an empty array.
|
||||
// Choose to use ArrayBasedIndexedInts later, instead of special "empty" IndexedInts, for monomorphism
|
||||
row = IntArrays.EMPTY_ARRAY;
|
||||
rowSize = 0;
|
||||
|
@ -668,6 +680,7 @@ public class StringDimensionIndexer implements DimensionIndexer<Integer, int[],
|
|||
public String lookupName(int id)
|
||||
{
|
||||
if (id >= maxId) {
|
||||
// Sanity check; IDs beyond maxId should not be known to callers. (See comment above.)
|
||||
throw new ISE("id[%d] >= maxId[%d]", id, maxId);
|
||||
}
|
||||
final String strValue = getActualValue(id, false);
|
||||
|
@ -695,7 +708,16 @@ public class StringDimensionIndexer implements DimensionIndexer<Integer, int[],
|
|||
"cannot perform lookup when applying an extraction function"
|
||||
);
|
||||
}
|
||||
return getEncodedValue(name, false);
|
||||
|
||||
final int id = getEncodedValue(name, false);
|
||||
|
||||
if (id < maxId) {
|
||||
return id;
|
||||
} else {
|
||||
// Can happen if a value was added to our dimLookup after this selector was created. Act like it
|
||||
// doesn't exist.
|
||||
return ABSENT_VALUE_ID;
|
||||
}
|
||||
}
|
||||
|
||||
@SuppressWarnings("deprecation")
|
||||
|
|
Loading…
Reference in New Issue