mirror of https://github.com/apache/druid.git
Fix two id-over-maxId errors in StringDimensionIndexer. (#10245)
1) lookupId could return IDs beyond maxId if called with a recently added value. 2) getRow could return an ID for null beyond maxId, if null was recently encountered in a dimension that initially didn't appear at all. (In this case, the dictionary ID for null can be > 0). Also add a comment explaining how this stuff is supposed to work.
This commit is contained in:
parent
6baea0b4d5
commit
e273264332
|
@ -51,6 +51,7 @@ import org.apache.druid.segment.filter.BooleanValueMatcher;
|
||||||
import org.apache.druid.segment.incremental.IncrementalIndex;
|
import org.apache.druid.segment.incremental.IncrementalIndex;
|
||||||
import org.apache.druid.segment.incremental.IncrementalIndexRow;
|
import org.apache.druid.segment.incremental.IncrementalIndexRow;
|
||||||
import org.apache.druid.segment.incremental.IncrementalIndexRowHolder;
|
import org.apache.druid.segment.incremental.IncrementalIndexRowHolder;
|
||||||
|
import org.checkerframework.checker.nullness.qual.MonotonicNonNull;
|
||||||
|
|
||||||
import javax.annotation.Nullable;
|
import javax.annotation.Nullable;
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
|
@ -87,7 +88,7 @@ public class StringDimensionIndexer implements DimensionIndexer<Integer, int[],
|
||||||
public DimensionDictionary()
|
public DimensionDictionary()
|
||||||
{
|
{
|
||||||
this.lock = new ReentrantReadWriteLock();
|
this.lock = new ReentrantReadWriteLock();
|
||||||
valueToId.defaultReturnValue(-1);
|
valueToId.defaultReturnValue(ABSENT_VALUE_ID);
|
||||||
}
|
}
|
||||||
|
|
||||||
public int getId(@Nullable String value)
|
public int getId(@Nullable String value)
|
||||||
|
@ -513,11 +514,21 @@ public class StringDimensionIndexer implements DimensionIndexer<Integer, int[],
|
||||||
final ExtractionFn extractionFn = spec.getExtractionFn();
|
final ExtractionFn extractionFn = spec.getExtractionFn();
|
||||||
|
|
||||||
final int dimIndex = desc.getIndex();
|
final int dimIndex = desc.getIndex();
|
||||||
|
|
||||||
|
// maxId is used in concert with getLastRowIndex() in IncrementalIndex to ensure that callers do not encounter
|
||||||
|
// rows that contain IDs over the initially-reported cardinality. The main idea is that IncrementalIndex establishes
|
||||||
|
// a watermark at the time a cursor is created, and doesn't allow the cursor to walk past that watermark.
|
||||||
|
//
|
||||||
|
// Additionally, this selector explicitly blocks knowledge of IDs past maxId that may occur from other causes
|
||||||
|
// (for example: nulls getting generated for empty arrays, or calls to lookupId).
|
||||||
final int maxId = getCardinality();
|
final int maxId = getCardinality();
|
||||||
|
|
||||||
class IndexerDimensionSelector implements DimensionSelector, IdLookup
|
class IndexerDimensionSelector implements DimensionSelector, IdLookup
|
||||||
{
|
{
|
||||||
private final ArrayBasedIndexedInts indexedInts = new ArrayBasedIndexedInts();
|
private final ArrayBasedIndexedInts indexedInts = new ArrayBasedIndexedInts();
|
||||||
|
|
||||||
|
@Nullable
|
||||||
|
@MonotonicNonNull
|
||||||
private int[] nullIdIntArray;
|
private int[] nullIdIntArray;
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
@ -542,14 +553,15 @@ public class StringDimensionIndexer implements DimensionIndexer<Integer, int[],
|
||||||
rowSize = 0;
|
rowSize = 0;
|
||||||
} else {
|
} else {
|
||||||
final int nullId = getEncodedValue(null, false);
|
final int nullId = getEncodedValue(null, false);
|
||||||
if (nullId > -1) {
|
if (nullId >= 0 && nullId < maxId) {
|
||||||
|
// null was added to the dictionary before this selector was created; return its ID.
|
||||||
if (nullIdIntArray == null) {
|
if (nullIdIntArray == null) {
|
||||||
nullIdIntArray = new int[]{nullId};
|
nullIdIntArray = new int[]{nullId};
|
||||||
}
|
}
|
||||||
row = nullIdIntArray;
|
row = nullIdIntArray;
|
||||||
rowSize = 1;
|
rowSize = 1;
|
||||||
} else {
|
} else {
|
||||||
// doesn't contain nullId, then empty array is used
|
// null doesn't exist in the dictionary; return an empty array.
|
||||||
// Choose to use ArrayBasedIndexedInts later, instead of special "empty" IndexedInts, for monomorphism
|
// Choose to use ArrayBasedIndexedInts later, instead of special "empty" IndexedInts, for monomorphism
|
||||||
row = IntArrays.EMPTY_ARRAY;
|
row = IntArrays.EMPTY_ARRAY;
|
||||||
rowSize = 0;
|
rowSize = 0;
|
||||||
|
@ -668,6 +680,7 @@ public class StringDimensionIndexer implements DimensionIndexer<Integer, int[],
|
||||||
public String lookupName(int id)
|
public String lookupName(int id)
|
||||||
{
|
{
|
||||||
if (id >= maxId) {
|
if (id >= maxId) {
|
||||||
|
// Sanity check; IDs beyond maxId should not be known to callers. (See comment above.)
|
||||||
throw new ISE("id[%d] >= maxId[%d]", id, maxId);
|
throw new ISE("id[%d] >= maxId[%d]", id, maxId);
|
||||||
}
|
}
|
||||||
final String strValue = getActualValue(id, false);
|
final String strValue = getActualValue(id, false);
|
||||||
|
@ -695,7 +708,16 @@ public class StringDimensionIndexer implements DimensionIndexer<Integer, int[],
|
||||||
"cannot perform lookup when applying an extraction function"
|
"cannot perform lookup when applying an extraction function"
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
return getEncodedValue(name, false);
|
|
||||||
|
final int id = getEncodedValue(name, false);
|
||||||
|
|
||||||
|
if (id < maxId) {
|
||||||
|
return id;
|
||||||
|
} else {
|
||||||
|
// Can happen if a value was added to our dimLookup after this selector was created. Act like it
|
||||||
|
// doesn't exist.
|
||||||
|
return ABSENT_VALUE_ID;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@SuppressWarnings("deprecation")
|
@SuppressWarnings("deprecation")
|
||||||
|
|
Loading…
Reference in New Issue