Fix two id-over-maxId errors in StringDimensionIndexer. (#10245)

1) lookupId could return IDs beyond maxId if called with a recently added value.
2) getRow could return an ID for null beyond maxId, if null was recently
   encountered in a dimension that initially didn't appear at all. (In this case,
   the dictionary ID for null can be > 0).

Also add a comment explaining how this stuff is supposed to work.
This commit is contained in:
Gian Merlino 2020-08-11 20:32:10 -07:00 committed by GitHub
parent 6baea0b4d5
commit e273264332
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
1 changed files with 26 additions and 4 deletions

View File

@ -51,6 +51,7 @@ import org.apache.druid.segment.filter.BooleanValueMatcher;
import org.apache.druid.segment.incremental.IncrementalIndex;
import org.apache.druid.segment.incremental.IncrementalIndexRow;
import org.apache.druid.segment.incremental.IncrementalIndexRowHolder;
import org.checkerframework.checker.nullness.qual.MonotonicNonNull;
import javax.annotation.Nullable;
import java.util.ArrayList;
@ -87,7 +88,7 @@ public class StringDimensionIndexer implements DimensionIndexer<Integer, int[],
public DimensionDictionary()
{
this.lock = new ReentrantReadWriteLock();
valueToId.defaultReturnValue(-1);
valueToId.defaultReturnValue(ABSENT_VALUE_ID);
}
public int getId(@Nullable String value)
@ -513,11 +514,21 @@ public class StringDimensionIndexer implements DimensionIndexer<Integer, int[],
final ExtractionFn extractionFn = spec.getExtractionFn();
final int dimIndex = desc.getIndex();
// maxId is used in concert with getLastRowIndex() in IncrementalIndex to ensure that callers do not encounter
// rows that contain IDs over the initially-reported cardinality. The main idea is that IncrementalIndex establishes
// a watermark at the time a cursor is created, and doesn't allow the cursor to walk past that watermark.
//
// Additionally, this selector explicitly blocks knowledge of IDs past maxId that may occur from other causes
// (for example: nulls getting generated for empty arrays, or calls to lookupId).
final int maxId = getCardinality();
class IndexerDimensionSelector implements DimensionSelector, IdLookup
{
private final ArrayBasedIndexedInts indexedInts = new ArrayBasedIndexedInts();
@Nullable
@MonotonicNonNull
private int[] nullIdIntArray;
@Override
@ -542,14 +553,15 @@ public class StringDimensionIndexer implements DimensionIndexer<Integer, int[],
rowSize = 0;
} else {
final int nullId = getEncodedValue(null, false);
if (nullId > -1) {
if (nullId >= 0 && nullId < maxId) {
// null was added to the dictionary before this selector was created; return its ID.
if (nullIdIntArray == null) {
nullIdIntArray = new int[]{nullId};
}
row = nullIdIntArray;
rowSize = 1;
} else {
// doesn't contain nullId, then empty array is used
// null doesn't exist in the dictionary; return an empty array.
// Choose to use ArrayBasedIndexedInts later, instead of special "empty" IndexedInts, for monomorphism
row = IntArrays.EMPTY_ARRAY;
rowSize = 0;
@ -668,6 +680,7 @@ public class StringDimensionIndexer implements DimensionIndexer<Integer, int[],
public String lookupName(int id)
{
if (id >= maxId) {
// Sanity check; IDs beyond maxId should not be known to callers. (See comment above.)
throw new ISE("id[%d] >= maxId[%d]", id, maxId);
}
final String strValue = getActualValue(id, false);
@ -695,7 +708,16 @@ public class StringDimensionIndexer implements DimensionIndexer<Integer, int[],
"cannot perform lookup when applying an extraction function"
);
}
return getEncodedValue(name, false);
final int id = getEncodedValue(name, false);
if (id < maxId) {
return id;
} else {
// Can happen if a value was added to our dimLookup after this selector was created. Act like it
// doesn't exist.
return ABSENT_VALUE_ID;
}
}
@SuppressWarnings("deprecation")