Fix two id-over-maxId errors in StringDimensionIndexer. (#10245)

1) lookupId could return IDs beyond maxId if called with a recently added value.
2) getRow could return an ID for null beyond maxId, if null was recently
   encountered in a dimension that initially didn't appear at all. (In this case,
   the dictionary ID for null can be > 0).

Also add a comment explaining how this stuff is supposed to work.
This commit is contained in:
Gian Merlino 2020-08-11 20:32:10 -07:00 committed by GitHub
parent 6baea0b4d5
commit e273264332
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
1 changed files with 26 additions and 4 deletions

View File

@ -51,6 +51,7 @@ import org.apache.druid.segment.filter.BooleanValueMatcher;
import org.apache.druid.segment.incremental.IncrementalIndex; import org.apache.druid.segment.incremental.IncrementalIndex;
import org.apache.druid.segment.incremental.IncrementalIndexRow; import org.apache.druid.segment.incremental.IncrementalIndexRow;
import org.apache.druid.segment.incremental.IncrementalIndexRowHolder; import org.apache.druid.segment.incremental.IncrementalIndexRowHolder;
import org.checkerframework.checker.nullness.qual.MonotonicNonNull;
import javax.annotation.Nullable; import javax.annotation.Nullable;
import java.util.ArrayList; import java.util.ArrayList;
@ -87,7 +88,7 @@ public class StringDimensionIndexer implements DimensionIndexer<Integer, int[],
public DimensionDictionary() public DimensionDictionary()
{ {
this.lock = new ReentrantReadWriteLock(); this.lock = new ReentrantReadWriteLock();
valueToId.defaultReturnValue(-1); valueToId.defaultReturnValue(ABSENT_VALUE_ID);
} }
public int getId(@Nullable String value) public int getId(@Nullable String value)
@ -513,11 +514,21 @@ public class StringDimensionIndexer implements DimensionIndexer<Integer, int[],
final ExtractionFn extractionFn = spec.getExtractionFn(); final ExtractionFn extractionFn = spec.getExtractionFn();
final int dimIndex = desc.getIndex(); final int dimIndex = desc.getIndex();
// maxId is used in concert with getLastRowIndex() in IncrementalIndex to ensure that callers do not encounter
// rows that contain IDs over the initially-reported cardinality. The main idea is that IncrementalIndex establishes
// a watermark at the time a cursor is created, and doesn't allow the cursor to walk past that watermark.
//
// Additionally, this selector explicitly blocks knowledge of IDs past maxId that may occur from other causes
// (for example: nulls getting generated for empty arrays, or calls to lookupId).
final int maxId = getCardinality(); final int maxId = getCardinality();
class IndexerDimensionSelector implements DimensionSelector, IdLookup class IndexerDimensionSelector implements DimensionSelector, IdLookup
{ {
private final ArrayBasedIndexedInts indexedInts = new ArrayBasedIndexedInts(); private final ArrayBasedIndexedInts indexedInts = new ArrayBasedIndexedInts();
@Nullable
@MonotonicNonNull
private int[] nullIdIntArray; private int[] nullIdIntArray;
@Override @Override
@ -542,14 +553,15 @@ public class StringDimensionIndexer implements DimensionIndexer<Integer, int[],
rowSize = 0; rowSize = 0;
} else { } else {
final int nullId = getEncodedValue(null, false); final int nullId = getEncodedValue(null, false);
if (nullId > -1) { if (nullId >= 0 && nullId < maxId) {
// null was added to the dictionary before this selector was created; return its ID.
if (nullIdIntArray == null) { if (nullIdIntArray == null) {
nullIdIntArray = new int[]{nullId}; nullIdIntArray = new int[]{nullId};
} }
row = nullIdIntArray; row = nullIdIntArray;
rowSize = 1; rowSize = 1;
} else { } else {
// doesn't contain nullId, then empty array is used // null doesn't exist in the dictionary; return an empty array.
// Choose to use ArrayBasedIndexedInts later, instead of special "empty" IndexedInts, for monomorphism // Choose to use ArrayBasedIndexedInts later, instead of special "empty" IndexedInts, for monomorphism
row = IntArrays.EMPTY_ARRAY; row = IntArrays.EMPTY_ARRAY;
rowSize = 0; rowSize = 0;
@ -668,6 +680,7 @@ public class StringDimensionIndexer implements DimensionIndexer<Integer, int[],
public String lookupName(int id) public String lookupName(int id)
{ {
if (id >= maxId) { if (id >= maxId) {
// Sanity check; IDs beyond maxId should not be known to callers. (See comment above.)
throw new ISE("id[%d] >= maxId[%d]", id, maxId); throw new ISE("id[%d] >= maxId[%d]", id, maxId);
} }
final String strValue = getActualValue(id, false); final String strValue = getActualValue(id, false);
@ -695,7 +708,16 @@ public class StringDimensionIndexer implements DimensionIndexer<Integer, int[],
"cannot perform lookup when applying an extraction function" "cannot perform lookup when applying an extraction function"
); );
} }
return getEncodedValue(name, false);
final int id = getEncodedValue(name, false);
if (id < maxId) {
return id;
} else {
// Can happen if a value was added to our dimLookup after this selector was created. Act like it
// doesn't exist.
return ABSENT_VALUE_ID;
}
} }
@SuppressWarnings("deprecation") @SuppressWarnings("deprecation")