mirror of
https://github.com/apache/druid.git
synced 2025-02-25 12:35:33 +00:00
GenericIndexedWriter: Fix issue when writing large values to large columns. (#9029)
This commit is contained in:
parent
3325da1718
commit
d452cbbb82
@ -19,6 +19,7 @@
|
|||||||
|
|
||||||
package org.apache.druid.segment.data;
|
package org.apache.druid.segment.data;
|
||||||
|
|
||||||
|
import com.google.common.annotations.VisibleForTesting;
|
||||||
import com.google.common.base.Preconditions;
|
import com.google.common.base.Preconditions;
|
||||||
import com.google.common.primitives.Ints;
|
import com.google.common.primitives.Ints;
|
||||||
import it.unimi.dsi.fastutil.longs.LongArrayList;
|
import it.unimi.dsi.fastutil.longs.LongArrayList;
|
||||||
@ -48,7 +49,7 @@ import java.nio.channels.WritableByteChannel;
|
|||||||
*/
|
*/
|
||||||
public class GenericIndexedWriter<T> implements Serializer
|
public class GenericIndexedWriter<T> implements Serializer
|
||||||
{
|
{
|
||||||
private static int PAGE_SIZE = 4096;
|
private static final int PAGE_SIZE = 4096;
|
||||||
|
|
||||||
private static final MetaSerdeHelper<GenericIndexedWriter> SINGLE_FILE_META_SERDE_HELPER = MetaSerdeHelper
|
private static final MetaSerdeHelper<GenericIndexedWriter> SINGLE_FILE_META_SERDE_HELPER = MetaSerdeHelper
|
||||||
.firstWriteByte((GenericIndexedWriter x) -> GenericIndexed.VERSION_ONE)
|
.firstWriteByte((GenericIndexedWriter x) -> GenericIndexed.VERSION_ONE)
|
||||||
@ -148,9 +149,16 @@ public class GenericIndexedWriter<T> implements Serializer
|
|||||||
@Nullable
|
@Nullable
|
||||||
private LongList headerOutLong;
|
private LongList headerOutLong;
|
||||||
|
|
||||||
|
// Used by checkedCastNonnegativeLongToInt. Will always be Integer.MAX_VALUE in production.
|
||||||
|
private int intMaxForCasting = Integer.MAX_VALUE;
|
||||||
|
|
||||||
private final ByteBuffer getOffsetBuffer = ByteBuffer.allocate(Integer.BYTES);
|
private final ByteBuffer getOffsetBuffer = ByteBuffer.allocate(Integer.BYTES);
|
||||||
|
|
||||||
public GenericIndexedWriter(SegmentWriteOutMedium segmentWriteOutMedium, String filenameBase, ObjectStrategy<T> strategy)
|
public GenericIndexedWriter(
|
||||||
|
SegmentWriteOutMedium segmentWriteOutMedium,
|
||||||
|
String filenameBase,
|
||||||
|
ObjectStrategy<T> strategy
|
||||||
|
)
|
||||||
{
|
{
|
||||||
this(segmentWriteOutMedium, filenameBase, strategy, Integer.MAX_VALUE & ~PAGE_SIZE);
|
this(segmentWriteOutMedium, filenameBase, strategy, Integer.MAX_VALUE & ~PAGE_SIZE);
|
||||||
}
|
}
|
||||||
@ -210,13 +218,18 @@ public class GenericIndexedWriter<T> implements Serializer
|
|||||||
objectsSorted = false;
|
objectsSorted = false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@VisibleForTesting
|
||||||
|
void setIntMaxForCasting(final int intMaxForCasting)
|
||||||
|
{
|
||||||
|
this.intMaxForCasting = intMaxForCasting;
|
||||||
|
}
|
||||||
|
|
||||||
public void write(@Nullable T objectToWrite) throws IOException
|
public void write(@Nullable T objectToWrite) throws IOException
|
||||||
{
|
{
|
||||||
if (objectsSorted && prevObject != null && strategy.compare(prevObject, objectToWrite) >= 0) {
|
if (objectsSorted && prevObject != null && strategy.compare(prevObject, objectToWrite) >= 0) {
|
||||||
objectsSorted = false;
|
objectsSorted = false;
|
||||||
}
|
}
|
||||||
|
|
||||||
++numWritten;
|
|
||||||
// for compatibility with the format (see GenericIndexed javadoc for description of the format),
|
// for compatibility with the format (see GenericIndexed javadoc for description of the format),
|
||||||
// this field is used to store nullness marker, but in a better format this info can take 1 bit.
|
// this field is used to store nullness marker, but in a better format this info can take 1 bit.
|
||||||
valuesOut.writeInt(objectToWrite == null ? GenericIndexed.NULL_VALUE_SIZE_MARKER : 0);
|
valuesOut.writeInt(objectToWrite == null ? GenericIndexed.NULL_VALUE_SIZE_MARKER : 0);
|
||||||
@ -224,17 +237,28 @@ public class GenericIndexedWriter<T> implements Serializer
|
|||||||
strategy.writeTo(objectToWrite, valuesOut);
|
strategy.writeTo(objectToWrite, valuesOut);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!requireMultipleFiles) {
|
// Before updating the header, check if we need to switch to multi-file mode.
|
||||||
headerOut.writeInt(Ints.checkedCast(valuesOut.size()));
|
|
||||||
} else {
|
|
||||||
headerOutLong.add(valuesOut.size());
|
|
||||||
}
|
|
||||||
|
|
||||||
if (!requireMultipleFiles && getSerializedSize() > fileSizeLimit) {
|
if (!requireMultipleFiles && getSerializedSize() > fileSizeLimit) {
|
||||||
requireMultipleFiles = true;
|
requireMultipleFiles = true;
|
||||||
initializeHeaderOutLong();
|
initializeHeaderOutLong();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Increment number of values written. Important to do this after the check above, since numWritten is
|
||||||
|
// accessed during "initializeHeaderOutLong" to determine the length of the header.
|
||||||
|
++numWritten;
|
||||||
|
|
||||||
|
if (!requireMultipleFiles) {
|
||||||
|
headerOut.writeInt(checkedCastNonnegativeLongToInt(valuesOut.size()));
|
||||||
|
|
||||||
|
// Check _again_ if we need to switch to multi-file mode. (We might need to after updating the header.)
|
||||||
|
if (getSerializedSize() > fileSizeLimit) {
|
||||||
|
requireMultipleFiles = true;
|
||||||
|
initializeHeaderOutLong();
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
headerOutLong.add(valuesOut.size());
|
||||||
|
}
|
||||||
|
|
||||||
if (objectsSorted) {
|
if (objectsSorted) {
|
||||||
prevObject = objectToWrite;
|
prevObject = objectToWrite;
|
||||||
}
|
}
|
||||||
@ -250,7 +274,7 @@ public class GenericIndexedWriter<T> implements Serializer
|
|||||||
startOffset = getOffset(index - 1) + Integer.BYTES;
|
startOffset = getOffset(index - 1) + Integer.BYTES;
|
||||||
}
|
}
|
||||||
long endOffset = getOffset(index);
|
long endOffset = getOffset(index);
|
||||||
int valueSize = Ints.checkedCast(endOffset - startOffset);
|
int valueSize = checkedCastNonnegativeLongToInt(endOffset - startOffset);
|
||||||
if (valueSize == 0) {
|
if (valueSize == 0) {
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
@ -410,7 +434,7 @@ public class GenericIndexedWriter<T> implements Serializer
|
|||||||
if (headerIndex >= numWritten) {
|
if (headerIndex >= numWritten) {
|
||||||
return true;
|
return true;
|
||||||
} else if (headerIndex + bagSize <= numWritten) {
|
} else if (headerIndex + bagSize <= numWritten) {
|
||||||
currentValueOffset = headerOutLong.getLong(Ints.checkedCast(headerIndex + bagSize - 1));
|
currentValueOffset = headerOutLong.getLong(checkedCastNonnegativeLongToInt(headerIndex + bagSize - 1));
|
||||||
} else if (numWritten < headerIndex + bagSize) {
|
} else if (numWritten < headerIndex + bagSize) {
|
||||||
currentValueOffset = headerOutLong.getLong(numWritten - 1);
|
currentValueOffset = headerOutLong.getLong(numWritten - 1);
|
||||||
}
|
}
|
||||||
@ -446,7 +470,7 @@ public class GenericIndexedWriter<T> implements Serializer
|
|||||||
}
|
}
|
||||||
currentNumBytes = headerOutLong.getLong(pos);
|
currentNumBytes = headerOutLong.getLong(pos);
|
||||||
relativeNumBytes = currentNumBytes - relativeRefBytes;
|
relativeNumBytes = currentNumBytes - relativeRefBytes;
|
||||||
helperBuffer.putInt(0, Ints.checkedCast(relativeNumBytes));
|
helperBuffer.putInt(0, checkedCastNonnegativeLongToInt(relativeNumBytes));
|
||||||
helperBuffer.clear();
|
helperBuffer.clear();
|
||||||
smooshChannel.write(helperBuffer);
|
smooshChannel.write(helperBuffer);
|
||||||
}
|
}
|
||||||
@ -464,4 +488,17 @@ public class GenericIndexedWriter<T> implements Serializer
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Cast a long to an int, throwing an exception if it is out of range. Uses "intMaxForCasting" as the max
|
||||||
|
* integer value. Only works for nonnegative "n".
|
||||||
|
*/
|
||||||
|
private int checkedCastNonnegativeLongToInt(final long n)
|
||||||
|
{
|
||||||
|
if (n >= 0 && n <= intMaxForCasting) {
|
||||||
|
return (int) n;
|
||||||
|
} else {
|
||||||
|
// Likely bug.
|
||||||
|
throw new IAE("Value out of nonnegative int range");
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
@ -0,0 +1,68 @@
|
|||||||
|
/*
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one
|
||||||
|
* or more contributor license agreements. See the NOTICE file
|
||||||
|
* distributed with this work for additional information
|
||||||
|
* regarding copyright ownership. The ASF licenses this file
|
||||||
|
* to you under the Apache License, Version 2.0 (the
|
||||||
|
* "License"); you may not use this file except in compliance
|
||||||
|
* with the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing,
|
||||||
|
* software distributed under the License is distributed on an
|
||||||
|
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||||
|
* KIND, either express or implied. See the License for the
|
||||||
|
* specific language governing permissions and limitations
|
||||||
|
* under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.apache.druid.segment.data;
|
||||||
|
|
||||||
|
import org.apache.druid.common.config.NullHandling;
|
||||||
|
import org.apache.druid.java.util.common.io.smoosh.FileSmoosher;
|
||||||
|
import org.apache.druid.segment.writeout.OnHeapMemorySegmentWriteOutMedium;
|
||||||
|
import org.junit.BeforeClass;
|
||||||
|
import org.junit.Rule;
|
||||||
|
import org.junit.Test;
|
||||||
|
import org.junit.rules.TemporaryFolder;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.nio.channels.FileChannel;
|
||||||
|
import java.nio.file.StandardOpenOption;
|
||||||
|
|
||||||
|
public class GenericIndexedWriterTest
|
||||||
|
{
|
||||||
|
@Rule
|
||||||
|
public TemporaryFolder temporaryFolder = new TemporaryFolder();
|
||||||
|
|
||||||
|
@BeforeClass
|
||||||
|
public static void staticSetUp()
|
||||||
|
{
|
||||||
|
NullHandling.initializeForTests();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void writeLargeValueIntoLargeColumn() throws IOException
|
||||||
|
{
|
||||||
|
// Regression test for https://github.com/apache/incubator-druid/issues/9027.
|
||||||
|
|
||||||
|
final GenericIndexedWriter<String> writer = new GenericIndexedWriter<>(
|
||||||
|
new OnHeapMemorySegmentWriteOutMedium(),
|
||||||
|
"test",
|
||||||
|
GenericIndexed.STRING_STRATEGY,
|
||||||
|
100
|
||||||
|
);
|
||||||
|
|
||||||
|
writer.setIntMaxForCasting(150);
|
||||||
|
writer.open();
|
||||||
|
writer.write("i really like writing strings");
|
||||||
|
writer.write("i really like writing strings");
|
||||||
|
writer.write("i really like writing strings i really like writing strings i really like writing strings");
|
||||||
|
writer.write("i really like writing strings");
|
||||||
|
writer.writeTo(
|
||||||
|
FileChannel.open(temporaryFolder.newFile().toPath(), StandardOpenOption.WRITE),
|
||||||
|
new FileSmoosher(temporaryFolder.newFolder())
|
||||||
|
);
|
||||||
|
}
|
||||||
|
}
|
Loading…
x
Reference in New Issue
Block a user