Eager file unmapping in IndexIO, IndexMerger and IndexMergerV9 (#3422)

* Eager file unmapping in IndexIO, IndexMerger and IndexMergerV9. The immediate purpose of this change is to allow running IndexMergeBenchmark on Windows, but it should also be universally 'better' than the non-deterministic unmapping that otherwise happens only when the MappedByteBuffers are garbage-collected (BACKEND-312). A minimal sketch of the idiom appears after this list.

* Use Guava's Closer with its proper create/register/rethrow/close pattern in IndexIO, IndexMerger and IndexMergerV9 (shown in isolation after the IndexIO diff below)

* Unmap file in IndexMergerV9.makeInvertedIndexes() using try-with-resources

* Reformat IndexIO
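
For context, the eager-unmapping idiom referenced above, reduced to a minimal sketch. It assumes only Guava's Files.map and ByteBufferUtils.unmap from com.metamx.common, both of which the diffs below use; the class and method names here are illustrative, not code from this commit.

    import com.google.common.io.Files;
    import com.metamx.common.ByteBufferUtils;

    import java.io.File;
    import java.io.IOException;
    import java.nio.MappedByteBuffer;

    class EagerUnmapSketch
    {
      static void readThenRelease(File file) throws IOException
      {
        final MappedByteBuffer mapped = Files.map(file);
        try {
          // ... read from `mapped` ...
        }
        finally {
          // Deterministic release. Without this, the mapping lives until the
          // buffer is garbage-collected; on Windows that blocks deleting the
          // underlying file.
          ByteBufferUtils.unmap(mapped);
        }
      }
    }
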
Roman Leventov 2016-09-07 20:43:47 +03:00 committed by Charles Allen
parent c0e62b536a
commit 4f0bcdce36
3 changed files with 419 additions and 380 deletions

IndexIO.java

@@ -32,6 +32,7 @@ import com.google.common.collect.Maps;
 import com.google.common.collect.Sets;
 import com.google.common.io.ByteStreams;
 import com.google.common.io.Closeables;
+import com.google.common.io.Closer;
 import com.google.common.io.Files;
 import com.google.common.primitives.Ints;
 import com.google.inject.Inject;
@@ -120,17 +121,17 @@ public class IndexIO
     this.columnConfig = Preconditions.checkNotNull(columnConfig, "null ColumnConfig");
     defaultIndexIOHandler = new DefaultIndexIOHandler(mapper);
     indexLoaders = ImmutableMap.<Integer, IndexLoader>builder()
-                               .put(0, new LegacyIndexLoader(defaultIndexIOHandler, columnConfig))
-                               .put(1, new LegacyIndexLoader(defaultIndexIOHandler, columnConfig))
-                               .put(2, new LegacyIndexLoader(defaultIndexIOHandler, columnConfig))
-                               .put(3, new LegacyIndexLoader(defaultIndexIOHandler, columnConfig))
-                               .put(4, new LegacyIndexLoader(defaultIndexIOHandler, columnConfig))
-                               .put(5, new LegacyIndexLoader(defaultIndexIOHandler, columnConfig))
-                               .put(6, new LegacyIndexLoader(defaultIndexIOHandler, columnConfig))
-                               .put(7, new LegacyIndexLoader(defaultIndexIOHandler, columnConfig))
-                               .put(8, new LegacyIndexLoader(defaultIndexIOHandler, columnConfig))
-                               .put(9, new V9IndexLoader(columnConfig))
-                               .build();
+        .put(0, new LegacyIndexLoader(defaultIndexIOHandler, columnConfig))
+        .put(1, new LegacyIndexLoader(defaultIndexIOHandler, columnConfig))
+        .put(2, new LegacyIndexLoader(defaultIndexIOHandler, columnConfig))
+        .put(3, new LegacyIndexLoader(defaultIndexIOHandler, columnConfig))
+        .put(4, new LegacyIndexLoader(defaultIndexIOHandler, columnConfig))
+        .put(5, new LegacyIndexLoader(defaultIndexIOHandler, columnConfig))
+        .put(6, new LegacyIndexLoader(defaultIndexIOHandler, columnConfig))
+        .put(7, new LegacyIndexLoader(defaultIndexIOHandler, columnConfig))
+        .put(8, new LegacyIndexLoader(defaultIndexIOHandler, columnConfig))
+        .put(9, new V9IndexLoader(columnConfig))
+        .build();
   }
@@ -269,13 +270,14 @@ public class IndexIO
       case 6:
       case 7:
         log.info("Old version, re-persisting.");
-        QueryableIndex segmentToConvert = loadIndex(toConvert);
-        new IndexMerger(mapper, this).append(
-            Arrays.<IndexableAdapter>asList(new QueryableIndexIndexableAdapter(segmentToConvert)),
-            null,
-            converted,
-            indexSpec
-        );
+        try (QueryableIndex segmentToConvert = loadIndex(toConvert)) {
+          new IndexMerger(mapper, this).append(
+              Arrays.<IndexableAdapter>asList(new QueryableIndexIndexableAdapter(segmentToConvert)),
+              null,
+              converted,
+              indexSpec
+          );
+        }
         return true;
       case 8:
         defaultIndexIOHandler.convertV8toV9(toConvert, converted, indexSpec);
@@ -545,347 +547,355 @@ public class IndexIO
       Closeables.close(indexIn, false);
     }
-    SmooshedFileMapper v8SmooshedFiles = Smoosh.map(v8Dir);
+    Closer closer = Closer.create();
+    try {
+      SmooshedFileMapper v8SmooshedFiles = closer.register(Smoosh.map(v8Dir));

       v9Dir.mkdirs();
-    final FileSmoosher v9Smoosher = new FileSmoosher(v9Dir);
+      final FileSmoosher v9Smoosher = closer.register(new FileSmoosher(v9Dir));

       ByteStreams.write(Ints.toByteArray(9), Files.newOutputStreamSupplier(new File(v9Dir, "version.bin")));

       Map<String, GenericIndexed<ImmutableBitmap>> bitmapIndexes = Maps.newHashMap();
       final ByteBuffer invertedBuffer = v8SmooshedFiles.mapFile("inverted.drd");
       BitmapSerdeFactory bitmapSerdeFactory = indexSpec.getBitmapSerdeFactory();

       while (invertedBuffer.hasRemaining()) {
         final String dimName = serializerUtils.readString(invertedBuffer);
         bitmapIndexes.put(
             dimName,
             GenericIndexed.read(invertedBuffer, bitmapSerdeFactory.getObjectStrategy())
         );
       }

       Map<String, ImmutableRTree> spatialIndexes = Maps.newHashMap();
       final ByteBuffer spatialBuffer = v8SmooshedFiles.mapFile("spatial.drd");
       while (spatialBuffer != null && spatialBuffer.hasRemaining()) {
         spatialIndexes.put(
             serializerUtils.readString(spatialBuffer),
             ByteBufferSerializer.read(
                 spatialBuffer, new IndexedRTree.ImmutableRTreeObjectStrategy(
                     bitmapSerdeFactory.getBitmapFactory()
                 )
             )
         );
       }

       final LinkedHashSet<String> skippedFiles = Sets.newLinkedHashSet();
       final Set<String> skippedDimensions = Sets.newLinkedHashSet();
       for (String filename : v8SmooshedFiles.getInternalFilenames()) {
         log.info("Processing file[%s]", filename);
         if (filename.startsWith("dim_")) {
           final ColumnDescriptor.Builder builder = ColumnDescriptor.builder();
           builder.setValueType(ValueType.STRING);

           final List<ByteBuffer> outParts = Lists.newArrayList();

           ByteBuffer dimBuffer = v8SmooshedFiles.mapFile(filename);
           String dimension = serializerUtils.readString(dimBuffer);
           if (!filename.equals(String.format("dim_%s.drd", dimension))) {
             throw new ISE("loaded dimension[%s] from file[%s]", dimension, filename);
           }

           ByteArrayOutputStream nameBAOS = new ByteArrayOutputStream();
           serializerUtils.writeString(nameBAOS, dimension);
           outParts.add(ByteBuffer.wrap(nameBAOS.toByteArray()));

           GenericIndexed<String> dictionary = GenericIndexed.read(
               dimBuffer, GenericIndexed.STRING_STRATEGY
           );

           if (dictionary.size() == 0) {
             log.info("Dimension[%s] had cardinality 0, equivalent to no column, so skipping.", dimension);
             skippedDimensions.add(dimension);
             continue;
           }

           int emptyStrIdx = dictionary.indexOf("");
           List<Integer> singleValCol = null;
           VSizeIndexed multiValCol = VSizeIndexed.readFromByteBuffer(dimBuffer.asReadOnlyBuffer());
           GenericIndexed<ImmutableBitmap> bitmaps = bitmapIndexes.get(dimension);
           ImmutableRTree spatialIndex = spatialIndexes.get(dimension);

           final BitmapFactory bitmapFactory = bitmapSerdeFactory.getBitmapFactory();
           boolean onlyOneValue = true;
           MutableBitmap nullsSet = null;
           for (int i = 0; i < multiValCol.size(); ++i) {
             VSizeIndexedInts rowValue = multiValCol.get(i);
             if (!onlyOneValue) {
               break;
             }
             if (rowValue.size() > 1) {
               onlyOneValue = false;
             }
             if (rowValue.size() == 0 || rowValue.get(0) == emptyStrIdx) {
               if (nullsSet == null) {
                 nullsSet = bitmapFactory.makeEmptyMutableBitmap();
               }
               nullsSet.add(i);
             }
           }

           if (onlyOneValue) {
             log.info("Dimension[%s] is single value, converting...", dimension);
             final boolean bumpedDictionary;
             if (nullsSet != null) {
               log.info("Dimension[%s] has null rows.", dimension);
               final ImmutableBitmap theNullSet = bitmapFactory.makeImmutableBitmap(nullsSet);

               if (dictionary.get(0) != null) {
                 log.info("Dimension[%s] has no null value in the dictionary, expanding...", dimension);
                 bumpedDictionary = true;
                 final List<String> nullList = Lists.newArrayList();
                 nullList.add(null);

                 dictionary = GenericIndexed.fromIterable(
                     Iterables.concat(nullList, dictionary),
                     GenericIndexed.STRING_STRATEGY
                 );

                 bitmaps = GenericIndexed.fromIterable(
                     Iterables.concat(Arrays.asList(theNullSet), bitmaps),
                     bitmapSerdeFactory.getObjectStrategy()
                 );
               } else {
                 bumpedDictionary = false;
                 bitmaps = GenericIndexed.fromIterable(
                     Iterables.concat(
                         Arrays.asList(
                             bitmapFactory
                                 .union(Arrays.asList(theNullSet, bitmaps.get(0)))
                         ),
                         Iterables.skip(bitmaps, 1)
                     ),
                     bitmapSerdeFactory.getObjectStrategy()
                 );
               }
             } else {
               bumpedDictionary = false;
             }

             final VSizeIndexed finalMultiValCol = multiValCol;
             singleValCol = new AbstractList<Integer>()
             {
               @Override
               public Integer get(int index)
               {
                 final VSizeIndexedInts ints = finalMultiValCol.get(index);
                 return ints.size() == 0 ? 0 : ints.get(0) + (bumpedDictionary ? 1 : 0);
               }

               @Override
               public int size()
               {
                 return finalMultiValCol.size();
               }
             };

             multiValCol = null;
           } else {
             builder.setHasMultipleValues(true);
           }

           final CompressedObjectStrategy.CompressionStrategy compressionStrategy = indexSpec.getDimensionCompression();

           final DictionaryEncodedColumnPartSerde.LegacySerializerBuilder columnPartBuilder = DictionaryEncodedColumnPartSerde
               .legacySerializerBuilder()
               .withDictionary(dictionary)
               .withBitmapSerdeFactory(bitmapSerdeFactory)
               .withBitmaps(bitmaps)
               .withSpatialIndex(spatialIndex)
               .withByteOrder(BYTE_ORDER);

           if (singleValCol != null) {
             if (compressionStrategy != CompressedObjectStrategy.CompressionStrategy.UNCOMPRESSED) {
               columnPartBuilder.withSingleValuedColumn(
                   CompressedVSizeIntsIndexedSupplier.fromList(
                       singleValCol,
                       dictionary.size(),
                       CompressedVSizeIntsIndexedSupplier.maxIntsInBufferForValue(dictionary.size()),
                       BYTE_ORDER,
                       compressionStrategy
                   )
               );
             } else {
               columnPartBuilder.withSingleValuedColumn(VSizeIndexedInts.fromList(singleValCol, dictionary.size()));
             }
           } else if (compressionStrategy != CompressedObjectStrategy.CompressionStrategy.UNCOMPRESSED) {
             columnPartBuilder.withMultiValuedColumn(
                 CompressedVSizeIndexedSupplier.fromIterable(
                     multiValCol,
                     dictionary.size(),
                     BYTE_ORDER,
                     compressionStrategy
                 )
             );
           } else {
             columnPartBuilder.withMultiValuedColumn(multiValCol);
           }

           final ColumnDescriptor serdeficator = builder
               .addSerde(columnPartBuilder.build())
               .build();

           ByteArrayOutputStream baos = new ByteArrayOutputStream();
           serializerUtils.writeString(baos, mapper.writeValueAsString(serdeficator));
           byte[] specBytes = baos.toByteArray();

           final SmooshedWriter channel = v9Smoosher.addWithSmooshedWriter(
               dimension, serdeficator.numBytes() + specBytes.length
           );
           channel.write(ByteBuffer.wrap(specBytes));
           serdeficator.write(channel);
           channel.close();
         } else if (filename.startsWith("met_")) {
           if (!filename.endsWith(String.format("%s.drd", BYTE_ORDER))) {
             skippedFiles.add(filename);
             continue;
           }

           MetricHolder holder = MetricHolder.fromByteBuffer(v8SmooshedFiles.mapFile(filename));
           final String metric = holder.getName();

           final ColumnDescriptor.Builder builder = ColumnDescriptor.builder();

           switch (holder.getType()) {
             case LONG:
               builder.setValueType(ValueType.LONG);
               builder.addSerde(
                   LongGenericColumnPartSerde.legacySerializerBuilder()
                                             .withByteOrder(BYTE_ORDER)
                                             .withDelegate(holder.longType)
                                             .build()
               );
               break;
             case FLOAT:
               builder.setValueType(ValueType.FLOAT);
               builder.addSerde(
                   FloatGenericColumnPartSerde.legacySerializerBuilder()
                                              .withByteOrder(BYTE_ORDER)
                                              .withDelegate(holder.floatType)
                                              .build()
               );
               break;
             case COMPLEX:
               if (!(holder.complexType instanceof GenericIndexed)) {
                 throw new ISE("Serialized complex types must be GenericIndexed objects.");
               }
               final GenericIndexed column = (GenericIndexed) holder.complexType;
               final String complexType = holder.getTypeName();

               builder.setValueType(ValueType.COMPLEX);
               builder.addSerde(
                   ComplexColumnPartSerde.legacySerializerBuilder()
                                         .withTypeName(complexType)
                                         .withDelegate(column).build()
               );
               break;
             default:
               throw new ISE("Unknown type[%s]", holder.getType());
           }

           final ColumnDescriptor serdeficator = builder.build();

           ByteArrayOutputStream baos = new ByteArrayOutputStream();
           serializerUtils.writeString(baos, mapper.writeValueAsString(serdeficator));
           byte[] specBytes = baos.toByteArray();

           final SmooshedWriter channel = v9Smoosher.addWithSmooshedWriter(
               metric, serdeficator.numBytes() + specBytes.length
           );
           channel.write(ByteBuffer.wrap(specBytes));
           serdeficator.write(channel);
           channel.close();
         } else if (String.format("time_%s.drd", BYTE_ORDER).equals(filename)) {
           CompressedLongsIndexedSupplier timestamps = CompressedLongsIndexedSupplier.fromByteBuffer(
               v8SmooshedFiles.mapFile(filename), BYTE_ORDER
           );

           final ColumnDescriptor.Builder builder = ColumnDescriptor.builder();
           builder.setValueType(ValueType.LONG);
           builder.addSerde(
               LongGenericColumnPartSerde.legacySerializerBuilder()
                                         .withByteOrder(BYTE_ORDER)
                                         .withDelegate(timestamps)
                                         .build()
           );
           final ColumnDescriptor serdeficator = builder.build();

           ByteArrayOutputStream baos = new ByteArrayOutputStream();
           serializerUtils.writeString(baos, mapper.writeValueAsString(serdeficator));
           byte[] specBytes = baos.toByteArray();

           final SmooshedWriter channel = v9Smoosher.addWithSmooshedWriter(
               "__time", serdeficator.numBytes() + specBytes.length
           );
           channel.write(ByteBuffer.wrap(specBytes));
           serdeficator.write(channel);
           channel.close();
         } else {
           skippedFiles.add(filename);
         }
       }

       final ByteBuffer indexBuffer = v8SmooshedFiles.mapFile("index.drd");
       indexBuffer.get(); // Skip the version byte
       final GenericIndexed<String> dims8 = GenericIndexed.read(
           indexBuffer, GenericIndexed.STRING_STRATEGY
       );
       final GenericIndexed<String> dims9 = GenericIndexed.fromIterable(
           Iterables.filter(
               dims8, new Predicate<String>()
               {
                 @Override
                 public boolean apply(String s)
                 {
                   return !skippedDimensions.contains(s);
                 }
               }
           ),
           GenericIndexed.STRING_STRATEGY
       );
       final GenericIndexed<String> availableMetrics = GenericIndexed.read(
           indexBuffer, GenericIndexed.STRING_STRATEGY
       );
       final Interval dataInterval = new Interval(serializerUtils.readString(indexBuffer));
       final BitmapSerdeFactory segmentBitmapSerdeFactory = mapper.readValue(
           serializerUtils.readString(indexBuffer),
           BitmapSerdeFactory.class
       );

       Set<String> columns = Sets.newTreeSet();
       columns.addAll(Lists.newArrayList(dims9));
       columns.addAll(Lists.newArrayList(availableMetrics));
       GenericIndexed<String> cols = GenericIndexed.fromIterable(columns, GenericIndexed.STRING_STRATEGY);

       final String segmentBitmapSerdeFactoryString = mapper.writeValueAsString(segmentBitmapSerdeFactory);

       final long numBytes = cols.getSerializedSize() + dims9.getSerializedSize() + 16
                             + serializerUtils.getSerializedStringByteSize(segmentBitmapSerdeFactoryString);
       final SmooshedWriter writer = v9Smoosher.addWithSmooshedWriter("index.drd", numBytes);
       cols.writeToChannel(writer);
       dims9.writeToChannel(writer);
       serializerUtils.writeLong(writer, dataInterval.getStartMillis());
       serializerUtils.writeLong(writer, dataInterval.getEndMillis());
       serializerUtils.writeString(writer, segmentBitmapSerdeFactoryString);
       writer.close();

       final ByteBuffer metadataBuffer = v8SmooshedFiles.mapFile("metadata.drd");
       if (metadataBuffer != null) {
         v9Smoosher.add("metadata.drd", metadataBuffer);
       }

       log.info("Skipped files[%s]", skippedFiles);

       v9Smoosher.close();
+    }
+    catch (Throwable t) {
+      throw closer.rethrow(t);
+    }
+    finally {
+      closer.close();
+    }
   }
 }
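
The Closer pattern applied in convertV8toV9 above, shown in isolation. This is a sketch of the canonical Guava idiom, with a plain FileInputStream standing in for the SmooshedFileMapper and FileSmoosher that the real method registers; the class and method names are illustrative.

    import com.google.common.io.Closer;

    import java.io.File;
    import java.io.FileInputStream;
    import java.io.IOException;
    import java.io.InputStream;

    class CloserPatternSketch
    {
      static void readWithCloser(File file) throws IOException
      {
        Closer closer = Closer.create();
        try {
          // Resources are registered as they are opened; close() runs them
          // in reverse order.
          InputStream in = closer.register(new FileInputStream(file));
          // ... use `in` ...
        }
        catch (Throwable t) {
          // rethrow() records `t` so that exceptions thrown while closing
          // are suppressed rather than masking the original failure.
          throw closer.rethrow(t);
        }
        finally {
          closer.close();
        }
      }
    }
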

IndexMerger.java

@@ -48,6 +48,7 @@ import com.metamx.collections.bitmap.MutableBitmap;
 import com.metamx.collections.spatial.ImmutableRTree;
 import com.metamx.collections.spatial.RTree;
 import com.metamx.collections.spatial.split.LinearGutmanSplitStrategy;
+import com.metamx.common.ByteBufferUtils;
 import com.metamx.common.IAE;
 import com.metamx.common.ISE;
 import com.metamx.common.Pair;
@@ -933,6 +934,14 @@ public class IndexMerger
       File dimOutFile = dimOuts.get(i).getFile();
       final MappedByteBuffer dimValsMapped = Files.map(dimOutFile);
+      closer.register(new Closeable()
+      {
+        @Override
+        public void close() throws IOException
+        {
+          ByteBufferUtils.unmap(dimValsMapped);
+        }
+      });

       if (!dimension.equals(serializerUtils.readString(dimValsMapped))) {
         throw new ISE("dimensions[%s] didn't equate!? This is a major WTF moment.", dimension);
@@ -1078,6 +1087,9 @@ public class IndexMerger
       indexIO.getDefaultIndexIOHandler().convertV8toV9(v8OutDir, outDir, indexSpec);
       return outDir;
     }
+    catch (Throwable t) {
+      throw closer.rethrow(t);
+    }
     finally {
       closer.close();
     }

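The Closeable registered above simply adapts ByteBufferUtils.unmap to the Closer, so the mapped dictionary file is unmapped on every exit path of the merge. The same idiom in isolation (a sketch; class and method names are illustrative):

    import com.google.common.io.Closer;
    import com.google.common.io.Files;
    import com.metamx.common.ByteBufferUtils;

    import java.io.Closeable;
    import java.io.File;
    import java.io.IOException;
    import java.nio.MappedByteBuffer;

    class RegisterUnmapperSketch
    {
      // Map a file and arrange for it to be unmapped when `closer` closes.
      static MappedByteBuffer mapAndRegister(final Closer closer, File file) throws IOException
      {
        final MappedByteBuffer mapped = Files.map(file);
        closer.register(new Closeable()
        {
          @Override
          public void close() throws IOException
          {
            ByteBufferUtils.unmap(mapped);
          }
        });
        return mapped;
      }
    }
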
IndexMergerV9.java

@@ -37,6 +37,7 @@ import com.metamx.collections.bitmap.MutableBitmap;
 import com.metamx.collections.spatial.ImmutableRTree;
 import com.metamx.collections.spatial.RTree;
 import com.metamx.collections.spatial.split.LinearGutmanSplitStrategy;
+import com.metamx.common.ByteBufferUtils;
 import com.metamx.common.ISE;
 import com.metamx.common.io.smoosh.FileSmoosher;
 import com.metamx.common.io.smoosh.SmooshedWriter;
@@ -259,6 +260,9 @@ public class IndexMergerV9 extends IndexMerger
       return outDir;
     }
+    catch (Throwable t) {
+      throw closer.rethrow(t);
+    }
     finally {
       closer.close();
     }
@@ -382,7 +386,11 @@ public class IndexMergerV9 extends IndexMerger
       final DictionaryEncodedColumnPartSerde.SerializerBuilder partBuilder = DictionaryEncodedColumnPartSerde
           .serializerBuilder()
           .withDictionary(dimValueWriters.get(i))
-          .withValue(dimWriters.get(i), hasMultiValue, compressionStrategy != CompressedObjectStrategy.CompressionStrategy.UNCOMPRESSED)
+          .withValue(
+              dimWriters.get(i),
+              hasMultiValue,
+              compressionStrategy != CompressedObjectStrategy.CompressionStrategy.UNCOMPRESSED
+          )
           .withBitmapSerdeFactory(bitmapSerdeFactory)
           .withBitmapIndex(bitmapIndexWriters.get(i))
           .withSpatialIndex(spatialIndexWriters.get(i))
@@ -536,73 +544,82 @@ public class IndexMergerV9 extends IndexMerger
       fos.close();

       final MappedByteBuffer dimValsMapped = Files.map(dimValueFile);
-      Indexed<String> dimVals = GenericIndexed.read(dimValsMapped, GenericIndexed.STRING_STRATEGY);
+      try (Closeable dimValsMappedUnmapper = new Closeable()
+      {
+        @Override
+        public void close()
+        {
+          ByteBufferUtils.unmap(dimValsMapped);
+        }
+      }) {
+        Indexed<String> dimVals = GenericIndexed.read(dimValsMapped, GenericIndexed.STRING_STRATEGY);

         ByteBufferWriter<ImmutableRTree> spatialIndexWriter = spatialIndexWriters.get(dimIndex);
         RTree tree = null;
         if (spatialIndexWriter != null) {
           BitmapFactory bitmapFactory = bitmapSerdeFactory.getBitmapFactory();
           tree = new RTree(2, new LinearGutmanSplitStrategy(0, 50, bitmapFactory), bitmapFactory);
         }

         IndexSeeker[] dictIdSeeker = toIndexSeekers(adapters, dimConversions, dimension);

         ImmutableBitmap nullRowBitmap = bitmapSerdeFactory.getBitmapFactory().makeImmutableBitmap(
             nullRowsList.get(dimIndex)
         );

         //Iterate all dim values's dictionary id in ascending order which in line with dim values's compare result.
         for (int dictId = 0; dictId < dimVals.size(); dictId++) {
           progress.progress();
           List<Iterable<Integer>> convertedInverteds = Lists.newArrayListWithCapacity(adapters.size());
           for (int j = 0; j < adapters.size(); ++j) {
             int seekedDictId = dictIdSeeker[j].seek(dictId);
             if (seekedDictId != IndexSeeker.NOT_EXIST) {
               convertedInverteds.add(
                   new ConvertingIndexedInts(
                       adapters.get(j).getBitmapIndex(dimension, seekedDictId), rowNumConversions.get(j)
                   )
               );
             }
           }

           MutableBitmap bitset = bitmapSerdeFactory.getBitmapFactory().makeEmptyMutableBitmap();
           for (Integer row : CombiningIterable.createSplatted(
               convertedInverteds,
               Ordering.<Integer>natural().nullsFirst()
           )) {
             if (row != INVALID_ROW) {
               bitset.add(row);
             }
           }

           ImmutableBitmap bitmapToWrite = bitmapSerdeFactory.getBitmapFactory().makeImmutableBitmap(bitset);
           if ((dictId == 0) && (Iterables.getFirst(dimVals, "") == null)) {
             bitmapToWrite = nullRowBitmap.union(bitmapToWrite);
           }
           bitmapIndexWriters.get(dimIndex).write(bitmapToWrite);

           if (spatialIndexWriter != null) {
             String dimVal = dimVals.get(dictId);
             if (dimVal != null) {
               List<String> stringCoords = Lists.newArrayList(SPLITTER.split(dimVal));
               float[] coords = new float[stringCoords.size()];
               for (int j = 0; j < coords.length; j++) {
                 coords[j] = Float.valueOf(stringCoords.get(j));
               }
               tree.insert(coords, bitset);
             }
           }
         }
         if (spatialIndexWriter != null) {
           spatialIndexWriter.write(ImmutableRTree.newImmutableFromMutable(tree));
         }

         log.info(
             "Completed dim[%s] inverted with cardinality[%,d] in %,d millis.",
             dimension,
             dimVals.size(),
             System.currentTimeMillis() - dimStartTime
         );
+      }
     }
     log.info("Completed inverted index in %,d millis.", System.currentTimeMillis() - startTime);
     progress.stopSection(section);
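
The try-with-resources variant used in makeInvertedIndexes above, reduced to its essentials (a sketch; class and method names are illustrative, and on Java 8 the anonymous Closeable could equally be written as a lambda):

    import com.google.common.io.Files;
    import com.metamx.common.ByteBufferUtils;

    import java.io.Closeable;
    import java.io.File;
    import java.io.IOException;
    import java.nio.MappedByteBuffer;

    class TryWithResourcesUnmapSketch
    {
      static void readWithinScope(File file) throws IOException
      {
        final MappedByteBuffer dimValsMapped = Files.map(file);
        // The resource's close() unmaps the buffer as soon as the block
        // exits, whether normally or exceptionally.
        try (Closeable unmapper = new Closeable()
        {
          @Override
          public void close()
          {
            ByteBufferUtils.unmap(dimValsMapped);
          }
        }) {
          // ... read from `dimValsMapped` ...
        }
      }
    }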