mirror of https://github.com/apache/druid.git
clean tmp file when index merge fail
This commit is contained in:
parent
041350c31b
commit
57d78d3293
|
@ -34,6 +34,7 @@ import com.google.common.collect.Ordering;
|
||||||
import com.google.common.collect.PeekingIterator;
|
import com.google.common.collect.PeekingIterator;
|
||||||
import com.google.common.collect.Sets;
|
import com.google.common.collect.Sets;
|
||||||
import com.google.common.io.ByteStreams;
|
import com.google.common.io.ByteStreams;
|
||||||
|
import com.google.common.io.Closer;
|
||||||
import com.google.common.io.Files;
|
import com.google.common.io.Files;
|
||||||
import com.google.common.io.OutputSupplier;
|
import com.google.common.io.OutputSupplier;
|
||||||
import com.google.common.primitives.Ints;
|
import com.google.common.primitives.Ints;
|
||||||
|
@ -85,6 +86,7 @@ import org.joda.time.DateTime;
|
||||||
import org.joda.time.Interval;
|
import org.joda.time.Interval;
|
||||||
|
|
||||||
import javax.annotation.Nullable;
|
import javax.annotation.Nullable;
|
||||||
|
import java.io.Closeable;
|
||||||
import java.io.File;
|
import java.io.File;
|
||||||
import java.io.FileOutputStream;
|
import java.io.FileOutputStream;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
|
@ -604,79 +606,95 @@ public class IndexMerger
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Closer closer = Closer.create();
|
||||||
final Interval dataInterval;
|
final Interval dataInterval;
|
||||||
File v8OutDir = new File(outDir, "v8-tmp");
|
final File v8OutDir = new File(outDir, "v8-tmp");
|
||||||
v8OutDir.mkdirs();
|
v8OutDir.mkdirs();
|
||||||
|
closer.register(new Closeable()
|
||||||
/************* Main index.drd file **************/
|
{
|
||||||
progress.progress();
|
@Override
|
||||||
long startTime = System.currentTimeMillis();
|
public void close() throws IOException
|
||||||
File indexFile = new File(v8OutDir, "index.drd");
|
{
|
||||||
|
FileUtils.deleteDirectory(v8OutDir);
|
||||||
try (FileOutputStream fileOutputStream = new FileOutputStream(indexFile);
|
|
||||||
FileChannel channel = fileOutputStream.getChannel()) {
|
|
||||||
channel.write(ByteBuffer.wrap(new byte[]{IndexIO.V8_VERSION}));
|
|
||||||
|
|
||||||
GenericIndexed.fromIterable(mergedDimensions, GenericIndexed.STRING_STRATEGY).writeToChannel(channel);
|
|
||||||
GenericIndexed.fromIterable(mergedMetrics, GenericIndexed.STRING_STRATEGY).writeToChannel(channel);
|
|
||||||
|
|
||||||
DateTime minTime = new DateTime(JodaUtils.MAX_INSTANT);
|
|
||||||
DateTime maxTime = new DateTime(JodaUtils.MIN_INSTANT);
|
|
||||||
|
|
||||||
for (IndexableAdapter index : indexes) {
|
|
||||||
minTime = JodaUtils.minDateTime(minTime, index.getDataInterval().getStart());
|
|
||||||
maxTime = JodaUtils.maxDateTime(maxTime, index.getDataInterval().getEnd());
|
|
||||||
}
|
}
|
||||||
|
});
|
||||||
|
final IOPeon ioPeon = new TmpFileIOPeon();
|
||||||
|
closer.register(new Closeable()
|
||||||
|
{
|
||||||
|
@Override
|
||||||
|
public void close() throws IOException
|
||||||
|
{
|
||||||
|
ioPeon.cleanup();
|
||||||
|
}
|
||||||
|
});
|
||||||
|
try {
|
||||||
|
/************* Main index.drd file **************/
|
||||||
|
progress.progress();
|
||||||
|
long startTime = System.currentTimeMillis();
|
||||||
|
File indexFile = new File(v8OutDir, "index.drd");
|
||||||
|
|
||||||
dataInterval = new Interval(minTime, maxTime);
|
try (FileOutputStream fileOutputStream = new FileOutputStream(indexFile);
|
||||||
serializerUtils.writeString(channel, String.format("%s/%s", minTime, maxTime));
|
FileChannel channel = fileOutputStream.getChannel()) {
|
||||||
serializerUtils.writeString(channel, mapper.writeValueAsString(indexSpec.getBitmapSerdeFactory()));
|
channel.write(ByteBuffer.wrap(new byte[]{IndexIO.V8_VERSION}));
|
||||||
}
|
|
||||||
IndexIO.checkFileSize(indexFile);
|
|
||||||
log.info("outDir[%s] completed index.drd in %,d millis.", v8OutDir, System.currentTimeMillis() - startTime);
|
|
||||||
|
|
||||||
/************* Setup Dim Conversions **************/
|
GenericIndexed.fromIterable(mergedDimensions, GenericIndexed.STRING_STRATEGY).writeToChannel(channel);
|
||||||
progress.progress();
|
GenericIndexed.fromIterable(mergedMetrics, GenericIndexed.STRING_STRATEGY).writeToChannel(channel);
|
||||||
startTime = System.currentTimeMillis();
|
|
||||||
|
|
||||||
IOPeon ioPeon = new TmpFileIOPeon();
|
DateTime minTime = new DateTime(JodaUtils.MAX_INSTANT);
|
||||||
ArrayList<FileOutputSupplier> dimOuts = Lists.newArrayListWithCapacity(mergedDimensions.size());
|
DateTime maxTime = new DateTime(JodaUtils.MIN_INSTANT);
|
||||||
Map<String, Integer> dimensionCardinalities = Maps.newHashMap();
|
|
||||||
ArrayList<Map<String, IntBuffer>> dimConversions = Lists.newArrayListWithCapacity(indexes.size());
|
|
||||||
final ArrayList<Boolean> convertMissingDimsFlags = Lists.newArrayListWithCapacity(mergedDimensions.size());
|
|
||||||
|
|
||||||
for (int i = 0; i < indexes.size(); ++i) {
|
for (IndexableAdapter index : indexes) {
|
||||||
dimConversions.add(Maps.<String, IntBuffer>newHashMap());
|
minTime = JodaUtils.minDateTime(minTime, index.getDataInterval().getStart());
|
||||||
}
|
maxTime = JodaUtils.maxDateTime(maxTime, index.getDataInterval().getEnd());
|
||||||
|
|
||||||
for (String dimension : mergedDimensions) {
|
|
||||||
final GenericIndexedWriter<String> writer = new GenericIndexedWriter<String>(
|
|
||||||
ioPeon, dimension, GenericIndexed.STRING_STRATEGY
|
|
||||||
);
|
|
||||||
writer.open();
|
|
||||||
|
|
||||||
boolean dimHasNull = false;
|
|
||||||
boolean dimHasValues = false;
|
|
||||||
boolean dimAbsentFromSomeIndex = false;
|
|
||||||
|
|
||||||
int numMergeIndex = 0;
|
|
||||||
Indexed<String> dimValueLookup = null;
|
|
||||||
Indexed<String>[] dimValueLookups = new Indexed[indexes.size() + 1];
|
|
||||||
for (int i = 0; i < indexes.size(); i++) {
|
|
||||||
Indexed<String> dimValues = indexes.get(i).getDimValueLookup(dimension);
|
|
||||||
if (!isNullColumn(dimValues)) {
|
|
||||||
dimHasValues = true;
|
|
||||||
dimHasNull |= dimValues.indexOf(null) >= 0;
|
|
||||||
dimValueLookups[i] = dimValueLookup = dimValues;
|
|
||||||
numMergeIndex++;
|
|
||||||
} else {
|
|
||||||
dimAbsentFromSomeIndex = true;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
dataInterval = new Interval(minTime, maxTime);
|
||||||
|
serializerUtils.writeString(channel, String.format("%s/%s", minTime, maxTime));
|
||||||
|
serializerUtils.writeString(channel, mapper.writeValueAsString(indexSpec.getBitmapSerdeFactory()));
|
||||||
|
}
|
||||||
|
IndexIO.checkFileSize(indexFile);
|
||||||
|
log.info("outDir[%s] completed index.drd in %,d millis.", v8OutDir, System.currentTimeMillis() - startTime);
|
||||||
|
|
||||||
|
/************* Setup Dim Conversions **************/
|
||||||
|
progress.progress();
|
||||||
|
startTime = System.currentTimeMillis();
|
||||||
|
|
||||||
|
ArrayList<FileOutputSupplier> dimOuts = Lists.newArrayListWithCapacity(mergedDimensions.size());
|
||||||
|
Map<String, Integer> dimensionCardinalities = Maps.newHashMap();
|
||||||
|
ArrayList<Map<String, IntBuffer>> dimConversions = Lists.newArrayListWithCapacity(indexes.size());
|
||||||
|
final ArrayList<Boolean> convertMissingDimsFlags = Lists.newArrayListWithCapacity(mergedDimensions.size());
|
||||||
|
|
||||||
|
for (int i = 0; i < indexes.size(); ++i) {
|
||||||
|
dimConversions.add(Maps.<String, IntBuffer>newHashMap());
|
||||||
}
|
}
|
||||||
|
|
||||||
boolean convertMissingDims = dimHasValues && dimAbsentFromSomeIndex;
|
for (String dimension : mergedDimensions) {
|
||||||
convertMissingDimsFlags.add(convertMissingDims);
|
final GenericIndexedWriter<String> writer = new GenericIndexedWriter<String>(
|
||||||
|
ioPeon, dimension, GenericIndexed.STRING_STRATEGY
|
||||||
|
);
|
||||||
|
writer.open();
|
||||||
|
|
||||||
|
boolean dimHasNull = false;
|
||||||
|
boolean dimHasValues = false;
|
||||||
|
boolean dimAbsentFromSomeIndex = false;
|
||||||
|
|
||||||
|
int numMergeIndex = 0;
|
||||||
|
Indexed<String> dimValueLookup = null;
|
||||||
|
Indexed<String>[] dimValueLookups = new Indexed[indexes.size() + 1];
|
||||||
|
for (int i = 0; i < indexes.size(); i++) {
|
||||||
|
Indexed<String> dimValues = indexes.get(i).getDimValueLookup(dimension);
|
||||||
|
if (!isNullColumn(dimValues)) {
|
||||||
|
dimHasValues = true;
|
||||||
|
dimHasNull |= dimValues.indexOf(null) >= 0;
|
||||||
|
dimValueLookups[i] = dimValueLookup = dimValues;
|
||||||
|
numMergeIndex++;
|
||||||
|
} else {
|
||||||
|
dimAbsentFromSomeIndex = true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
boolean convertMissingDims = dimHasValues && dimAbsentFromSomeIndex;
|
||||||
|
convertMissingDimsFlags.add(convertMissingDims);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Ensure the empty str is always in the dictionary if the dimension was missing from one index but
|
* Ensure the empty str is always in the dictionary if the dimension was missing from one index but
|
||||||
|
@ -685,331 +703,333 @@ public class IndexMerger
|
||||||
* later on, to allow rows from indexes without a particular dimension to merge correctly with
|
* later on, to allow rows from indexes without a particular dimension to merge correctly with
|
||||||
* rows from indexes with null/empty str values for that dimension.
|
* rows from indexes with null/empty str values for that dimension.
|
||||||
*/
|
*/
|
||||||
if (convertMissingDims && !dimHasNull) {
|
if (convertMissingDims && !dimHasNull) {
|
||||||
dimValueLookups[indexes.size()] = dimValueLookup = EMPTY_STR_DIM_VAL;
|
dimValueLookups[indexes.size()] = dimValueLookup = EMPTY_STR_DIM_VAL;
|
||||||
numMergeIndex++;
|
numMergeIndex++;
|
||||||
}
|
|
||||||
|
|
||||||
int cardinality = 0;
|
|
||||||
if (numMergeIndex > 1) {
|
|
||||||
DictionaryMergeIterator iterator = new DictionaryMergeIterator(dimValueLookups, true);
|
|
||||||
|
|
||||||
while (iterator.hasNext()) {
|
|
||||||
writer.write(iterator.next());
|
|
||||||
}
|
}
|
||||||
|
|
||||||
for (int i = 0; i < indexes.size(); i++) {
|
int cardinality = 0;
|
||||||
if (dimValueLookups[i] != null && iterator.needConversion(i)) {
|
if (numMergeIndex > 1) {
|
||||||
dimConversions.get(i).put(dimension, iterator.conversions[i]);
|
DictionaryMergeIterator iterator = new DictionaryMergeIterator(dimValueLookups, true);
|
||||||
|
|
||||||
|
while (iterator.hasNext()) {
|
||||||
|
writer.write(iterator.next());
|
||||||
|
}
|
||||||
|
|
||||||
|
for (int i = 0; i < indexes.size(); i++) {
|
||||||
|
if (dimValueLookups[i] != null && iterator.needConversion(i)) {
|
||||||
|
dimConversions.get(i).put(dimension, iterator.conversions[i]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
cardinality = iterator.counter;
|
||||||
|
} else if (numMergeIndex == 1) {
|
||||||
|
for (String value : dimValueLookup) {
|
||||||
|
writer.write(value);
|
||||||
|
}
|
||||||
|
cardinality = dimValueLookup.size();
|
||||||
|
}
|
||||||
|
|
||||||
|
dimensionCardinalities.put(dimension, cardinality);
|
||||||
|
|
||||||
|
FileOutputSupplier dimOut = new FileOutputSupplier(IndexIO.makeDimFile(v8OutDir, dimension), true);
|
||||||
|
dimOuts.add(dimOut);
|
||||||
|
|
||||||
|
writer.close();
|
||||||
|
serializerUtils.writeString(dimOut, dimension);
|
||||||
|
ByteStreams.copy(writer.combineStreams(), dimOut);
|
||||||
|
|
||||||
|
ioPeon.cleanup();
|
||||||
|
}
|
||||||
|
log.info("outDir[%s] completed dim conversions in %,d millis.", v8OutDir, System.currentTimeMillis() - startTime);
|
||||||
|
|
||||||
|
/************* Walk through data sets and merge them *************/
|
||||||
|
progress.progress();
|
||||||
|
startTime = System.currentTimeMillis();
|
||||||
|
|
||||||
|
Iterable<Rowboat> theRows = makeRowIterable(
|
||||||
|
indexes,
|
||||||
|
mergedDimensions,
|
||||||
|
mergedMetrics,
|
||||||
|
dimConversions,
|
||||||
|
convertMissingDimsFlags,
|
||||||
|
rowMergerFn
|
||||||
|
);
|
||||||
|
|
||||||
|
CompressedLongsSupplierSerializer timeWriter = CompressedLongsSupplierSerializer.create(
|
||||||
|
ioPeon, "little_end_time", IndexIO.BYTE_ORDER, CompressedObjectStrategy.DEFAULT_COMPRESSION_STRATEGY
|
||||||
|
);
|
||||||
|
|
||||||
|
timeWriter.open();
|
||||||
|
|
||||||
|
ArrayList<VSizeIndexedWriter> forwardDimWriters = Lists.newArrayListWithCapacity(mergedDimensions.size());
|
||||||
|
for (String dimension : mergedDimensions) {
|
||||||
|
VSizeIndexedWriter writer = new VSizeIndexedWriter(ioPeon, dimension, dimensionCardinalities.get(dimension));
|
||||||
|
writer.open();
|
||||||
|
forwardDimWriters.add(writer);
|
||||||
|
}
|
||||||
|
|
||||||
|
ArrayList<MetricColumnSerializer> metWriters = Lists.newArrayListWithCapacity(mergedMetrics.size());
|
||||||
|
for (String metric : mergedMetrics) {
|
||||||
|
ValueType type = valueTypes.get(metric);
|
||||||
|
switch (type) {
|
||||||
|
case LONG:
|
||||||
|
metWriters.add(new LongMetricColumnSerializer(metric, v8OutDir, ioPeon));
|
||||||
|
break;
|
||||||
|
case FLOAT:
|
||||||
|
metWriters.add(new FloatMetricColumnSerializer(metric, v8OutDir, ioPeon));
|
||||||
|
break;
|
||||||
|
case COMPLEX:
|
||||||
|
final String typeName = metricTypeNames.get(metric);
|
||||||
|
ComplexMetricSerde serde = ComplexMetrics.getSerdeForType(typeName);
|
||||||
|
|
||||||
|
if (serde == null) {
|
||||||
|
throw new ISE("Unknown type[%s]", typeName);
|
||||||
|
}
|
||||||
|
|
||||||
|
metWriters.add(new ComplexMetricColumnSerializer(metric, v8OutDir, ioPeon, serde));
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
throw new ISE("Unknown type[%s]", type);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
for (MetricColumnSerializer metWriter : metWriters) {
|
||||||
|
metWriter.open();
|
||||||
|
}
|
||||||
|
|
||||||
|
int rowCount = 0;
|
||||||
|
long time = System.currentTimeMillis();
|
||||||
|
List<IntBuffer> rowNumConversions = Lists.newArrayListWithCapacity(indexes.size());
|
||||||
|
for (IndexableAdapter index : indexes) {
|
||||||
|
int[] arr = new int[index.getNumRows()];
|
||||||
|
Arrays.fill(arr, INVALID_ROW);
|
||||||
|
rowNumConversions.add(IntBuffer.wrap(arr));
|
||||||
|
}
|
||||||
|
|
||||||
|
for (Rowboat theRow : theRows) {
|
||||||
|
progress.progress();
|
||||||
|
timeWriter.add(theRow.getTimestamp());
|
||||||
|
|
||||||
|
final Object[] metrics = theRow.getMetrics();
|
||||||
|
for (int i = 0; i < metrics.length; ++i) {
|
||||||
|
metWriters.get(i).serialize(metrics[i]);
|
||||||
|
}
|
||||||
|
|
||||||
|
int[][] dims = theRow.getDims();
|
||||||
|
for (int i = 0; i < dims.length; ++i) {
|
||||||
|
List<Integer> listToWrite = (i >= dims.length || dims[i] == null)
|
||||||
|
? null
|
||||||
|
: Ints.asList(dims[i]);
|
||||||
|
forwardDimWriters.get(i).write(listToWrite);
|
||||||
|
}
|
||||||
|
|
||||||
|
for (Map.Entry<Integer, TreeSet<Integer>> comprisedRow : theRow.getComprisedRows().entrySet()) {
|
||||||
|
final IntBuffer conversionBuffer = rowNumConversions.get(comprisedRow.getKey());
|
||||||
|
|
||||||
|
for (Integer rowNum : comprisedRow.getValue()) {
|
||||||
|
while (conversionBuffer.position() < rowNum) {
|
||||||
|
conversionBuffer.put(INVALID_ROW);
|
||||||
|
}
|
||||||
|
conversionBuffer.put(rowCount);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
cardinality = iterator.counter;
|
|
||||||
} else if (numMergeIndex == 1) {
|
if ((++rowCount % 500000) == 0) {
|
||||||
for (String value : dimValueLookup) {
|
log.info(
|
||||||
writer.write(value);
|
"outDir[%s] walked 500,000/%,d rows in %,d millis.", v8OutDir, rowCount, System.currentTimeMillis() - time
|
||||||
|
);
|
||||||
|
time = System.currentTimeMillis();
|
||||||
}
|
}
|
||||||
cardinality = dimValueLookup.size();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
dimensionCardinalities.put(dimension, cardinality);
|
for (IntBuffer rowNumConversion : rowNumConversions) {
|
||||||
|
rowNumConversion.rewind();
|
||||||
|
}
|
||||||
|
|
||||||
FileOutputSupplier dimOut = new FileOutputSupplier(IndexIO.makeDimFile(v8OutDir, dimension), true);
|
final File timeFile = IndexIO.makeTimeFile(v8OutDir, IndexIO.BYTE_ORDER);
|
||||||
dimOuts.add(dimOut);
|
timeFile.delete();
|
||||||
|
OutputSupplier<FileOutputStream> out = Files.newOutputStreamSupplier(timeFile, true);
|
||||||
|
timeWriter.closeAndConsolidate(out);
|
||||||
|
IndexIO.checkFileSize(timeFile);
|
||||||
|
|
||||||
writer.close();
|
for (int i = 0; i < mergedDimensions.size(); ++i) {
|
||||||
serializerUtils.writeString(dimOut, dimension);
|
forwardDimWriters.get(i).close();
|
||||||
ByteStreams.copy(writer.combineStreams(), dimOut);
|
ByteStreams.copy(forwardDimWriters.get(i).combineStreams(), dimOuts.get(i));
|
||||||
|
}
|
||||||
|
|
||||||
|
for (MetricColumnSerializer metWriter : metWriters) {
|
||||||
|
metWriter.close();
|
||||||
|
}
|
||||||
|
|
||||||
ioPeon.cleanup();
|
ioPeon.cleanup();
|
||||||
}
|
log.info(
|
||||||
log.info("outDir[%s] completed dim conversions in %,d millis.", v8OutDir, System.currentTimeMillis() - startTime);
|
"outDir[%s] completed walk through of %,d rows in %,d millis.",
|
||||||
|
v8OutDir,
|
||||||
/************* Walk through data sets and merge them *************/
|
rowCount,
|
||||||
progress.progress();
|
System.currentTimeMillis() - startTime
|
||||||
startTime = System.currentTimeMillis();
|
|
||||||
|
|
||||||
Iterable<Rowboat> theRows = makeRowIterable(
|
|
||||||
indexes,
|
|
||||||
mergedDimensions,
|
|
||||||
mergedMetrics,
|
|
||||||
dimConversions,
|
|
||||||
convertMissingDimsFlags,
|
|
||||||
rowMergerFn
|
|
||||||
);
|
|
||||||
|
|
||||||
CompressedLongsSupplierSerializer timeWriter = CompressedLongsSupplierSerializer.create(
|
|
||||||
ioPeon, "little_end_time", IndexIO.BYTE_ORDER, CompressedObjectStrategy.DEFAULT_COMPRESSION_STRATEGY
|
|
||||||
);
|
|
||||||
|
|
||||||
timeWriter.open();
|
|
||||||
|
|
||||||
ArrayList<VSizeIndexedWriter> forwardDimWriters = Lists.newArrayListWithCapacity(mergedDimensions.size());
|
|
||||||
for (String dimension : mergedDimensions) {
|
|
||||||
VSizeIndexedWriter writer = new VSizeIndexedWriter(ioPeon, dimension, dimensionCardinalities.get(dimension));
|
|
||||||
writer.open();
|
|
||||||
forwardDimWriters.add(writer);
|
|
||||||
}
|
|
||||||
|
|
||||||
ArrayList<MetricColumnSerializer> metWriters = Lists.newArrayListWithCapacity(mergedMetrics.size());
|
|
||||||
for (String metric : mergedMetrics) {
|
|
||||||
ValueType type = valueTypes.get(metric);
|
|
||||||
switch (type) {
|
|
||||||
case LONG:
|
|
||||||
metWriters.add(new LongMetricColumnSerializer(metric, v8OutDir, ioPeon));
|
|
||||||
break;
|
|
||||||
case FLOAT:
|
|
||||||
metWriters.add(new FloatMetricColumnSerializer(metric, v8OutDir, ioPeon));
|
|
||||||
break;
|
|
||||||
case COMPLEX:
|
|
||||||
final String typeName = metricTypeNames.get(metric);
|
|
||||||
ComplexMetricSerde serde = ComplexMetrics.getSerdeForType(typeName);
|
|
||||||
|
|
||||||
if (serde == null) {
|
|
||||||
throw new ISE("Unknown type[%s]", typeName);
|
|
||||||
}
|
|
||||||
|
|
||||||
metWriters.add(new ComplexMetricColumnSerializer(metric, v8OutDir, ioPeon, serde));
|
|
||||||
break;
|
|
||||||
default:
|
|
||||||
throw new ISE("Unknown type[%s]", type);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
for (MetricColumnSerializer metWriter : metWriters) {
|
|
||||||
metWriter.open();
|
|
||||||
}
|
|
||||||
|
|
||||||
int rowCount = 0;
|
|
||||||
long time = System.currentTimeMillis();
|
|
||||||
List<IntBuffer> rowNumConversions = Lists.newArrayListWithCapacity(indexes.size());
|
|
||||||
for (IndexableAdapter index : indexes) {
|
|
||||||
int[] arr = new int[index.getNumRows()];
|
|
||||||
Arrays.fill(arr, INVALID_ROW);
|
|
||||||
rowNumConversions.add(IntBuffer.wrap(arr));
|
|
||||||
}
|
|
||||||
|
|
||||||
for (Rowboat theRow : theRows) {
|
|
||||||
progress.progress();
|
|
||||||
timeWriter.add(theRow.getTimestamp());
|
|
||||||
|
|
||||||
final Object[] metrics = theRow.getMetrics();
|
|
||||||
for (int i = 0; i < metrics.length; ++i) {
|
|
||||||
metWriters.get(i).serialize(metrics[i]);
|
|
||||||
}
|
|
||||||
|
|
||||||
int[][] dims = theRow.getDims();
|
|
||||||
for (int i = 0; i < dims.length; ++i) {
|
|
||||||
List<Integer> listToWrite = (i >= dims.length || dims[i] == null)
|
|
||||||
? null
|
|
||||||
: Ints.asList(dims[i]);
|
|
||||||
forwardDimWriters.get(i).write(listToWrite);
|
|
||||||
}
|
|
||||||
|
|
||||||
for (Map.Entry<Integer, TreeSet<Integer>> comprisedRow : theRow.getComprisedRows().entrySet()) {
|
|
||||||
final IntBuffer conversionBuffer = rowNumConversions.get(comprisedRow.getKey());
|
|
||||||
|
|
||||||
for (Integer rowNum : comprisedRow.getValue()) {
|
|
||||||
while (conversionBuffer.position() < rowNum) {
|
|
||||||
conversionBuffer.put(INVALID_ROW);
|
|
||||||
}
|
|
||||||
conversionBuffer.put(rowCount);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if ((++rowCount % 500000) == 0) {
|
|
||||||
log.info(
|
|
||||||
"outDir[%s] walked 500,000/%,d rows in %,d millis.", v8OutDir, rowCount, System.currentTimeMillis() - time
|
|
||||||
);
|
|
||||||
time = System.currentTimeMillis();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
for (IntBuffer rowNumConversion : rowNumConversions) {
|
|
||||||
rowNumConversion.rewind();
|
|
||||||
}
|
|
||||||
|
|
||||||
final File timeFile = IndexIO.makeTimeFile(v8OutDir, IndexIO.BYTE_ORDER);
|
|
||||||
timeFile.delete();
|
|
||||||
OutputSupplier<FileOutputStream> out = Files.newOutputStreamSupplier(timeFile, true);
|
|
||||||
timeWriter.closeAndConsolidate(out);
|
|
||||||
IndexIO.checkFileSize(timeFile);
|
|
||||||
|
|
||||||
for (int i = 0; i < mergedDimensions.size(); ++i) {
|
|
||||||
forwardDimWriters.get(i).close();
|
|
||||||
ByteStreams.copy(forwardDimWriters.get(i).combineStreams(), dimOuts.get(i));
|
|
||||||
}
|
|
||||||
|
|
||||||
for (MetricColumnSerializer metWriter : metWriters) {
|
|
||||||
metWriter.close();
|
|
||||||
}
|
|
||||||
|
|
||||||
ioPeon.cleanup();
|
|
||||||
log.info(
|
|
||||||
"outDir[%s] completed walk through of %,d rows in %,d millis.",
|
|
||||||
v8OutDir,
|
|
||||||
rowCount,
|
|
||||||
System.currentTimeMillis() - startTime
|
|
||||||
);
|
|
||||||
|
|
||||||
/************ Create Inverted Indexes *************/
|
|
||||||
startTime = System.currentTimeMillis();
|
|
||||||
|
|
||||||
final File invertedFile = new File(v8OutDir, "inverted.drd");
|
|
||||||
Files.touch(invertedFile);
|
|
||||||
out = Files.newOutputStreamSupplier(invertedFile, true);
|
|
||||||
|
|
||||||
final File geoFile = new File(v8OutDir, "spatial.drd");
|
|
||||||
Files.touch(geoFile);
|
|
||||||
OutputSupplier<FileOutputStream> spatialOut = Files.newOutputStreamSupplier(geoFile, true);
|
|
||||||
|
|
||||||
for (int i = 0; i < mergedDimensions.size(); ++i) {
|
|
||||||
long dimStartTime = System.currentTimeMillis();
|
|
||||||
String dimension = mergedDimensions.get(i);
|
|
||||||
|
|
||||||
File dimOutFile = dimOuts.get(i).getFile();
|
|
||||||
final MappedByteBuffer dimValsMapped = Files.map(dimOutFile);
|
|
||||||
|
|
||||||
if (!dimension.equals(serializerUtils.readString(dimValsMapped))) {
|
|
||||||
throw new ISE("dimensions[%s] didn't equate!? This is a major WTF moment.", dimension);
|
|
||||||
}
|
|
||||||
Indexed<String> dimVals = GenericIndexed.read(dimValsMapped, GenericIndexed.STRING_STRATEGY);
|
|
||||||
log.info("Starting dimension[%s] with cardinality[%,d]", dimension, dimVals.size());
|
|
||||||
|
|
||||||
final BitmapSerdeFactory bitmapSerdeFactory = indexSpec.getBitmapSerdeFactory();
|
|
||||||
GenericIndexedWriter<ImmutableBitmap> writer = new GenericIndexedWriter<>(
|
|
||||||
ioPeon, dimension, bitmapSerdeFactory.getObjectStrategy()
|
|
||||||
);
|
);
|
||||||
writer.open();
|
|
||||||
|
|
||||||
boolean isSpatialDim = columnCapabilities.get(dimension).hasSpatialIndexes();
|
/************ Create Inverted Indexes *************/
|
||||||
ByteBufferWriter<ImmutableRTree> spatialWriter = null;
|
startTime = System.currentTimeMillis();
|
||||||
RTree tree = null;
|
|
||||||
IOPeon spatialIoPeon = new TmpFileIOPeon();
|
final File invertedFile = new File(v8OutDir, "inverted.drd");
|
||||||
if (isSpatialDim) {
|
Files.touch(invertedFile);
|
||||||
BitmapFactory bitmapFactory = bitmapSerdeFactory.getBitmapFactory();
|
out = Files.newOutputStreamSupplier(invertedFile, true);
|
||||||
spatialWriter = new ByteBufferWriter<ImmutableRTree>(
|
|
||||||
spatialIoPeon, dimension, new IndexedRTree.ImmutableRTreeObjectStrategy(bitmapFactory)
|
final File geoFile = new File(v8OutDir, "spatial.drd");
|
||||||
|
Files.touch(geoFile);
|
||||||
|
OutputSupplier<FileOutputStream> spatialOut = Files.newOutputStreamSupplier(geoFile, true);
|
||||||
|
|
||||||
|
for (int i = 0; i < mergedDimensions.size(); ++i) {
|
||||||
|
long dimStartTime = System.currentTimeMillis();
|
||||||
|
String dimension = mergedDimensions.get(i);
|
||||||
|
|
||||||
|
File dimOutFile = dimOuts.get(i).getFile();
|
||||||
|
final MappedByteBuffer dimValsMapped = Files.map(dimOutFile);
|
||||||
|
|
||||||
|
if (!dimension.equals(serializerUtils.readString(dimValsMapped))) {
|
||||||
|
throw new ISE("dimensions[%s] didn't equate!? This is a major WTF moment.", dimension);
|
||||||
|
}
|
||||||
|
Indexed<String> dimVals = GenericIndexed.read(dimValsMapped, GenericIndexed.STRING_STRATEGY);
|
||||||
|
log.info("Starting dimension[%s] with cardinality[%,d]", dimension, dimVals.size());
|
||||||
|
|
||||||
|
final BitmapSerdeFactory bitmapSerdeFactory = indexSpec.getBitmapSerdeFactory();
|
||||||
|
GenericIndexedWriter<ImmutableBitmap> writer = new GenericIndexedWriter<>(
|
||||||
|
ioPeon, dimension, bitmapSerdeFactory.getObjectStrategy()
|
||||||
);
|
);
|
||||||
spatialWriter.open();
|
writer.open();
|
||||||
tree = new RTree(2, new LinearGutmanSplitStrategy(0, 50, bitmapFactory), bitmapFactory);
|
|
||||||
}
|
|
||||||
|
|
||||||
IndexSeeker[] dictIdSeeker = toIndexSeekers(indexes, dimConversions, dimension);
|
boolean isSpatialDim = columnCapabilities.get(dimension).hasSpatialIndexes();
|
||||||
|
ByteBufferWriter<ImmutableRTree> spatialWriter = null;
|
||||||
//Iterate all dim values's dictionary id in ascending order which in line with dim values's compare result.
|
RTree tree = null;
|
||||||
for (int dictId = 0; dictId < dimVals.size(); dictId++) {
|
IOPeon spatialIoPeon = new TmpFileIOPeon();
|
||||||
progress.progress();
|
if (isSpatialDim) {
|
||||||
List<Iterable<Integer>> convertedInverteds = Lists.newArrayListWithCapacity(indexes.size());
|
BitmapFactory bitmapFactory = bitmapSerdeFactory.getBitmapFactory();
|
||||||
for (int j = 0; j < indexes.size(); ++j) {
|
spatialWriter = new ByteBufferWriter<ImmutableRTree>(
|
||||||
int seekedDictId = dictIdSeeker[j].seek(dictId);
|
spatialIoPeon, dimension, new IndexedRTree.ImmutableRTreeObjectStrategy(bitmapFactory)
|
||||||
if (seekedDictId != IndexSeeker.NOT_EXIST) {
|
);
|
||||||
convertedInverteds.add(
|
spatialWriter.open();
|
||||||
new ConvertingIndexedInts(
|
tree = new RTree(2, new LinearGutmanSplitStrategy(0, 50, bitmapFactory), bitmapFactory);
|
||||||
indexes.get(j).getBitmapIndex(dimension, seekedDictId), rowNumConversions.get(j)
|
|
||||||
)
|
|
||||||
);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
MutableBitmap bitset = bitmapSerdeFactory.getBitmapFactory().makeEmptyMutableBitmap();
|
IndexSeeker[] dictIdSeeker = toIndexSeekers(indexes, dimConversions, dimension);
|
||||||
for (Integer row : CombiningIterable.createSplatted(
|
|
||||||
convertedInverteds,
|
//Iterate all dim values's dictionary id in ascending order which in line with dim values's compare result.
|
||||||
Ordering.<Integer>natural().nullsFirst()
|
for (int dictId = 0; dictId < dimVals.size(); dictId++) {
|
||||||
)) {
|
progress.progress();
|
||||||
if (row != INVALID_ROW) {
|
List<Iterable<Integer>> convertedInverteds = Lists.newArrayListWithCapacity(indexes.size());
|
||||||
bitset.add(row);
|
for (int j = 0; j < indexes.size(); ++j) {
|
||||||
|
int seekedDictId = dictIdSeeker[j].seek(dictId);
|
||||||
|
if (seekedDictId != IndexSeeker.NOT_EXIST) {
|
||||||
|
convertedInverteds.add(
|
||||||
|
new ConvertingIndexedInts(
|
||||||
|
indexes.get(j).getBitmapIndex(dimension, seekedDictId), rowNumConversions.get(j)
|
||||||
|
)
|
||||||
|
);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
MutableBitmap bitset = bitmapSerdeFactory.getBitmapFactory().makeEmptyMutableBitmap();
|
||||||
|
for (Integer row : CombiningIterable.createSplatted(
|
||||||
|
convertedInverteds,
|
||||||
|
Ordering.<Integer>natural().nullsFirst()
|
||||||
|
)) {
|
||||||
|
if (row != INVALID_ROW) {
|
||||||
|
bitset.add(row);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
writer.write(
|
||||||
|
bitmapSerdeFactory.getBitmapFactory().makeImmutableBitmap(bitset)
|
||||||
|
);
|
||||||
|
|
||||||
|
if (isSpatialDim) {
|
||||||
|
String dimVal = dimVals.get(dictId);
|
||||||
|
if (dimVal != null) {
|
||||||
|
List<String> stringCoords = Lists.newArrayList(SPLITTER.split(dimVal));
|
||||||
|
float[] coords = new float[stringCoords.size()];
|
||||||
|
for (int j = 0; j < coords.length; j++) {
|
||||||
|
coords[j] = Float.valueOf(stringCoords.get(j));
|
||||||
|
}
|
||||||
|
tree.insert(coords, bitset);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
writer.close();
|
||||||
|
|
||||||
writer.write(
|
serializerUtils.writeString(out, dimension);
|
||||||
bitmapSerdeFactory.getBitmapFactory().makeImmutableBitmap(bitset)
|
ByteStreams.copy(writer.combineStreams(), out);
|
||||||
);
|
ioPeon.cleanup();
|
||||||
|
|
||||||
|
log.info("Completed dimension[%s] in %,d millis.", dimension, System.currentTimeMillis() - dimStartTime);
|
||||||
|
|
||||||
if (isSpatialDim) {
|
if (isSpatialDim) {
|
||||||
String dimVal = dimVals.get(dictId);
|
spatialWriter.write(ImmutableRTree.newImmutableFromMutable(tree));
|
||||||
if (dimVal != null) {
|
spatialWriter.close();
|
||||||
List<String> stringCoords = Lists.newArrayList(SPLITTER.split(dimVal));
|
|
||||||
float[] coords = new float[stringCoords.size()];
|
serializerUtils.writeString(spatialOut, dimension);
|
||||||
for (int j = 0; j < coords.length; j++) {
|
ByteStreams.copy(spatialWriter.combineStreams(), spatialOut);
|
||||||
coords[j] = Float.valueOf(stringCoords.get(j));
|
spatialIoPeon.cleanup();
|
||||||
}
|
|
||||||
tree.insert(coords, bitset);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
writer.close();
|
|
||||||
|
|
||||||
serializerUtils.writeString(out, dimension);
|
log.info("outDir[%s] completed inverted.drd in %,d millis.", v8OutDir, System.currentTimeMillis() - startTime);
|
||||||
ByteStreams.copy(writer.combineStreams(), out);
|
|
||||||
ioPeon.cleanup();
|
|
||||||
|
|
||||||
log.info("Completed dimension[%s] in %,d millis.", dimension, System.currentTimeMillis() - dimStartTime);
|
final ArrayList<String> expectedFiles = Lists.newArrayList(
|
||||||
|
Iterables.concat(
|
||||||
|
Arrays.asList(
|
||||||
|
"index.drd", "inverted.drd", "spatial.drd", String.format("time_%s.drd", IndexIO.BYTE_ORDER)
|
||||||
|
),
|
||||||
|
Iterables.transform(mergedDimensions, GuavaUtils.formatFunction("dim_%s.drd")),
|
||||||
|
Iterables.transform(
|
||||||
|
mergedMetrics, GuavaUtils.formatFunction(String.format("met_%%s_%s.drd", IndexIO.BYTE_ORDER))
|
||||||
|
)
|
||||||
|
)
|
||||||
|
);
|
||||||
|
|
||||||
if (isSpatialDim) {
|
if (segmentMetadata != null) {
|
||||||
spatialWriter.write(ImmutableRTree.newImmutableFromMutable(tree));
|
writeMetadataToFile(new File(v8OutDir, "metadata.drd"), segmentMetadata);
|
||||||
spatialWriter.close();
|
log.info("wrote metadata.drd in outDir[%s].", v8OutDir);
|
||||||
|
|
||||||
serializerUtils.writeString(spatialOut, dimension);
|
expectedFiles.add("metadata.drd");
|
||||||
ByteStreams.copy(spatialWriter.combineStreams(), spatialOut);
|
|
||||||
spatialIoPeon.cleanup();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Map<String, File> files = Maps.newLinkedHashMap();
|
||||||
|
for (String fileName : expectedFiles) {
|
||||||
|
files.put(fileName, new File(v8OutDir, fileName));
|
||||||
|
}
|
||||||
|
|
||||||
|
File smooshDir = new File(v8OutDir, "smoosher");
|
||||||
|
smooshDir.mkdir();
|
||||||
|
|
||||||
|
for (Map.Entry<String, File> entry : Smoosh.smoosh(v8OutDir, smooshDir, files).entrySet()) {
|
||||||
|
entry.getValue().delete();
|
||||||
|
}
|
||||||
|
|
||||||
|
for (File file : smooshDir.listFiles()) {
|
||||||
|
Files.move(file, new File(v8OutDir, file.getName()));
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!smooshDir.delete()) {
|
||||||
|
log.info("Unable to delete temporary dir[%s], contains[%s]", smooshDir, Arrays.asList(smooshDir.listFiles()));
|
||||||
|
throw new IOException(String.format("Unable to delete temporary dir[%s]", smooshDir));
|
||||||
|
}
|
||||||
|
|
||||||
|
createIndexDrdFile(
|
||||||
|
IndexIO.V8_VERSION,
|
||||||
|
v8OutDir,
|
||||||
|
GenericIndexed.fromIterable(mergedDimensions, GenericIndexed.STRING_STRATEGY),
|
||||||
|
GenericIndexed.fromIterable(mergedMetrics, GenericIndexed.STRING_STRATEGY),
|
||||||
|
dataInterval,
|
||||||
|
indexSpec.getBitmapSerdeFactory()
|
||||||
|
);
|
||||||
|
|
||||||
|
indexIO.getDefaultIndexIOHandler().convertV8toV9(v8OutDir, outDir, indexSpec);
|
||||||
|
return outDir;
|
||||||
}
|
}
|
||||||
|
finally {
|
||||||
log.info("outDir[%s] completed inverted.drd in %,d millis.", v8OutDir, System.currentTimeMillis() - startTime);
|
closer.close();
|
||||||
|
|
||||||
final ArrayList<String> expectedFiles = Lists.newArrayList(
|
|
||||||
Iterables.concat(
|
|
||||||
Arrays.asList(
|
|
||||||
"index.drd", "inverted.drd", "spatial.drd", String.format("time_%s.drd", IndexIO.BYTE_ORDER)
|
|
||||||
),
|
|
||||||
Iterables.transform(mergedDimensions, GuavaUtils.formatFunction("dim_%s.drd")),
|
|
||||||
Iterables.transform(
|
|
||||||
mergedMetrics, GuavaUtils.formatFunction(String.format("met_%%s_%s.drd", IndexIO.BYTE_ORDER))
|
|
||||||
)
|
|
||||||
)
|
|
||||||
);
|
|
||||||
|
|
||||||
if (segmentMetadata != null) {
|
|
||||||
writeMetadataToFile(new File(v8OutDir, "metadata.drd"), segmentMetadata);
|
|
||||||
log.info("wrote metadata.drd in outDir[%s].", v8OutDir);
|
|
||||||
|
|
||||||
expectedFiles.add("metadata.drd");
|
|
||||||
}
|
}
|
||||||
|
|
||||||
Map<String, File> files = Maps.newLinkedHashMap();
|
|
||||||
for (String fileName : expectedFiles) {
|
|
||||||
files.put(fileName, new File(v8OutDir, fileName));
|
|
||||||
}
|
|
||||||
|
|
||||||
File smooshDir = new File(v8OutDir, "smoosher");
|
|
||||||
smooshDir.mkdir();
|
|
||||||
|
|
||||||
for (Map.Entry<String, File> entry : Smoosh.smoosh(v8OutDir, smooshDir, files).entrySet()) {
|
|
||||||
entry.getValue().delete();
|
|
||||||
}
|
|
||||||
|
|
||||||
for (File file : smooshDir.listFiles()) {
|
|
||||||
Files.move(file, new File(v8OutDir, file.getName()));
|
|
||||||
}
|
|
||||||
|
|
||||||
if (!smooshDir.delete()) {
|
|
||||||
log.info("Unable to delete temporary dir[%s], contains[%s]", smooshDir, Arrays.asList(smooshDir.listFiles()));
|
|
||||||
throw new IOException(String.format("Unable to delete temporary dir[%s]", smooshDir));
|
|
||||||
}
|
|
||||||
|
|
||||||
createIndexDrdFile(
|
|
||||||
IndexIO.V8_VERSION,
|
|
||||||
v8OutDir,
|
|
||||||
GenericIndexed.fromIterable(mergedDimensions, GenericIndexed.STRING_STRATEGY),
|
|
||||||
GenericIndexed.fromIterable(mergedMetrics, GenericIndexed.STRING_STRATEGY),
|
|
||||||
dataInterval,
|
|
||||||
indexSpec.getBitmapSerdeFactory()
|
|
||||||
);
|
|
||||||
|
|
||||||
indexIO.getDefaultIndexIOHandler().convertV8toV9(v8OutDir, outDir, indexSpec);
|
|
||||||
FileUtils.deleteDirectory(v8OutDir);
|
|
||||||
|
|
||||||
return outDir;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
protected Iterable<Rowboat> makeRowIterable(
|
protected Iterable<Rowboat> makeRowIterable(
|
||||||
|
|
|
@ -27,6 +27,7 @@ import com.google.common.collect.Maps;
|
||||||
import com.google.common.collect.Ordering;
|
import com.google.common.collect.Ordering;
|
||||||
import com.google.common.collect.Sets;
|
import com.google.common.collect.Sets;
|
||||||
import com.google.common.io.ByteStreams;
|
import com.google.common.io.ByteStreams;
|
||||||
|
import com.google.common.io.Closer;
|
||||||
import com.google.common.io.Files;
|
import com.google.common.io.Files;
|
||||||
import com.google.common.primitives.Ints;
|
import com.google.common.primitives.Ints;
|
||||||
import com.google.inject.Inject;
|
import com.google.inject.Inject;
|
||||||
|
@ -74,6 +75,7 @@ import org.joda.time.DateTime;
|
||||||
import org.joda.time.Interval;
|
import org.joda.time.Interval;
|
||||||
|
|
||||||
import java.io.ByteArrayOutputStream;
|
import java.io.ByteArrayOutputStream;
|
||||||
|
import java.io.Closeable;
|
||||||
import java.io.File;
|
import java.io.File;
|
||||||
import java.io.FileOutputStream;
|
import java.io.FileOutputStream;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
|
@ -144,102 +146,121 @@ public class IndexMergerV9 extends IndexMerger
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Closer closer = Closer.create();
|
||||||
final IOPeon ioPeon = new TmpFileIOPeon(false);
|
final IOPeon ioPeon = new TmpFileIOPeon(false);
|
||||||
|
closer.register(new Closeable()
|
||||||
|
{
|
||||||
|
@Override
|
||||||
|
public void close() throws IOException
|
||||||
|
{
|
||||||
|
ioPeon.cleanup();
|
||||||
|
}
|
||||||
|
});
|
||||||
final FileSmoosher v9Smoosher = new FileSmoosher(outDir);
|
final FileSmoosher v9Smoosher = new FileSmoosher(outDir);
|
||||||
final File v9TmpDir = new File(outDir, "v9-tmp");
|
final File v9TmpDir = new File(outDir, "v9-tmp");
|
||||||
v9TmpDir.mkdirs();
|
v9TmpDir.mkdirs();
|
||||||
|
closer.register(new Closeable()
|
||||||
|
{
|
||||||
|
@Override
|
||||||
|
public void close() throws IOException
|
||||||
|
{
|
||||||
|
FileUtils.deleteDirectory(v9TmpDir);
|
||||||
|
}
|
||||||
|
});
|
||||||
log.info("Start making v9 index files, outDir:%s", outDir);
|
log.info("Start making v9 index files, outDir:%s", outDir);
|
||||||
|
try {
|
||||||
|
long startTime = System.currentTimeMillis();
|
||||||
|
ByteStreams.write(
|
||||||
|
Ints.toByteArray(IndexIO.V9_VERSION),
|
||||||
|
Files.newOutputStreamSupplier(new File(outDir, "version.bin"))
|
||||||
|
);
|
||||||
|
log.info("Completed version.bin in %,d millis.", System.currentTimeMillis() - startTime);
|
||||||
|
|
||||||
long startTime = System.currentTimeMillis();
|
progress.progress();
|
||||||
ByteStreams.write(
|
final Map<String, ValueType> metricsValueTypes = Maps.newTreeMap(Ordering.<String>natural().nullsFirst());
|
||||||
Ints.toByteArray(IndexIO.V9_VERSION),
|
final Map<String, String> metricTypeNames = Maps.newTreeMap(Ordering.<String>natural().nullsFirst());
|
||||||
Files.newOutputStreamSupplier(new File(outDir, "version.bin"))
|
final List<ColumnCapabilitiesImpl> dimCapabilities = Lists.newArrayListWithCapacity(mergedDimensions.size());
|
||||||
);
|
mergeCapabilities(adapters, mergedDimensions, metricsValueTypes, metricTypeNames, dimCapabilities);
|
||||||
log.info("Completed version.bin in %,d millis.", System.currentTimeMillis() - startTime);
|
|
||||||
|
|
||||||
progress.progress();
|
/************* Setup Dim Conversions **************/
|
||||||
final Map<String, ValueType> metricsValueTypes = Maps.newTreeMap(Ordering.<String>natural().nullsFirst());
|
progress.progress();
|
||||||
final Map<String, String> metricTypeNames = Maps.newTreeMap(Ordering.<String>natural().nullsFirst());
|
startTime = System.currentTimeMillis();
|
||||||
final List<ColumnCapabilitiesImpl> dimCapabilities = Lists.newArrayListWithCapacity(mergedDimensions.size());
|
final Map<String, Integer> dimCardinalities = Maps.newHashMap();
|
||||||
mergeCapabilities(adapters, mergedDimensions, metricsValueTypes, metricTypeNames, dimCapabilities);
|
final ArrayList<GenericIndexedWriter<String>> dimValueWriters = setupDimValueWriters(ioPeon, mergedDimensions);
|
||||||
|
final ArrayList<Map<String, IntBuffer>> dimConversions = Lists.newArrayListWithCapacity(adapters.size());
|
||||||
|
final ArrayList<Boolean> dimensionSkipFlag = Lists.newArrayListWithCapacity(mergedDimensions.size());
|
||||||
|
final ArrayList<Boolean> dimHasNullFlags = Lists.newArrayListWithCapacity(mergedDimensions.size());
|
||||||
|
final ArrayList<Boolean> convertMissingDimsFlags = Lists.newArrayListWithCapacity(mergedDimensions.size());
|
||||||
|
writeDimValueAndSetupDimConversion(
|
||||||
|
adapters, progress, mergedDimensions, dimCardinalities, dimValueWriters, dimensionSkipFlag, dimConversions,
|
||||||
|
convertMissingDimsFlags, dimHasNullFlags
|
||||||
|
);
|
||||||
|
log.info("Completed dim conversions in %,d millis.", System.currentTimeMillis() - startTime);
|
||||||
|
|
||||||
/************* Setup Dim Conversions **************/
|
/************* Walk through data sets, merge them, and write merged columns *************/
|
||||||
progress.progress();
|
progress.progress();
|
||||||
startTime = System.currentTimeMillis();
|
final Iterable<Rowboat> theRows = makeRowIterable(
|
||||||
final Map<String, Integer> dimCardinalities = Maps.newHashMap();
|
adapters,
|
||||||
final ArrayList<GenericIndexedWriter<String>> dimValueWriters = setupDimValueWriters(ioPeon, mergedDimensions);
|
mergedDimensions,
|
||||||
final ArrayList<Map<String, IntBuffer>> dimConversions = Lists.newArrayListWithCapacity(adapters.size());
|
mergedMetrics,
|
||||||
final ArrayList<Boolean> dimensionSkipFlag = Lists.newArrayListWithCapacity(mergedDimensions.size());
|
dimConversions,
|
||||||
final ArrayList<Boolean> dimHasNullFlags = Lists.newArrayListWithCapacity(mergedDimensions.size());
|
convertMissingDimsFlags,
|
||||||
final ArrayList<Boolean> convertMissingDimsFlags = Lists.newArrayListWithCapacity(mergedDimensions.size());
|
rowMergerFn
|
||||||
writeDimValueAndSetupDimConversion(
|
);
|
||||||
adapters, progress, mergedDimensions, dimCardinalities, dimValueWriters, dimensionSkipFlag, dimConversions,
|
final LongColumnSerializer timeWriter = setupTimeWriter(ioPeon);
|
||||||
convertMissingDimsFlags, dimHasNullFlags
|
final ArrayList<IndexedIntsWriter> dimWriters = setupDimensionWriters(
|
||||||
);
|
ioPeon, mergedDimensions, dimCapabilities, dimCardinalities, indexSpec
|
||||||
log.info("Completed dim conversions in %,d millis.", System.currentTimeMillis() - startTime);
|
);
|
||||||
|
final ArrayList<GenericColumnSerializer> metWriters = setupMetricsWriters(
|
||||||
|
ioPeon, mergedMetrics, metricsValueTypes, metricTypeNames, indexSpec
|
||||||
|
);
|
||||||
|
final List<IntBuffer> rowNumConversions = Lists.newArrayListWithCapacity(adapters.size());
|
||||||
|
final ArrayList<MutableBitmap> nullRowsList = Lists.newArrayListWithCapacity(mergedDimensions.size());
|
||||||
|
for (int i = 0; i < mergedDimensions.size(); ++i) {
|
||||||
|
nullRowsList.add(indexSpec.getBitmapSerdeFactory().getBitmapFactory().makeEmptyMutableBitmap());
|
||||||
|
}
|
||||||
|
mergeIndexesAndWriteColumns(
|
||||||
|
adapters, progress, theRows, timeWriter, dimWriters, metWriters,
|
||||||
|
dimensionSkipFlag, rowNumConversions, nullRowsList, dimHasNullFlags
|
||||||
|
);
|
||||||
|
|
||||||
/************* Walk through data sets, merge them, and write merged columns *************/
|
/************ Create Inverted Indexes *************/
|
||||||
progress.progress();
|
progress.progress();
|
||||||
final Iterable<Rowboat> theRows = makeRowIterable(
|
final ArrayList<GenericIndexedWriter<ImmutableBitmap>> bitmapIndexWriters = setupBitmapIndexWriters(
|
||||||
adapters,
|
ioPeon, mergedDimensions, indexSpec
|
||||||
mergedDimensions,
|
);
|
||||||
mergedMetrics,
|
final ArrayList<ByteBufferWriter<ImmutableRTree>> spatialIndexWriters = setupSpatialIndexWriters(
|
||||||
dimConversions,
|
ioPeon, mergedDimensions, indexSpec, dimCapabilities
|
||||||
convertMissingDimsFlags,
|
);
|
||||||
rowMergerFn
|
makeInvertedIndexes(
|
||||||
);
|
adapters, progress, mergedDimensions, indexSpec, v9TmpDir, rowNumConversions,
|
||||||
final LongColumnSerializer timeWriter = setupTimeWriter(ioPeon);
|
nullRowsList, dimValueWriters, bitmapIndexWriters, spatialIndexWriters, dimConversions
|
||||||
final ArrayList<IndexedIntsWriter> dimWriters = setupDimensionWriters(
|
);
|
||||||
ioPeon, mergedDimensions, dimCapabilities, dimCardinalities, indexSpec
|
|
||||||
);
|
/************ Finalize Build Columns *************/
|
||||||
final ArrayList<GenericColumnSerializer> metWriters = setupMetricsWriters(
|
progress.progress();
|
||||||
ioPeon, mergedMetrics, metricsValueTypes, metricTypeNames, indexSpec
|
makeTimeColumn(v9Smoosher, progress, timeWriter);
|
||||||
);
|
makeMetricsColumns(v9Smoosher, progress, mergedMetrics, metricsValueTypes, metricTypeNames, metWriters);
|
||||||
final List<IntBuffer> rowNumConversions = Lists.newArrayListWithCapacity(adapters.size());
|
makeDimensionColumns(
|
||||||
final ArrayList<MutableBitmap> nullRowsList = Lists.newArrayListWithCapacity(mergedDimensions.size());
|
v9Smoosher, progress, indexSpec, mergedDimensions, dimensionSkipFlag, dimCapabilities,
|
||||||
for (int i = 0; i < mergedDimensions.size(); ++i) {
|
dimValueWriters, dimWriters, bitmapIndexWriters, spatialIndexWriters
|
||||||
nullRowsList.add(indexSpec.getBitmapSerdeFactory().getBitmapFactory().makeEmptyMutableBitmap());
|
);
|
||||||
|
|
||||||
|
/************* Make index.drd & metadata.drd files **************/
|
||||||
|
progress.progress();
|
||||||
|
makeIndexBinary(
|
||||||
|
v9Smoosher, adapters, outDir, mergedDimensions, dimensionSkipFlag, mergedMetrics, progress, indexSpec
|
||||||
|
);
|
||||||
|
makeMetadataBinary(v9Smoosher, progress, segmentMetadata);
|
||||||
|
|
||||||
|
v9Smoosher.close();
|
||||||
|
progress.stop();
|
||||||
|
|
||||||
|
return outDir;
|
||||||
|
}
|
||||||
|
finally {
|
||||||
|
closer.close();
|
||||||
}
|
}
|
||||||
mergeIndexesAndWriteColumns(
|
|
||||||
adapters, progress, theRows, timeWriter, dimWriters, metWriters,
|
|
||||||
dimensionSkipFlag, rowNumConversions, nullRowsList, dimHasNullFlags
|
|
||||||
);
|
|
||||||
|
|
||||||
/************ Create Inverted Indexes *************/
|
|
||||||
progress.progress();
|
|
||||||
final ArrayList<GenericIndexedWriter<ImmutableBitmap>> bitmapIndexWriters = setupBitmapIndexWriters(
|
|
||||||
ioPeon, mergedDimensions, indexSpec
|
|
||||||
);
|
|
||||||
final ArrayList<ByteBufferWriter<ImmutableRTree>> spatialIndexWriters = setupSpatialIndexWriters(
|
|
||||||
ioPeon, mergedDimensions, indexSpec, dimCapabilities
|
|
||||||
);
|
|
||||||
makeInvertedIndexes(
|
|
||||||
adapters, progress, mergedDimensions, indexSpec, v9TmpDir, rowNumConversions,
|
|
||||||
nullRowsList, dimValueWriters, bitmapIndexWriters, spatialIndexWriters, dimConversions
|
|
||||||
);
|
|
||||||
|
|
||||||
/************ Finalize Build Columns *************/
|
|
||||||
progress.progress();
|
|
||||||
makeTimeColumn(v9Smoosher, progress, timeWriter);
|
|
||||||
makeMetricsColumns(v9Smoosher, progress, mergedMetrics, metricsValueTypes, metricTypeNames, metWriters);
|
|
||||||
makeDimensionColumns(
|
|
||||||
v9Smoosher, progress, indexSpec, mergedDimensions, dimensionSkipFlag, dimCapabilities,
|
|
||||||
dimValueWriters, dimWriters, bitmapIndexWriters, spatialIndexWriters
|
|
||||||
);
|
|
||||||
|
|
||||||
/************* Make index.drd & metadata.drd files **************/
|
|
||||||
progress.progress();
|
|
||||||
makeIndexBinary(
|
|
||||||
v9Smoosher, adapters, outDir, mergedDimensions, dimensionSkipFlag, mergedMetrics, progress, indexSpec
|
|
||||||
);
|
|
||||||
makeMetadataBinary(v9Smoosher, progress, segmentMetadata);
|
|
||||||
|
|
||||||
v9Smoosher.close();
|
|
||||||
ioPeon.cleanup();
|
|
||||||
FileUtils.deleteDirectory(v9TmpDir);
|
|
||||||
progress.stop();
|
|
||||||
|
|
||||||
return outDir;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
private void makeMetadataBinary(
|
private void makeMetadataBinary(
|
||||||
|
|
|
@ -37,6 +37,7 @@ import io.druid.query.aggregation.AggregatorFactory;
|
||||||
import io.druid.query.aggregation.CountAggregatorFactory;
|
import io.druid.query.aggregation.CountAggregatorFactory;
|
||||||
import io.druid.query.aggregation.LongSumAggregatorFactory;
|
import io.druid.query.aggregation.LongSumAggregatorFactory;
|
||||||
import io.druid.segment.column.Column;
|
import io.druid.segment.column.Column;
|
||||||
|
import io.druid.segment.column.ColumnCapabilitiesImpl;
|
||||||
import io.druid.segment.column.SimpleDictionaryEncodedColumn;
|
import io.druid.segment.column.SimpleDictionaryEncodedColumn;
|
||||||
import io.druid.segment.data.BitmapSerdeFactory;
|
import io.druid.segment.data.BitmapSerdeFactory;
|
||||||
import io.druid.segment.data.CompressedObjectStrategy;
|
import io.druid.segment.data.CompressedObjectStrategy;
|
||||||
|
@ -67,7 +68,6 @@ import java.nio.IntBuffer;
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.Arrays;
|
import java.util.Arrays;
|
||||||
import java.util.Collection;
|
import java.util.Collection;
|
||||||
import java.util.Iterator;
|
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
|
|
||||||
|
@ -1662,4 +1662,34 @@ public class IndexMergerTest
|
||||||
Assert.assertEquals(2, dictIdSeeker.seek(4));
|
Assert.assertEquals(2, dictIdSeeker.seek(4));
|
||||||
Assert.assertEquals(-1, dictIdSeeker.seek(5));
|
Assert.assertEquals(-1, dictIdSeeker.seek(5));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Test(expected = IllegalArgumentException.class)
|
||||||
|
public void testCloser() throws Exception
|
||||||
|
{
|
||||||
|
final long timestamp = System.currentTimeMillis();
|
||||||
|
IncrementalIndex toPersist = IncrementalIndexTest.createIndex(null);
|
||||||
|
IncrementalIndexTest.populateIndex(timestamp, toPersist);
|
||||||
|
ColumnCapabilitiesImpl capabilities = (ColumnCapabilitiesImpl) toPersist.getCapabilities("dim1");
|
||||||
|
capabilities.setHasSpatialIndexes(true);
|
||||||
|
|
||||||
|
final File tempDir = temporaryFolder.newFolder();
|
||||||
|
final File v8TmpDir = new File(tempDir, "v8-tmp");
|
||||||
|
final File v9TmpDir = new File(tempDir, "v9-tmp");
|
||||||
|
|
||||||
|
try {
|
||||||
|
INDEX_MERGER.persist(
|
||||||
|
toPersist,
|
||||||
|
tempDir,
|
||||||
|
indexSpec
|
||||||
|
);
|
||||||
|
}
|
||||||
|
finally {
|
||||||
|
if (v8TmpDir.exists()) {
|
||||||
|
Assert.fail("v8-tmp dir not clean.");
|
||||||
|
}
|
||||||
|
if (v9TmpDir.exists()) {
|
||||||
|
Assert.fail("v9-tmp dir not clean.");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue