Merge pull request #148 from metamx/refactor-indexing

Refactor indexing
This commit is contained in:
fjy 2013-06-06 14:12:33 -07:00
commit c2e0fb7b8e
314 changed files with 1220 additions and 886 deletions

View File

@ -18,7 +18,8 @@
~ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. ~ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
--> -->
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd"> <project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
<modelVersion>4.0.0</modelVersion> <modelVersion>4.0.0</modelVersion>
<groupId>com.metamx.druid</groupId> <groupId>com.metamx.druid</groupId>
<artifactId>druid-client</artifactId> <artifactId>druid-client</artifactId>
@ -28,7 +29,7 @@
<parent> <parent>
<groupId>com.metamx</groupId> <groupId>com.metamx</groupId>
<artifactId>druid</artifactId> <artifactId>druid</artifactId>
<version>0.4.33-SNAPSHOT</version> <version>0.5.0-SNAPSHOT</version>
</parent> </parent>
<dependencies> <dependencies>
@ -39,7 +40,7 @@
</dependency> </dependency>
<dependency> <dependency>
<groupId>com.metamx.druid</groupId> <groupId>com.metamx.druid</groupId>
<artifactId>druid-index-common</artifactId> <artifactId>druid-indexing-common</artifactId>
<version>${project.parent.version}</version> <version>${project.parent.version}</version>
</dependency> </dependency>
@ -177,6 +178,10 @@
<groupId>commons-cli</groupId> <groupId>commons-cli</groupId>
<artifactId>commons-cli</artifactId> <artifactId>commons-cli</artifactId>
</dependency> </dependency>
<dependency>
<groupId>com.metamx</groupId>
<artifactId>bytebuffer-collections</artifactId>
</dependency>
<!-- Tests --> <!-- Tests -->
<dependency> <dependency>

View File

@ -108,7 +108,7 @@ public class IndexingServiceClient
throw new ISE("Cannot find instance of indexingService"); throw new ISE("Cannot find instance of indexingService");
} }
return String.format("http://%s:%s/mmx/merger/v1", instance.getAddress(), instance.getPort()); return String.format("http://%s:%s/druid/indexer/v1", instance.getAddress(), instance.getPort());
} }
catch (Exception e) { catch (Exception e) {
throw Throwables.propagate(e); throw Throwables.propagate(e);

View File

@ -28,7 +28,7 @@
<parent> <parent>
<groupId>com.metamx</groupId> <groupId>com.metamx</groupId>
<artifactId>druid</artifactId> <artifactId>druid</artifactId>
<version>0.4.33-SNAPSHOT</version> <version>0.5.0-SNAPSHOT</version>
</parent> </parent>
<dependencies> <dependencies>

View File

Before

Width:  |  Height:  |  Size: 239 KiB

After

Width:  |  Height:  |  Size: 239 KiB

View File

Before

Width:  |  Height:  |  Size: 78 KiB

After

Width:  |  Height:  |  Size: 78 KiB

View File

Before

Width:  |  Height:  |  Size: 28 KiB

After

Width:  |  Height:  |  Size: 28 KiB

View File

Before

Width:  |  Height:  |  Size: 66 KiB

After

Width:  |  Height:  |  Size: 66 KiB

View File

Before

Width:  |  Height:  |  Size: 35 KiB

After

Width:  |  Height:  |  Size: 35 KiB

View File

Before

Width:  |  Height:  |  Size: 95 KiB

After

Width:  |  Height:  |  Size: 95 KiB

View File

@ -9,7 +9,7 @@
<parent> <parent>
<groupId>com.metamx</groupId> <groupId>com.metamx</groupId>
<artifactId>druid</artifactId> <artifactId>druid</artifactId>
<version>0.4.33-SNAPSHOT</version> <version>0.5.0-SNAPSHOT</version>
</parent> </parent>
<dependencies> <dependencies>

View File

@ -21,14 +21,14 @@
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd"> <project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
<modelVersion>4.0.0</modelVersion> <modelVersion>4.0.0</modelVersion>
<groupId>com.metamx.druid</groupId> <groupId>com.metamx.druid</groupId>
<artifactId>druid-index-common</artifactId> <artifactId>druid-indexing-common</artifactId>
<name>druid-index-common</name> <name>druid-indexing-common</name>
<description>Druid Indexer</description> <description>Druid Indexing Common</description>
<parent> <parent>
<groupId>com.metamx</groupId> <groupId>com.metamx</groupId>
<artifactId>druid</artifactId> <artifactId>druid</artifactId>
<version>0.4.33-SNAPSHOT</version> <version>0.5.0-SNAPSHOT</version>
</parent> </parent>
<dependencies> <dependencies>

View File

@ -27,15 +27,13 @@ import com.metamx.druid.kv.GenericIndexed;
*/ */
public class SpatialIndexColumnPartSupplier implements Supplier<SpatialIndex> public class SpatialIndexColumnPartSupplier implements Supplier<SpatialIndex>
{ {
private static final ImmutableRTree EMPTY_SET = new ImmutableRTree();
private final ImmutableRTree indexedTree; private final ImmutableRTree indexedTree;
public SpatialIndexColumnPartSupplier( public SpatialIndexColumnPartSupplier(
ImmutableRTree indexedTree ImmutableRTree indexedTree
) )
{ {
this.indexedTree = (indexedTree == null) ? EMPTY_SET : indexedTree; this.indexedTree = indexedTree;
} }
@Override @Override

View File

@ -692,25 +692,29 @@ public class IndexIO
Map<String, Column> columns = Maps.newHashMap(); Map<String, Column> columns = Maps.newHashMap();
for (String dimension : index.getAvailableDimensions()) { for (String dimension : index.getAvailableDimensions()) {
ColumnBuilder builder = new ColumnBuilder()
.setType(ValueType.STRING)
.setHasMultipleValues(true)
.setDictionaryEncodedColumn(
new DictionaryEncodedColumnSupplier(
index.getDimValueLookup(dimension), null, (index.getDimColumn(dimension))
)
)
.setBitmapIndex(
new BitmapIndexColumnPartSupplier(
index.getInvertedIndexes().get(dimension), index.getDimValueLookup(dimension)
)
);
if (index.getSpatialIndexes().get(dimension) != null) {
builder.setSpatialIndex(
new SpatialIndexColumnPartSupplier(
index.getSpatialIndexes().get(dimension)
)
);
}
columns.put( columns.put(
dimension.toLowerCase(), dimension.toLowerCase(),
new ColumnBuilder() builder.build()
.setType(ValueType.STRING)
.setHasMultipleValues(true)
.setDictionaryEncodedColumn(
new DictionaryEncodedColumnSupplier(
index.getDimValueLookup(dimension), null, (index.getDimColumn(dimension))
)
)
.setBitmapIndex(
new BitmapIndexColumnPartSupplier(
index.getInvertedIndexes().get(dimension), index.getDimValueLookup(dimension)
)
).setSpatialIndex(
new SpatialIndexColumnPartSupplier(
index.getSpatialIndexes().get(dimension)
)
).build()
); );
} }

View File

@ -705,6 +705,11 @@ public class IndexMerger
final File invertedFile = new File(v8OutDir, "inverted.drd"); final File invertedFile = new File(v8OutDir, "inverted.drd");
Files.touch(invertedFile); Files.touch(invertedFile);
out = Files.newOutputStreamSupplier(invertedFile, true); out = Files.newOutputStreamSupplier(invertedFile, true);
final File geoFile = new File(v8OutDir, "spatial.drd");
Files.touch(geoFile);
OutputSupplier<FileOutputStream> spatialOut = Files.newOutputStreamSupplier(geoFile, true);
for (int i = 0; i < mergedDimensions.size(); ++i) { for (int i = 0; i < mergedDimensions.size(); ++i) {
long dimStartTime = System.currentTimeMillis(); long dimStartTime = System.currentTimeMillis();
String dimension = mergedDimensions.get(i); String dimension = mergedDimensions.get(i);
@ -723,6 +728,18 @@ public class IndexMerger
); );
writer.open(); writer.open();
boolean isSpatialDim = "spatial".equals(descriptions.get(dimension));
ByteBufferWriter<ImmutableRTree> spatialWriter = null;
RTree tree = null;
IOPeon spatialIoPeon = new TmpFileIOPeon();
if (isSpatialDim) {
spatialWriter = new ByteBufferWriter<ImmutableRTree>(
spatialIoPeon, dimension, IndexedRTree.objectStrategy
);
spatialWriter.open();
tree = new RTree(2, new LinearGutmanSplitStrategy(0, 50));
}
for (String dimVal : IndexedIterable.create(dimVals)) { for (String dimVal : IndexedIterable.create(dimVals)) {
progress.progress(); progress.progress();
List<Iterable<Integer>> convertedInverteds = Lists.newArrayListWithCapacity(indexes.size()); List<Iterable<Integer>> convertedInverteds = Lists.newArrayListWithCapacity(indexes.size());
@ -745,6 +762,15 @@ public class IndexMerger
} }
writer.write(ImmutableConciseSet.newImmutableFromMutable(bitset)); writer.write(ImmutableConciseSet.newImmutableFromMutable(bitset));
if (isSpatialDim && dimVal != null) {
List<String> stringCoords = Lists.newArrayList(SPLITTER.split(dimVal));
float[] coords = new float[stringCoords.size()];
for (int j = 0; j < coords.length; j++) {
coords[j] = Float.valueOf(stringCoords.get(j));
}
tree.insert(coords, bitset);
}
} }
writer.close(); writer.close();
@ -753,64 +779,16 @@ public class IndexMerger
ioPeon.cleanup(); ioPeon.cleanup();
log.info("Completed dimension[%s] in %,d millis.", dimension, System.currentTimeMillis() - dimStartTime); log.info("Completed dimension[%s] in %,d millis.", dimension, System.currentTimeMillis() - dimStartTime);
}
/************ Create Geographical Indexes *************/ if (isSpatialDim) {
// FIXME: Rewrite when indexing is updated spatialWriter.write(ImmutableRTree.newImmutableFromMutable(tree));
Stopwatch stopwatch = new Stopwatch(); spatialWriter.close();
stopwatch.start();
final File geoFile = new File(v8OutDir, "spatial.drd"); serializerUtils.writeString(spatialOut, dimension);
Files.touch(geoFile); ByteStreams.copy(spatialWriter.combineStreams(), spatialOut);
out = Files.newOutputStreamSupplier(geoFile, true); spatialIoPeon.cleanup();
for (int i = 0; i < mergedDimensions.size(); ++i) {
String dimension = mergedDimensions.get(i);
if (!"spatial".equals(descriptions.get(dimension))) {
continue;
} }
File dimOutFile = dimOuts.get(i).getFile();
final MappedByteBuffer dimValsMapped = Files.map(dimOutFile);
if (!dimension.equals(serializerUtils.readString(dimValsMapped))) {
throw new ISE("dimensions[%s] didn't equate!? This is a major WTF moment.", dimension);
}
Indexed<String> dimVals = GenericIndexed.read(dimValsMapped, GenericIndexed.stringStrategy);
log.info("Indexing geo dimension[%s] with cardinality[%,d]", dimension, dimVals.size());
ByteBufferWriter<ImmutableRTree> writer = new ByteBufferWriter<ImmutableRTree>(
ioPeon, dimension, IndexedRTree.objectStrategy
);
writer.open();
RTree tree = new RTree(2, new LinearGutmanSplitStrategy(0, 50));
int count = 0;
for (String dimVal : IndexedIterable.create(dimVals)) {
progress.progress();
if (dimVal == null) {
continue;
}
List<String> stringCoords = Lists.newArrayList(SPLITTER.split(dimVal));
float[] coords = new float[stringCoords.size()];
for (int j = 0; j < coords.length; j++) {
coords[j] = Float.valueOf(stringCoords.get(j));
}
tree.insert(coords, count);
count++;
}
writer.write(ImmutableRTree.newImmutableFromMutable(tree));
writer.close();
serializerUtils.writeString(out, dimension);
ByteStreams.copy(writer.combineStreams(), out);
ioPeon.cleanup();
log.info("Completed spatial dimension[%s] in %,d millis.", dimension, stopwatch.elapsedMillis());
} }
log.info("outDir[%s] completed inverted.drd in %,d millis.", v8OutDir, System.currentTimeMillis() - startTime); log.info("outDir[%s] completed inverted.drd in %,d millis.", v8OutDir, System.currentTimeMillis() - startTime);

Some files were not shown because too many files have changed in this diff Show More