diff --git a/api/src/main/java/io/druid/timeline/DataSegmentUtils.java b/api/src/main/java/io/druid/timeline/DataSegmentUtils.java index c735910bf8a..aa110d11d02 100644 --- a/api/src/main/java/io/druid/timeline/DataSegmentUtils.java +++ b/api/src/main/java/io/druid/timeline/DataSegmentUtils.java @@ -21,6 +21,7 @@ package io.druid.timeline; import com.google.common.base.Function; +import io.druid.java.util.common.IAE; import io.druid.java.util.common.logger.Logger; import org.joda.time.DateTime; @@ -44,51 +45,66 @@ public class DataSegmentUtils @Override public Interval apply(String identifier) { - return valueOf(datasource, identifier).getInterval(); + SegmentIdentifierParts segmentIdentifierParts = valueOf(datasource, identifier); + if (segmentIdentifierParts == null) { + throw new IAE("Invalid identifier [%s]", identifier); + } + + return segmentIdentifierParts.getInterval(); } }; } - // ignores shard spec + /** + * Parses a segment identifier into its components: dataSource, interval, version, and any trailing tags. Ignores + * shard spec. + * + * It is possible that this method may incorrectly parse an identifier, for example if the dataSource name in the + * identifier contains a DateTime parseable string such as 'datasource_2000-01-01T00:00:00.000Z' and dataSource was + * provided as 'datasource'. The desired behavior in this case would be to return null since the identifier does not + * actually belong to the provided dataSource but a non-null result would be returned. This is an edge case that would + * currently only affect paged select queries with a union dataSource of two similarly-named dataSources as in the + * given example. + * + * @param dataSource the dataSource corresponding to this identifier + * @param identifier segment identifier + * @return a {@link io.druid.timeline.DataSegmentUtils.SegmentIdentifierParts} object if the identifier could be + * parsed, null otherwise + */ public static SegmentIdentifierParts valueOf(String dataSource, String identifier) - { - SegmentIdentifierParts segmentDesc = parse(dataSource, identifier); - if (segmentDesc == null) { - throw new IllegalArgumentException("Invalid identifier " + identifier); - } - return segmentDesc; - } - - private static SegmentIdentifierParts parse(String dataSource, String identifier) { if (!identifier.startsWith(String.format("%s_", dataSource))) { - LOGGER.info("Invalid identifier %s", identifier); return null; } + String remaining = identifier.substring(dataSource.length() + 1); String[] splits = remaining.split(DataSegment.delimiter); if (splits.length < 3) { - LOGGER.info("Invalid identifier %s", identifier); return null; } DateTimeFormatter formatter = ISODateTimeFormat.dateTime(); - DateTime start = formatter.parseDateTime(splits[0]); - DateTime end = formatter.parseDateTime(splits[1]); - String version = splits[2]; - String trail = splits.length > 3 ? join(splits, DataSegment.delimiter, 3, splits.length) : null; - return new SegmentIdentifierParts( - dataSource, - new Interval(start.getMillis(), end.getMillis()), - version, - trail - ); + try { + DateTime start = formatter.parseDateTime(splits[0]); + DateTime end = formatter.parseDateTime(splits[1]); + String version = splits[2]; + String trail = splits.length > 3 ? join(splits, DataSegment.delimiter, 3, splits.length) : null; + + return new SegmentIdentifierParts( + dataSource, + new Interval(start.getMillis(), end.getMillis()), + version, + trail + ); + } catch (IllegalArgumentException e) { + return null; + } } public static String withInterval(final String dataSource, final String identifier, Interval newInterval) { - SegmentIdentifierParts segmentDesc = DataSegmentUtils.parse(dataSource, identifier); + SegmentIdentifierParts segmentDesc = DataSegmentUtils.valueOf(dataSource, identifier); if (segmentDesc == null) { // happens for test segments which has invalid segment id.. ignore for now LOGGER.warn("Invalid segment identifier " + identifier); diff --git a/api/src/test/java/io/druid/timeline/DataSegmentUtilsTest.java b/api/src/test/java/io/druid/timeline/DataSegmentUtilsTest.java index 80f7d0b7c0f..5ae9d1dae69 100644 --- a/api/src/test/java/io/druid/timeline/DataSegmentUtilsTest.java +++ b/api/src/test/java/io/druid/timeline/DataSegmentUtilsTest.java @@ -103,21 +103,21 @@ public class DataSegmentUtilsTest Assert.assertEquals(desc, DataSegmentUtils.valueOf(dataSource, desc.toString())); } - @Test(expected = IllegalArgumentException.class) + @Test public void testInvalidFormat0() { - DataSegmentUtils.valueOf("ds", "datasource_2015-01-02T00:00:00.000Z_2014-10-20T00:00:00.000Z_version"); + Assert.assertNull(DataSegmentUtils.valueOf("ds", "datasource_2015-01-02T00:00:00.000Z_2014-10-20T00:00:00.000Z_version")); } - @Test(expected = IllegalArgumentException.class) + @Test public void testInvalidFormat1() { - DataSegmentUtils.valueOf("datasource", "datasource_invalid_interval_version"); + Assert.assertNull(DataSegmentUtils.valueOf("datasource", "datasource_invalid_interval_version")); } - @Test(expected = IllegalArgumentException.class) + @Test public void testInvalidFormat2() { - DataSegmentUtils.valueOf("datasource", "datasource_2015-01-02T00:00:00.000Z_version"); + Assert.assertNull(DataSegmentUtils.valueOf("datasource", "datasource_2015-01-02T00:00:00.000Z_version")); } } diff --git a/processing/src/main/java/io/druid/query/select/SelectQueryQueryToolChest.java b/processing/src/main/java/io/druid/query/select/SelectQueryQueryToolChest.java index 2c92ff1badb..86b9c20fc27 100644 --- a/processing/src/main/java/io/druid/query/select/SelectQueryQueryToolChest.java +++ b/processing/src/main/java/io/druid/query/select/SelectQueryQueryToolChest.java @@ -24,6 +24,7 @@ import com.fasterxml.jackson.databind.ObjectMapper; import com.google.common.base.Function; import com.google.common.base.Functions; import com.google.common.base.Preconditions; +import com.google.common.base.Predicate; import com.google.common.base.Supplier; import com.google.common.collect.Iterables; import com.google.common.collect.Lists; @@ -350,7 +351,7 @@ public class SelectQueryQueryToolChest extends QueryToolChest List filterSegments(SelectQuery query, List segments) { // at the point where this code is called, only one datasource should exist. - String dataSource = Iterables.getOnlyElement(query.getDataSource().getNames()); + final String dataSource = Iterables.getOnlyElement(query.getDataSource().getNames()); PagingSpec pagingSpec = query.getPagingSpec(); Map paging = pagingSpec.getPagingIdentifiers(); @@ -360,8 +361,22 @@ public class SelectQueryQueryToolChest extends QueryToolChest filteredPagingKeys = Iterables.filter( + paging.keySet(), new Predicate() + { + @Override + public boolean apply(String input) + { + return DataSegmentUtils.valueOf(dataSource, input) != null; + } + } + ); + List intervals = Lists.newArrayList( - Iterables.transform(paging.keySet(), DataSegmentUtils.INTERVAL_EXTRACTOR(dataSource)) + Iterables.transform(filteredPagingKeys, DataSegmentUtils.INTERVAL_EXTRACTOR(dataSource)) ); Collections.sort( intervals, query.isDescending() ? Comparators.intervalsByEndThenStart() diff --git a/processing/src/test/java/io/druid/query/select/MultiSegmentSelectQueryTest.java b/processing/src/test/java/io/druid/query/select/MultiSegmentSelectQueryTest.java index d70e4dbd9fd..24196d7d186 100644 --- a/processing/src/test/java/io/druid/query/select/MultiSegmentSelectQueryTest.java +++ b/processing/src/test/java/io/druid/query/select/MultiSegmentSelectQueryTest.java @@ -21,6 +21,7 @@ package io.druid.query.select; import com.google.common.base.Supplier; import com.google.common.base.Suppliers; +import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableMap; import com.google.common.collect.Lists; import com.google.common.io.CharSource; @@ -33,6 +34,8 @@ import io.druid.query.QueryRunnerFactory; import io.druid.query.QueryRunnerTestHelper; import io.druid.query.Result; import io.druid.query.TableDataSource; +import io.druid.query.UnionDataSource; +import io.druid.query.UnionQueryRunner; import io.druid.query.dimension.DefaultDimensionSpec; import io.druid.query.ordering.StringComparators; import io.druid.segment.IncrementalIndexSegment; @@ -313,6 +316,38 @@ public class MultiSegmentSelectQueryTest } } + @Test + public void testPagingIdentifiersForUnionDatasource() + { + Druids.SelectQueryBuilder selectQueryBuilder = Druids + .newSelectQueryBuilder() + .dataSource( + new UnionDataSource( + ImmutableList.of( + new TableDataSource(QueryRunnerTestHelper.dataSource), + new TableDataSource("testing-2") + ) + ) + ) + .intervals(SelectQueryRunnerTest.I_0112_0114) + .granularity(QueryRunnerTestHelper.allGran) + .dimensionSpecs(DefaultDimensionSpec.toSpec(QueryRunnerTestHelper.dimensions)) + .pagingSpec(PagingSpec.newSpec(3)); + + SelectQuery query = selectQueryBuilder.build(); + QueryRunner unionQueryRunner = new UnionQueryRunner(runner); + + List> results = Sequences.toList( + unionQueryRunner.run(query, ImmutableMap.of()), + Lists.>newArrayList() + ); + + Map pagingIdentifiers = results.get(0).getValue().getPagingIdentifiers(); + query = query.withPagingSpec(toNextCursor(PagingSpec.merge(Arrays.asList(pagingIdentifiers)), query, 3)); + + Sequences.toList(unionQueryRunner.run(query, ImmutableMap.of()), Lists.>newArrayList()); + } + private PagingSpec toNextCursor(Map merged, SelectQuery query, int threshold) { if (!fromNext) {