From 277b35725619d6d6b63b5c925a72ffb19a37d703 Mon Sep 17 00:00:00 2001 From: imply-cheddar <86940447+imply-cheddar@users.noreply.github.com> Date: Thu, 6 Jul 2023 18:14:23 +0900 Subject: [PATCH] Optimize IntervalIterator (#14530) UniformGranularityTest's test to test a large number of intervals runs through 10 years of 1 second intervals. This pushes a lot of stuff through IntervalIterator and shows up in terms of test runtime as one of the hottest tests. Most of the time is going to constructing jodatime objects because it is doing things with DateTime objects instead of millis. Change the calls to use millis instead and things go faster. --- .../DetermineHashedPartitionsJobTest.java | 29 ++++++++++++------- .../util/common/granularity/Granularity.java | 15 ++++++---- .../granularity/IntervalsByGranularity.java | 17 ++++++----- .../common/granularity/PeriodGranularity.java | 1 + .../granularity/UniformGranularityTest.java | 3 +- 5 files changed, 40 insertions(+), 25 deletions(-) diff --git a/indexing-hadoop/src/test/java/org/apache/druid/indexer/DetermineHashedPartitionsJobTest.java b/indexing-hadoop/src/test/java/org/apache/druid/indexer/DetermineHashedPartitionsJobTest.java index d80a60306ca..5f7b0157fba 100644 --- a/indexing-hadoop/src/test/java/org/apache/druid/indexer/DetermineHashedPartitionsJobTest.java +++ b/indexing-hadoop/src/test/java/org/apache/druid/indexer/DetermineHashedPartitionsJobTest.java @@ -47,6 +47,7 @@ import org.junit.runners.Parameterized; import javax.annotation.Nullable; import java.io.File; +import java.util.ArrayList; import java.util.Arrays; import java.util.Collection; import java.util.List; @@ -57,20 +58,17 @@ public class DetermineHashedPartitionsJobTest { private HadoopDruidIndexerConfig indexerConfig; private int expectedNumTimeBuckets; - private int[] expectedNumOfShards; + private ArrayList expectedNumOfShards; private int errorMargin; @Parameterized.Parameters(name = "File={0}, TargetPartitionSize={1}, Interval={2}, ErrorMargin={3}, NumTimeBuckets={4}, NumShards={5}, SegmentGranularity={6}") public static Collection data() { - int[] first = new int[1]; - Arrays.fill(first, 13); - int[] second = new int[6]; - Arrays.fill(second, 1); - int[] third = new int[6]; - Arrays.fill(third, 13); - third[2] = 12; - third[5] = 11; + ArrayList first = makeListOf(1, 13); + ArrayList second = makeListOf(6, 1); + ArrayList third = makeListOf(6, 13); + third.set(2, 12); + third.set(5, 11); return Arrays.asList( new Object[][]{ @@ -144,7 +142,7 @@ public class DetermineHashedPartitionsJobTest String interval, int errorMargin, int expectedNumTimeBuckets, - int[] expectedNumOfShards, + ArrayList expectedNumOfShards, Granularity segmentGranularity, @Nullable HashPartitionFunction partitionFunction ) @@ -254,7 +252,7 @@ public class DetermineHashedPartitionsJobTest int i = 0; for (Map.Entry> entry : shardSpecs.entrySet()) { Assert.assertEquals( - expectedNumOfShards[i++], + expectedNumOfShards.get(i++), entry.getValue().size(), errorMargin ); @@ -264,4 +262,13 @@ public class DetermineHashedPartitionsJobTest } } } + + private static ArrayList makeListOf(int capacity, int value) + { + ArrayList retVal = new ArrayList<>(); + for (int i = 0; i < capacity; ++i) { + retVal.add(value); + } + return retVal; + } } diff --git a/processing/src/main/java/org/apache/druid/java/util/common/granularity/Granularity.java b/processing/src/main/java/org/apache/druid/java/util/common/granularity/Granularity.java index 6f5d9fb61e1..ca307886a79 100644 --- a/processing/src/main/java/org/apache/druid/java/util/common/granularity/Granularity.java +++ b/processing/src/main/java/org/apache/druid/java/util/common/granularity/Granularity.java @@ -150,6 +150,11 @@ public abstract class Granularity implements Cacheable */ public abstract boolean isAligned(Interval interval); + public DateTimeZone getTimeZone() + { + return DateTimeZone.UTC; + } + public DateTime bucketEnd(DateTime time) { return increment(bucketStart(time)); @@ -255,21 +260,21 @@ public abstract class Granularity implements Cacheable { private final Interval inputInterval; - private DateTime currStart; - private DateTime currEnd; + private long currStart; + private long currEnd; private IntervalIterator(Interval inputInterval) { this.inputInterval = inputInterval; - currStart = bucketStart(inputInterval.getStart()); + currStart = bucketStart(inputInterval.getStartMillis()); currEnd = increment(currStart); } @Override public boolean hasNext() { - return currStart.isBefore(inputInterval.getEnd()); + return currStart < inputInterval.getEndMillis(); } @Override @@ -278,7 +283,7 @@ public abstract class Granularity implements Cacheable if (!hasNext()) { throw new NoSuchElementException("There are no more intervals"); } - Interval retVal = new Interval(currStart, currEnd); + Interval retVal = new Interval(currStart, currEnd, getTimeZone()); currStart = currEnd; currEnd = increment(currStart); diff --git a/processing/src/main/java/org/apache/druid/java/util/common/granularity/IntervalsByGranularity.java b/processing/src/main/java/org/apache/druid/java/util/common/granularity/IntervalsByGranularity.java index ea742a4ec8d..1e5f0240fd7 100644 --- a/processing/src/main/java/org/apache/druid/java/util/common/granularity/IntervalsByGranularity.java +++ b/processing/src/main/java/org/apache/druid/java/util/common/granularity/IntervalsByGranularity.java @@ -68,14 +68,15 @@ public class IntervalsByGranularity // intervals will be returned, both with the same value 2013-01-01T00:00:00.000Z/2013-02-01T00:00:00.000Z. // Thus dups can be created given the right conditions.... final SettableSupplier previous = new SettableSupplier<>(); - return FluentIterable.from(sortedNonOverlappingIntervals).transformAndConcat(granularity::getIterable) - .filter(interval -> { - if (previous.get() != null && previous.get().equals(interval)) { - return false; - } - previous.set(interval); - return true; - }).iterator(); + return FluentIterable.from(sortedNonOverlappingIntervals) + .transformAndConcat(granularity::getIterable) + .filter(interval -> { + if (previous.get() != null && previous.get().equals(interval)) { + return false; + } + previous.set(interval); + return true; + }).iterator(); } } } diff --git a/processing/src/main/java/org/apache/druid/java/util/common/granularity/PeriodGranularity.java b/processing/src/main/java/org/apache/druid/java/util/common/granularity/PeriodGranularity.java index 09960e33cda..4f9acefd284 100644 --- a/processing/src/main/java/org/apache/druid/java/util/common/granularity/PeriodGranularity.java +++ b/processing/src/main/java/org/apache/druid/java/util/common/granularity/PeriodGranularity.java @@ -79,6 +79,7 @@ public class PeriodGranularity extends Granularity implements JsonSerializable return period; } + @Override @JsonProperty("timeZone") public DateTimeZone getTimeZone() { diff --git a/server/src/test/java/org/apache/druid/segment/indexing/granularity/UniformGranularityTest.java b/server/src/test/java/org/apache/druid/segment/indexing/granularity/UniformGranularityTest.java index 0c19724883e..5ec95774729 100644 --- a/server/src/test/java/org/apache/druid/segment/indexing/granularity/UniformGranularityTest.java +++ b/server/src/test/java/org/apache/druid/segment/indexing/granularity/UniformGranularityTest.java @@ -27,6 +27,7 @@ import com.google.common.collect.Lists; import org.apache.druid.jackson.DefaultObjectMapper; import org.apache.druid.java.util.common.DateTimes; import org.apache.druid.java.util.common.Intervals; +import org.apache.druid.java.util.common.granularity.DurationGranularity; import org.apache.druid.java.util.common.granularity.Granularities; import org.apache.druid.java.util.common.granularity.PeriodGranularity; import org.joda.time.Interval; @@ -343,7 +344,7 @@ public class UniformGranularityTest { // just make sure that intervals for uniform spec are not materialized (causing OOM) when created final GranularitySpec spec = new UniformGranularitySpec( - Granularities.SECOND, + new DurationGranularity(1000, 0), null, Collections.singletonList( Intervals.of("2012-01-01T00Z/P10Y")