mirror of https://github.com/apache/druid.git
Fix IngestSegmentFirehoseFactory (#4069)
This commit is contained in:
parent
84fe91ba0b
commit
6febcd9f24
|
@ -55,6 +55,7 @@ import io.druid.indexing.common.actions.LockAcquireAction;
|
|||
import io.druid.indexing.common.actions.LockTryAcquireAction;
|
||||
import io.druid.indexing.common.actions.SegmentTransactionalInsertAction;
|
||||
import io.druid.indexing.common.actions.TaskActionClient;
|
||||
import io.druid.indexing.firehose.IngestSegmentFirehoseFactory;
|
||||
import io.druid.java.util.common.ISE;
|
||||
import io.druid.java.util.common.granularity.Granularity;
|
||||
import io.druid.java.util.common.guava.Comparators;
|
||||
|
@ -168,6 +169,12 @@ public class IndexTask extends AbstractTask
|
|||
.isPresent();
|
||||
|
||||
final FirehoseFactory delegateFirehoseFactory = ingestionSchema.getIOConfig().getFirehoseFactory();
|
||||
|
||||
if (delegateFirehoseFactory instanceof IngestSegmentFirehoseFactory) {
|
||||
// pass toolbox to Firehose
|
||||
((IngestSegmentFirehoseFactory) delegateFirehoseFactory).setTaskToolbox(toolbox);
|
||||
}
|
||||
|
||||
final FirehoseFactory firehoseFactory;
|
||||
if (ingestionSchema.getIOConfig().isSkipFirehoseCaching()
|
||||
|| delegateFirehoseFactory instanceof ReplayableFirehoseFactory) {
|
||||
|
@ -290,7 +297,10 @@ public class IndexTask extends AbstractTask
|
|||
hllCollectors.put(interval, Optional.of(HyperLogLogCollector.makeLatestCollector()));
|
||||
}
|
||||
|
||||
List<Object> groupKey = Rows.toGroupKey(queryGranularity.bucketStart(inputRow.getTimestamp()).getMillis(), inputRow);
|
||||
List<Object> groupKey = Rows.toGroupKey(
|
||||
queryGranularity.bucketStart(inputRow.getTimestamp()).getMillis(),
|
||||
inputRow
|
||||
);
|
||||
hllCollectors.get(interval).get().add(hashFunction.hashBytes(jsonMapper.writeValueAsBytes(groupKey)).asBytes());
|
||||
}
|
||||
}
|
||||
|
@ -385,7 +395,12 @@ public class IndexTask extends AbstractTask
|
|||
|
||||
try (
|
||||
final Appenderator appenderator = newAppenderator(fireDepartmentMetrics, toolbox, dataSchema);
|
||||
final FiniteAppenderatorDriver driver = newDriver(appenderator, toolbox, segmentAllocator, fireDepartmentMetrics);
|
||||
final FiniteAppenderatorDriver driver = newDriver(
|
||||
appenderator,
|
||||
toolbox,
|
||||
segmentAllocator,
|
||||
fireDepartmentMetrics
|
||||
);
|
||||
final Firehose firehose = firehoseFactory.connect(dataSchema.getParser())
|
||||
) {
|
||||
final Supplier<Committer> committerSupplier = Committers.supplierFromFirehose(firehose);
|
||||
|
|
|
@ -68,6 +68,7 @@ public class IngestSegmentFirehoseFactory implements FirehoseFactory<InputRowPar
|
|||
private final List<String> metrics;
|
||||
private final Injector injector;
|
||||
private final IndexIO indexIO;
|
||||
private TaskToolbox taskToolbox;
|
||||
|
||||
@JsonCreator
|
||||
public IngestSegmentFirehoseFactory(
|
||||
|
@ -121,21 +122,28 @@ public class IngestSegmentFirehoseFactory implements FirehoseFactory<InputRowPar
|
|||
return metrics;
|
||||
}
|
||||
|
||||
public void setTaskToolbox(TaskToolbox taskToolbox)
|
||||
{
|
||||
this.taskToolbox = taskToolbox;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Firehose connect(InputRowParser inputRowParser) throws IOException, ParseException
|
||||
{
|
||||
log.info("Connecting firehose: dataSource[%s], interval[%s]", dataSource, interval);
|
||||
// better way to achieve this is to pass toolbox to Firehose, The instance is initialized Lazily on connect method.
|
||||
// Noop Task is just used to create the toolbox and list segments.
|
||||
final TaskToolbox toolbox = injector.getInstance(TaskToolboxFactory.class).build(
|
||||
new NoopTask("reingest", 0, 0, null, null, null)
|
||||
);
|
||||
|
||||
if (taskToolbox == null) {
|
||||
// Noop Task is just used to create the toolbox and list segments.
|
||||
taskToolbox = injector.getInstance(TaskToolboxFactory.class).build(
|
||||
new NoopTask("reingest", 0, 0, null, null, null)
|
||||
);
|
||||
}
|
||||
|
||||
try {
|
||||
final List<DataSegment> usedSegments = toolbox
|
||||
final List<DataSegment> usedSegments = taskToolbox
|
||||
.getTaskActionClient()
|
||||
.submit(new SegmentListUsedAction(dataSource, interval, null));
|
||||
final Map<DataSegment, File> segmentFileMap = toolbox.fetchSegments(usedSegments);
|
||||
final Map<DataSegment, File> segmentFileMap = taskToolbox.fetchSegments(usedSegments);
|
||||
VersionedIntervalTimeline<String, DataSegment> timeline = new VersionedIntervalTimeline<>(
|
||||
Ordering.<String>natural().nullsFirst()
|
||||
);
|
||||
|
|
Loading…
Reference in New Issue