mirror of https://github.com/apache/druid.git
Remove ParseSpec.toInputFormat() (#9815)
* Remove toInputFormat() from ParseSpec * fix test
This commit is contained in:
parent
c6caae9a24
commit
964a1fc9df
|
@ -22,7 +22,6 @@ package org.apache.druid.data.input.impl;
|
|||
import com.fasterxml.jackson.annotation.JsonCreator;
|
||||
import com.fasterxml.jackson.annotation.JsonProperty;
|
||||
import com.google.common.base.Preconditions;
|
||||
import org.apache.druid.data.input.InputFormat;
|
||||
import org.apache.druid.java.util.common.parsers.CSVParser;
|
||||
import org.apache.druid.java.util.common.parsers.Parser;
|
||||
|
||||
|
@ -97,12 +96,6 @@ public class CSVParseSpec extends ParseSpec
|
|||
return new CSVParser(listDelimiter, columns, hasHeaderRow, skipHeaderRows);
|
||||
}
|
||||
|
||||
@Override
|
||||
public InputFormat toInputFormat()
|
||||
{
|
||||
return new CsvInputFormat(columns, listDelimiter, null, hasHeaderRow, skipHeaderRows);
|
||||
}
|
||||
|
||||
@Override
|
||||
public ParseSpec withTimestampSpec(TimestampSpec spec)
|
||||
{
|
||||
|
|
|
@ -22,7 +22,6 @@ package org.apache.druid.data.input.impl;
|
|||
import com.fasterxml.jackson.annotation.JsonCreator;
|
||||
import com.fasterxml.jackson.annotation.JsonProperty;
|
||||
import com.google.common.base.Preconditions;
|
||||
import org.apache.druid.data.input.InputFormat;
|
||||
import org.apache.druid.java.util.common.parsers.DelimitedParser;
|
||||
import org.apache.druid.java.util.common.parsers.Parser;
|
||||
|
||||
|
@ -124,12 +123,6 @@ public class DelimitedParseSpec extends ParseSpec
|
|||
);
|
||||
}
|
||||
|
||||
@Override
|
||||
public InputFormat toInputFormat()
|
||||
{
|
||||
return new DelimitedInputFormat(columns, listDelimiter, delimiter, hasHeaderRow, null, skipHeaderRows);
|
||||
}
|
||||
|
||||
@Override
|
||||
public ParseSpec withTimestampSpec(TimestampSpec spec)
|
||||
{
|
||||
|
|
|
@ -23,7 +23,6 @@ import com.fasterxml.jackson.annotation.JsonCreator;
|
|||
import com.fasterxml.jackson.annotation.JsonProperty;
|
||||
import com.fasterxml.jackson.core.JsonParser.Feature;
|
||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||
import org.apache.druid.data.input.InputFormat;
|
||||
import org.apache.druid.java.util.common.parsers.JSONPathParser;
|
||||
import org.apache.druid.java.util.common.parsers.JSONPathSpec;
|
||||
import org.apache.druid.java.util.common.parsers.Parser;
|
||||
|
@ -68,12 +67,6 @@ public class JSONParseSpec extends NestedDataParseSpec<JSONPathSpec>
|
|||
return new JSONPathParser(getFlattenSpec(), objectMapper);
|
||||
}
|
||||
|
||||
@Override
|
||||
public InputFormat toInputFormat()
|
||||
{
|
||||
return new JsonInputFormat(getFlattenSpec(), getFeatureSpec());
|
||||
}
|
||||
|
||||
@Override
|
||||
public ParseSpec withTimestampSpec(TimestampSpec spec)
|
||||
{
|
||||
|
|
|
@ -23,13 +23,10 @@ import com.fasterxml.jackson.annotation.JsonProperty;
|
|||
import com.fasterxml.jackson.annotation.JsonSubTypes;
|
||||
import com.fasterxml.jackson.annotation.JsonTypeInfo;
|
||||
import com.google.common.base.Preconditions;
|
||||
import org.apache.druid.data.input.InputFormat;
|
||||
import org.apache.druid.guice.annotations.ExtensionPoint;
|
||||
import org.apache.druid.guice.annotations.PublicApi;
|
||||
import org.apache.druid.java.util.common.parsers.Parser;
|
||||
|
||||
import javax.annotation.Nullable;
|
||||
|
||||
@Deprecated
|
||||
@ExtensionPoint
|
||||
@JsonTypeInfo(use = JsonTypeInfo.Id.NAME, property = "format")
|
||||
|
@ -71,16 +68,6 @@ public abstract class ParseSpec
|
|||
return null;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns null if it's not implemented yet.
|
||||
* This method (and maybe this class) will be removed in favor of {@link InputFormat} in the future.
|
||||
*/
|
||||
@Nullable
|
||||
public InputFormat toInputFormat()
|
||||
{
|
||||
return null;
|
||||
}
|
||||
|
||||
@PublicApi
|
||||
public ParseSpec withTimestampSpec(TimestampSpec spec)
|
||||
{
|
||||
|
|
|
@ -52,7 +52,7 @@ public class FirehoseFactoryToInputSourceAdaptorTest
|
|||
}
|
||||
final TestFirehoseFactory firehoseFactory = new TestFirehoseFactory(lines);
|
||||
final StringInputRowParser inputRowParser = new StringInputRowParser(
|
||||
new UnimplementedInputFormatCsvParseSpec(
|
||||
new CSVParseSpec(
|
||||
new TimestampSpec(null, "yyyyMMdd", null),
|
||||
new DimensionsSpec(DimensionsSpec.getDefaultSchemas(Arrays.asList("timestamp", "name", "score"))),
|
||||
",",
|
||||
|
@ -95,28 +95,6 @@ public class FirehoseFactoryToInputSourceAdaptorTest
|
|||
}
|
||||
}
|
||||
|
||||
private static class UnimplementedInputFormatCsvParseSpec extends CSVParseSpec
|
||||
{
|
||||
private UnimplementedInputFormatCsvParseSpec(
|
||||
TimestampSpec timestampSpec,
|
||||
DimensionsSpec dimensionsSpec,
|
||||
String listDelimiter,
|
||||
List<String> columns,
|
||||
boolean hasHeaderRow,
|
||||
int skipHeaderRows
|
||||
)
|
||||
{
|
||||
super(timestampSpec, dimensionsSpec, listDelimiter, columns, hasHeaderRow, skipHeaderRows);
|
||||
}
|
||||
|
||||
@Nullable
|
||||
@Override
|
||||
public InputFormat toInputFormat()
|
||||
{
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
private static class TestFirehoseFactory implements FiniteFirehoseFactory<StringInputRowParser, Object>
|
||||
{
|
||||
private final List<String> lines;
|
||||
|
|
|
@ -194,9 +194,7 @@ public class KafkaSupervisor extends SeekableStreamSupervisor<Integer, Long>
|
|||
true,
|
||||
minimumMessageTime,
|
||||
maximumMessageTime,
|
||||
ioConfig.getInputFormat(
|
||||
spec.getDataSchema().getParser() == null ? null : spec.getDataSchema().getParser().getParseSpec()
|
||||
)
|
||||
ioConfig.getInputFormat()
|
||||
);
|
||||
}
|
||||
|
||||
|
|
|
@ -140,9 +140,7 @@ public class KinesisSupervisor extends SeekableStreamSupervisor<String, String>
|
|||
true,
|
||||
minimumMessageTime,
|
||||
maximumMessageTime,
|
||||
ioConfig.getInputFormat(
|
||||
spec.getDataSchema().getParser() == null ? null : spec.getDataSchema().getParser().getParseSpec()
|
||||
),
|
||||
ioConfig.getInputFormat(),
|
||||
ioConfig.getEndpoint(),
|
||||
ioConfig.getRecordsPerFetch(),
|
||||
ioConfig.getFetchDelayMillis(),
|
||||
|
|
|
@ -44,7 +44,6 @@ import org.apache.druid.data.input.InputSource;
|
|||
import org.apache.druid.data.input.InputSourceReader;
|
||||
import org.apache.druid.data.input.Rows;
|
||||
import org.apache.druid.data.input.impl.InputRowParser;
|
||||
import org.apache.druid.data.input.impl.ParseSpec;
|
||||
import org.apache.druid.hll.HyperLogLogCollector;
|
||||
import org.apache.druid.indexer.Checks;
|
||||
import org.apache.druid.indexer.IngestionState;
|
||||
|
@ -1046,10 +1045,7 @@ public class IndexTask extends AbstractBatchIndexTask implements ChatHandler
|
|||
|
||||
private static InputFormat getInputFormat(IndexIngestionSpec ingestionSchema)
|
||||
{
|
||||
final InputRowParser parser = ingestionSchema.getDataSchema().getParser();
|
||||
return ingestionSchema.getIOConfig().getNonNullInputFormat(
|
||||
parser == null ? null : parser.getParseSpec()
|
||||
);
|
||||
return ingestionSchema.getIOConfig().getNonNullInputFormat();
|
||||
}
|
||||
|
||||
public static class IndexIngestionSpec extends IngestionSpec<IndexIOConfig, IndexTuningConfig>
|
||||
|
@ -1184,13 +1180,9 @@ public class IndexTask extends AbstractBatchIndexTask implements ChatHandler
|
|||
}
|
||||
}
|
||||
|
||||
public InputFormat getNonNullInputFormat(@Nullable ParseSpec parseSpec)
|
||||
public InputFormat getNonNullInputFormat()
|
||||
{
|
||||
if (inputFormat == null) {
|
||||
return Preconditions.checkNotNull(parseSpec, "parseSpec").toInputFormat();
|
||||
} else {
|
||||
return inputFormat;
|
||||
}
|
||||
return Preconditions.checkNotNull(inputFormat, "inputFormat");
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@ -30,7 +30,6 @@ import org.apache.druid.data.input.InputFormat;
|
|||
import org.apache.druid.data.input.InputSource;
|
||||
import org.apache.druid.data.input.InputSplit;
|
||||
import org.apache.druid.data.input.SplitHintSpec;
|
||||
import org.apache.druid.data.input.impl.InputRowParser;
|
||||
import org.apache.druid.data.input.impl.SplittableInputSource;
|
||||
import org.apache.druid.indexer.TaskState;
|
||||
import org.apache.druid.indexer.TaskStatusPlus;
|
||||
|
@ -196,19 +195,8 @@ public abstract class ParallelIndexPhaseRunner<SubTaskType extends Task, SubTask
|
|||
if (lastStatus != null) {
|
||||
LOG.error("Failed because of the failed sub task[%s]", lastStatus.getId());
|
||||
} else {
|
||||
final SinglePhaseSubTaskSpec spec =
|
||||
(SinglePhaseSubTaskSpec) taskCompleteEvent.getSpec();
|
||||
final InputRowParser inputRowParser = spec.getIngestionSpec().getDataSchema().getParser();
|
||||
LOG.error(
|
||||
"Failed to run sub tasks for inputSplits[%s]",
|
||||
getSplitsIfSplittable(
|
||||
spec.getIngestionSpec().getIOConfig().getNonNullInputSource(inputRowParser),
|
||||
spec.getIngestionSpec().getIOConfig().getNonNullInputFormat(
|
||||
inputRowParser == null ? null : inputRowParser.getParseSpec()
|
||||
),
|
||||
tuningConfig.getSplitHintSpec()
|
||||
)
|
||||
);
|
||||
final SinglePhaseSubTaskSpec spec = (SinglePhaseSubTaskSpec) taskCompleteEvent.getSpec();
|
||||
LOG.error("Failed to run sub tasks for inputSplits[%s]", spec.getInputSplit());
|
||||
}
|
||||
break;
|
||||
default:
|
||||
|
|
|
@ -33,7 +33,6 @@ import org.apache.druid.client.indexing.IndexingServiceClient;
|
|||
import org.apache.druid.data.input.FiniteFirehoseFactory;
|
||||
import org.apache.druid.data.input.InputFormat;
|
||||
import org.apache.druid.data.input.InputSource;
|
||||
import org.apache.druid.data.input.impl.InputRowParser;
|
||||
import org.apache.druid.indexer.TaskState;
|
||||
import org.apache.druid.indexer.TaskStatus;
|
||||
import org.apache.druid.indexer.partitions.PartitionsSpec;
|
||||
|
@ -1001,10 +1000,7 @@ public class ParallelIndexSupervisorTask extends AbstractBatchIndexTask implemen
|
|||
|
||||
static InputFormat getInputFormat(ParallelIndexIngestionSpec ingestionSchema)
|
||||
{
|
||||
final InputRowParser parser = ingestionSchema.getDataSchema().getParser();
|
||||
return ingestionSchema.getIOConfig().getNonNullInputFormat(
|
||||
parser == null ? null : parser.getParseSpec()
|
||||
);
|
||||
return ingestionSchema.getIOConfig().getNonNullInputFormat();
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
@ -23,7 +23,6 @@ import com.fasterxml.jackson.annotation.JsonProperty;
|
|||
import com.google.common.base.Optional;
|
||||
import com.google.common.base.Preconditions;
|
||||
import org.apache.druid.data.input.InputFormat;
|
||||
import org.apache.druid.data.input.impl.ParseSpec;
|
||||
import org.apache.druid.segment.indexing.IOConfig;
|
||||
import org.joda.time.DateTime;
|
||||
|
||||
|
@ -127,8 +126,8 @@ public abstract class SeekableStreamIndexTaskIOConfig<PartitionIdType, SequenceO
|
|||
}
|
||||
|
||||
@Nullable
|
||||
public InputFormat getInputFormat(ParseSpec parseSpec)
|
||||
public InputFormat getInputFormat()
|
||||
{
|
||||
return inputFormat == null ? Preconditions.checkNotNull(parseSpec, "parseSpec").toInputFormat() : inputFormat;
|
||||
return inputFormat;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -255,7 +255,7 @@ public abstract class SeekableStreamIndexTaskRunner<PartitionIdType, SequenceOff
|
|||
.map(AggregatorFactory::getName)
|
||||
.collect(Collectors.toList())
|
||||
);
|
||||
this.inputFormat = ioConfig.getInputFormat(parser == null ? null : parser.getParseSpec());
|
||||
this.inputFormat = ioConfig.getInputFormat();
|
||||
this.parser = parser;
|
||||
this.authorizerMapper = authorizerMapper;
|
||||
this.chatHandlerProvider = chatHandlerProvider;
|
||||
|
|
|
@ -98,7 +98,7 @@ public abstract class SeekableStreamSamplerSpec<PartitionIdType, SequenceOffsetT
|
|||
ioConfig.isUseEarliestSequenceNumber()
|
||||
);
|
||||
inputFormat = Preconditions.checkNotNull(
|
||||
ioConfig.getInputFormat(null),
|
||||
ioConfig.getInputFormat(),
|
||||
"[spec.ioConfig.inputFormat] is required"
|
||||
);
|
||||
}
|
||||
|
|
|
@ -23,7 +23,6 @@ import com.fasterxml.jackson.annotation.JsonProperty;
|
|||
import com.google.common.base.Optional;
|
||||
import com.google.common.base.Preconditions;
|
||||
import org.apache.druid.data.input.InputFormat;
|
||||
import org.apache.druid.data.input.impl.ParseSpec;
|
||||
import org.apache.druid.java.util.common.IAE;
|
||||
import org.joda.time.DateTime;
|
||||
import org.joda.time.Duration;
|
||||
|
@ -102,22 +101,12 @@ public abstract class SeekableStreamSupervisorIOConfig
|
|||
}
|
||||
|
||||
@Nullable
|
||||
@JsonProperty("inputFormat")
|
||||
private InputFormat getGivenInputFormat()
|
||||
@JsonProperty()
|
||||
public InputFormat getInputFormat()
|
||||
{
|
||||
return inputFormat;
|
||||
}
|
||||
|
||||
@Nullable
|
||||
public InputFormat getInputFormat(@Nullable ParseSpec parseSpec)
|
||||
{
|
||||
if (inputFormat == null) {
|
||||
return Preconditions.checkNotNull(parseSpec, "parseSpec").toInputFormat();
|
||||
} else {
|
||||
return inputFormat;
|
||||
}
|
||||
}
|
||||
|
||||
@JsonProperty
|
||||
public Integer getReplicas()
|
||||
{
|
||||
|
|
|
@ -176,6 +176,7 @@ public class CompactionTaskParallelRunTest extends AbstractParallelIndexSupervis
|
|||
getObjectMapper(),
|
||||
tmpDir,
|
||||
CompactionTaskRunTest.DEFAULT_PARSE_SPEC,
|
||||
null,
|
||||
new UniformGranularitySpec(
|
||||
Granularities.HOUR,
|
||||
Granularities.MINUTE,
|
||||
|
|
|
@ -381,6 +381,7 @@ public class CompactionTaskRunTest extends IngestionTestBase
|
|||
getObjectMapper(),
|
||||
tmpDir,
|
||||
DEFAULT_PARSE_SPEC,
|
||||
null,
|
||||
new UniformGranularitySpec(
|
||||
Granularities.HOUR,
|
||||
Granularities.MINUTE,
|
||||
|
@ -791,6 +792,7 @@ public class CompactionTaskRunTest extends IngestionTestBase
|
|||
getObjectMapper(),
|
||||
tmpDir,
|
||||
DEFAULT_PARSE_SPEC,
|
||||
null,
|
||||
new UniformGranularitySpec(
|
||||
Granularities.HOUR,
|
||||
Granularities.MINUTE,
|
||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -19,10 +19,14 @@
|
|||
|
||||
package org.apache.druid.indexing.common.task.batch.parallel;
|
||||
|
||||
import com.google.common.base.Preconditions;
|
||||
import org.apache.druid.data.input.InputFormat;
|
||||
import org.apache.druid.data.input.MaxSizeSplitHintSpec;
|
||||
import org.apache.druid.data.input.impl.DimensionsSpec;
|
||||
import org.apache.druid.data.input.impl.LocalInputSource;
|
||||
import org.apache.druid.data.input.impl.ParseSpec;
|
||||
import org.apache.druid.data.input.impl.StringInputRowParser;
|
||||
import org.apache.druid.data.input.impl.TimestampSpec;
|
||||
import org.apache.druid.indexer.TaskState;
|
||||
import org.apache.druid.indexer.TaskStatus;
|
||||
import org.apache.druid.indexer.partitions.DimensionBasedPartitionsSpec;
|
||||
|
@ -55,6 +59,7 @@ import org.apache.druid.timeline.DataSegment;
|
|||
import org.joda.time.Interval;
|
||||
import org.junit.Assert;
|
||||
|
||||
import javax.annotation.Nullable;
|
||||
import java.io.File;
|
||||
import java.util.Collections;
|
||||
import java.util.List;
|
||||
|
@ -82,8 +87,16 @@ abstract class AbstractMultiPhaseParallelIndexingTest extends AbstractParallelIn
|
|||
this.useInputFormatApi = useInputFormatApi;
|
||||
}
|
||||
|
||||
boolean isUseInputFormatApi()
|
||||
{
|
||||
return useInputFormatApi;
|
||||
}
|
||||
|
||||
Set<DataSegment> runTestTask(
|
||||
ParseSpec parseSpec,
|
||||
@Nullable TimestampSpec timestampSpec,
|
||||
@Nullable DimensionsSpec dimensionsSpec,
|
||||
@Nullable InputFormat inputFormat,
|
||||
@Nullable ParseSpec parseSpec,
|
||||
Interval interval,
|
||||
File inputDir,
|
||||
String filter,
|
||||
|
@ -93,6 +106,9 @@ abstract class AbstractMultiPhaseParallelIndexingTest extends AbstractParallelIn
|
|||
)
|
||||
{
|
||||
final ParallelIndexSupervisorTask task = newTask(
|
||||
timestampSpec,
|
||||
dimensionsSpec,
|
||||
inputFormat,
|
||||
parseSpec,
|
||||
interval,
|
||||
inputDir,
|
||||
|
@ -108,7 +124,10 @@ abstract class AbstractMultiPhaseParallelIndexingTest extends AbstractParallelIn
|
|||
}
|
||||
|
||||
private ParallelIndexSupervisorTask newTask(
|
||||
ParseSpec parseSpec,
|
||||
@Nullable TimestampSpec timestampSpec,
|
||||
@Nullable DimensionsSpec dimensionsSpec,
|
||||
@Nullable InputFormat inputFormat,
|
||||
@Nullable ParseSpec parseSpec,
|
||||
Interval interval,
|
||||
File inputDir,
|
||||
String filter,
|
||||
|
@ -154,17 +173,18 @@ abstract class AbstractMultiPhaseParallelIndexingTest extends AbstractParallelIn
|
|||
final ParallelIndexIngestionSpec ingestionSpec;
|
||||
|
||||
if (useInputFormatApi) {
|
||||
Preconditions.checkArgument(parseSpec == null);
|
||||
ParallelIndexIOConfig ioConfig = new ParallelIndexIOConfig(
|
||||
null,
|
||||
new LocalInputSource(inputDir, filter),
|
||||
parseSpec.toInputFormat(),
|
||||
inputFormat,
|
||||
false
|
||||
);
|
||||
ingestionSpec = new ParallelIndexIngestionSpec(
|
||||
new DataSchema(
|
||||
"dataSource",
|
||||
parseSpec.getTimestampSpec(),
|
||||
parseSpec.getDimensionsSpec(),
|
||||
timestampSpec,
|
||||
dimensionsSpec,
|
||||
new AggregatorFactory[]{
|
||||
new LongSumAggregatorFactory("val", "val")
|
||||
},
|
||||
|
@ -175,6 +195,7 @@ abstract class AbstractMultiPhaseParallelIndexingTest extends AbstractParallelIn
|
|||
tuningConfig
|
||||
);
|
||||
} else {
|
||||
Preconditions.checkArgument(inputFormat == null);
|
||||
ParallelIndexIOConfig ioConfig = new ParallelIndexIOConfig(
|
||||
new LocalFirehoseFactory(inputDir, filter, null),
|
||||
false
|
||||
|
|
|
@ -38,6 +38,7 @@ import org.apache.druid.client.indexing.NoopIndexingServiceClient;
|
|||
import org.apache.druid.client.indexing.TaskStatusResponse;
|
||||
import org.apache.druid.data.input.InputFormat;
|
||||
import org.apache.druid.data.input.impl.CSVParseSpec;
|
||||
import org.apache.druid.data.input.impl.CsvInputFormat;
|
||||
import org.apache.druid.data.input.impl.DimensionsSpec;
|
||||
import org.apache.druid.data.input.impl.ParseSpec;
|
||||
import org.apache.druid.data.input.impl.TimestampSpec;
|
||||
|
@ -131,7 +132,13 @@ public class AbstractParallelIndexSupervisorTaskTest extends IngestionTestBase
|
|||
false,
|
||||
0
|
||||
);
|
||||
static final InputFormat DEFAULT_INPUT_FORMAT = DEFAULT_PARSE_SPEC.toInputFormat();
|
||||
static final InputFormat DEFAULT_INPUT_FORMAT = new CsvInputFormat(
|
||||
Arrays.asList("ts", "dim", "val"),
|
||||
null,
|
||||
false,
|
||||
false,
|
||||
0
|
||||
);
|
||||
static final ParallelIndexTuningConfig DEFAULT_TUNING_CONFIG_FOR_PARALLEL_INDEXING = new ParallelIndexTuningConfig(
|
||||
null,
|
||||
null,
|
||||
|
|
|
@ -20,7 +20,9 @@
|
|||
package org.apache.druid.indexing.common.task.batch.parallel;
|
||||
|
||||
import com.google.common.collect.ImmutableList;
|
||||
import org.apache.druid.data.input.InputFormat;
|
||||
import org.apache.druid.data.input.impl.CSVParseSpec;
|
||||
import org.apache.druid.data.input.impl.CsvInputFormat;
|
||||
import org.apache.druid.data.input.impl.DimensionsSpec;
|
||||
import org.apache.druid.data.input.impl.ParseSpec;
|
||||
import org.apache.druid.data.input.impl.TimestampSpec;
|
||||
|
@ -54,18 +56,25 @@ import java.util.Set;
|
|||
@RunWith(Parameterized.class)
|
||||
public class HashPartitionMultiPhaseParallelIndexingTest extends AbstractMultiPhaseParallelIndexingTest
|
||||
{
|
||||
private static final TimestampSpec TIMESTAMP_SPEC = new TimestampSpec("ts", "auto", null);
|
||||
private static final DimensionsSpec DIMENSIONS_SPEC = new DimensionsSpec(
|
||||
DimensionsSpec.getDefaultSchemas(Arrays.asList("ts", "dim1", "dim2"))
|
||||
);
|
||||
private static final ParseSpec PARSE_SPEC = new CSVParseSpec(
|
||||
new TimestampSpec(
|
||||
"ts",
|
||||
"auto",
|
||||
null
|
||||
),
|
||||
new DimensionsSpec(DimensionsSpec.getDefaultSchemas(Arrays.asList("ts", "dim1", "dim2"))),
|
||||
TIMESTAMP_SPEC,
|
||||
DIMENSIONS_SPEC,
|
||||
null,
|
||||
Arrays.asList("ts", "dim1", "dim2", "val"),
|
||||
false,
|
||||
0
|
||||
);
|
||||
private static final InputFormat INPUT_FORMAT = new CsvInputFormat(
|
||||
Arrays.asList("ts", "dim1", "dim2", "val"),
|
||||
null,
|
||||
false,
|
||||
false,
|
||||
0
|
||||
);
|
||||
private static final int MAX_NUM_CONCURRENT_SUB_TASKS = 2;
|
||||
private static final Interval INTERVAL_TO_INDEX = Intervals.of("2017-12/P1M");
|
||||
|
||||
|
@ -112,15 +121,34 @@ public class HashPartitionMultiPhaseParallelIndexingTest extends AbstractMultiPh
|
|||
@Test
|
||||
public void testRun() throws Exception
|
||||
{
|
||||
final Set<DataSegment> publishedSegments = runTestTask(
|
||||
PARSE_SPEC,
|
||||
INTERVAL_TO_INDEX,
|
||||
inputDir,
|
||||
"test_*",
|
||||
new HashedPartitionsSpec(null, 2, ImmutableList.of("dim1", "dim2")),
|
||||
MAX_NUM_CONCURRENT_SUB_TASKS,
|
||||
TaskState.SUCCESS
|
||||
);
|
||||
final Set<DataSegment> publishedSegments;
|
||||
if (isUseInputFormatApi()) {
|
||||
publishedSegments = runTestTask(
|
||||
TIMESTAMP_SPEC,
|
||||
DIMENSIONS_SPEC,
|
||||
INPUT_FORMAT,
|
||||
null,
|
||||
INTERVAL_TO_INDEX,
|
||||
inputDir,
|
||||
"test_*",
|
||||
new HashedPartitionsSpec(null, 2, ImmutableList.of("dim1", "dim2")),
|
||||
MAX_NUM_CONCURRENT_SUB_TASKS,
|
||||
TaskState.SUCCESS
|
||||
);
|
||||
} else {
|
||||
publishedSegments = runTestTask(
|
||||
null,
|
||||
null,
|
||||
null,
|
||||
PARSE_SPEC,
|
||||
INTERVAL_TO_INDEX,
|
||||
inputDir,
|
||||
"test_*",
|
||||
new HashedPartitionsSpec(null, 2, ImmutableList.of("dim1", "dim2")),
|
||||
MAX_NUM_CONCURRENT_SUB_TASKS,
|
||||
TaskState.SUCCESS
|
||||
);
|
||||
}
|
||||
assertHashedPartition(publishedSegments);
|
||||
}
|
||||
|
||||
|
|
|
@ -25,7 +25,9 @@ import com.google.common.collect.ImmutableList;
|
|||
import com.google.common.collect.Multimap;
|
||||
import com.google.common.collect.SetMultimap;
|
||||
import org.apache.druid.common.config.NullValueHandlingConfig;
|
||||
import org.apache.druid.data.input.InputFormat;
|
||||
import org.apache.druid.data.input.impl.CSVParseSpec;
|
||||
import org.apache.druid.data.input.impl.CsvInputFormat;
|
||||
import org.apache.druid.data.input.impl.DimensionsSpec;
|
||||
import org.apache.druid.data.input.impl.ParseSpec;
|
||||
import org.apache.druid.data.input.impl.TimestampSpec;
|
||||
|
@ -79,18 +81,25 @@ public class RangePartitionMultiPhaseParallelIndexingTest extends AbstractMultiP
|
|||
private static final String LIST_DELIMITER = "|";
|
||||
private static final List<String> DIMS = ImmutableList.of(DIM1, DIM2);
|
||||
private static final String TEST_FILE_NAME_PREFIX = "test_";
|
||||
private static final TimestampSpec TIMESTAMP_SPEC = new TimestampSpec(TIME, "auto", null);
|
||||
private static final DimensionsSpec DIMENSIONS_SPEC = new DimensionsSpec(
|
||||
DimensionsSpec.getDefaultSchemas(Arrays.asList(TIME, DIM1, DIM2))
|
||||
);
|
||||
private static final ParseSpec PARSE_SPEC = new CSVParseSpec(
|
||||
new TimestampSpec(
|
||||
TIME,
|
||||
"auto",
|
||||
null
|
||||
),
|
||||
new DimensionsSpec(DimensionsSpec.getDefaultSchemas(Arrays.asList(TIME, DIM1, DIM2))),
|
||||
TIMESTAMP_SPEC,
|
||||
DIMENSIONS_SPEC,
|
||||
LIST_DELIMITER,
|
||||
Arrays.asList(TIME, DIM1, DIM2, "val"),
|
||||
false,
|
||||
0
|
||||
);
|
||||
private static final InputFormat INPUT_FORMAT = new CsvInputFormat(
|
||||
Arrays.asList(TIME, DIM1, DIM2, "val"),
|
||||
LIST_DELIMITER,
|
||||
false,
|
||||
false,
|
||||
0
|
||||
);
|
||||
|
||||
@Parameterized.Parameters(name = "{0}, useInputFormatApi={1}, maxNumConcurrentSubTasks={2}, useMultiValueDim={3}")
|
||||
public static Iterable<Object[]> constructorFeeder()
|
||||
|
@ -192,20 +201,44 @@ public class RangePartitionMultiPhaseParallelIndexingTest extends AbstractMultiP
|
|||
public void createsCorrectRangePartitions() throws Exception
|
||||
{
|
||||
int targetRowsPerSegment = NUM_ROW / DIM_FILE_CARDINALITY / NUM_PARTITION;
|
||||
final Set<DataSegment> publishedSegments = runTestTask(
|
||||
PARSE_SPEC,
|
||||
INTERVAL_TO_INDEX,
|
||||
inputDir,
|
||||
TEST_FILE_NAME_PREFIX + "*",
|
||||
new SingleDimensionPartitionsSpec(
|
||||
targetRowsPerSegment,
|
||||
null,
|
||||
DIM1,
|
||||
false
|
||||
),
|
||||
maxNumConcurrentSubTasks,
|
||||
useMultivalueDim ? TaskState.FAILED : TaskState.SUCCESS
|
||||
);
|
||||
final Set<DataSegment> publishedSegments;
|
||||
if (isUseInputFormatApi()) {
|
||||
publishedSegments = runTestTask(
|
||||
TIMESTAMP_SPEC,
|
||||
DIMENSIONS_SPEC,
|
||||
INPUT_FORMAT,
|
||||
null,
|
||||
INTERVAL_TO_INDEX,
|
||||
inputDir,
|
||||
TEST_FILE_NAME_PREFIX + "*",
|
||||
new SingleDimensionPartitionsSpec(
|
||||
targetRowsPerSegment,
|
||||
null,
|
||||
DIM1,
|
||||
false
|
||||
),
|
||||
maxNumConcurrentSubTasks,
|
||||
useMultivalueDim ? TaskState.FAILED : TaskState.SUCCESS
|
||||
);
|
||||
} else {
|
||||
publishedSegments = runTestTask(
|
||||
null,
|
||||
null,
|
||||
null,
|
||||
PARSE_SPEC,
|
||||
INTERVAL_TO_INDEX,
|
||||
inputDir,
|
||||
TEST_FILE_NAME_PREFIX + "*",
|
||||
new SingleDimensionPartitionsSpec(
|
||||
targetRowsPerSegment,
|
||||
null,
|
||||
DIM1,
|
||||
false
|
||||
),
|
||||
maxNumConcurrentSubTasks,
|
||||
useMultivalueDim ? TaskState.FAILED : TaskState.SUCCESS
|
||||
);
|
||||
}
|
||||
|
||||
if (!useMultivalueDim) {
|
||||
assertRangePartitions(publishedSegments);
|
||||
|
|
|
@ -19,8 +19,6 @@
|
|||
|
||||
package org.apache.druid.indexing.seekablestream;
|
||||
|
||||
import com.fasterxml.jackson.annotation.JsonCreator;
|
||||
import com.fasterxml.jackson.annotation.JsonProperty;
|
||||
import com.fasterxml.jackson.core.type.TypeReference;
|
||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||
import com.fasterxml.jackson.databind.jsontype.NamedType;
|
||||
|
@ -87,7 +85,6 @@ import org.easymock.EasyMockSupport;
|
|||
import org.joda.time.Interval;
|
||||
import org.junit.Assert;
|
||||
|
||||
import javax.annotation.Nullable;
|
||||
import java.io.File;
|
||||
import java.io.IOException;
|
||||
import java.lang.reflect.InvocationTargetException;
|
||||
|
@ -149,12 +146,12 @@ public class SeekableStreamIndexTaskTestBase extends EasyMockSupport
|
|||
|
||||
static {
|
||||
OBJECT_MAPPER = new TestUtils().getTestObjectMapper();
|
||||
OBJECT_MAPPER.registerSubtypes(new NamedType(UnimplementedInputFormatJsonParseSpec.class, "json"));
|
||||
OBJECT_MAPPER.registerSubtypes(new NamedType(JSONParseSpec.class, "json"));
|
||||
OLD_DATA_SCHEMA = new DataSchema(
|
||||
"test_ds",
|
||||
OBJECT_MAPPER.convertValue(
|
||||
new StringInputRowParser(
|
||||
new UnimplementedInputFormatJsonParseSpec(
|
||||
new JSONParseSpec(
|
||||
new TimestampSpec("timestamp", "iso", null),
|
||||
new DimensionsSpec(
|
||||
Arrays.asList(
|
||||
|
@ -443,25 +440,4 @@ public class SeekableStreamIndexTaskTestBase extends EasyMockSupport
|
|||
return segmentDescriptor;
|
||||
}
|
||||
}
|
||||
|
||||
private static class UnimplementedInputFormatJsonParseSpec extends JSONParseSpec
|
||||
{
|
||||
@JsonCreator
|
||||
private UnimplementedInputFormatJsonParseSpec(
|
||||
@JsonProperty("timestampSpec") TimestampSpec timestampSpec,
|
||||
@JsonProperty("dimensionsSpec") DimensionsSpec dimensionsSpec,
|
||||
@JsonProperty("flattenSpec") JSONPathSpec flattenSpec,
|
||||
@JsonProperty("featureSpec") Map<String, Boolean> featureSpec
|
||||
)
|
||||
{
|
||||
super(timestampSpec, dimensionsSpec, flattenSpec, featureSpec);
|
||||
}
|
||||
|
||||
@Nullable
|
||||
@Override
|
||||
public InputFormat toInputFormat()
|
||||
{
|
||||
return null;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -22,7 +22,6 @@ package org.apache.druid.indexing.seekablestream;
|
|||
import com.google.common.collect.Iterables;
|
||||
import org.apache.druid.data.input.InputEntity;
|
||||
import org.apache.druid.data.input.InputEntityReader;
|
||||
import org.apache.druid.data.input.InputFormat;
|
||||
import org.apache.druid.data.input.InputRow;
|
||||
import org.apache.druid.data.input.InputRowSchema;
|
||||
import org.apache.druid.data.input.impl.DimensionsSpec;
|
||||
|
@ -63,7 +62,12 @@ public class StreamChunkParserTest
|
|||
public void testWithParserAndNullInputformatParseProperly() throws IOException
|
||||
{
|
||||
final InputRowParser<ByteBuffer> parser = new StringInputRowParser(
|
||||
new NotConvertibleToInputFormatParseSpec(),
|
||||
new JSONParseSpec(
|
||||
TIMESTAMP_SPEC,
|
||||
DimensionsSpec.EMPTY,
|
||||
JSONPathSpec.DEFAULT,
|
||||
Collections.emptyMap()
|
||||
),
|
||||
StringUtils.UTF8_STRING
|
||||
);
|
||||
final StreamChunkParser chunkParser = new StreamChunkParser(
|
||||
|
@ -109,7 +113,12 @@ public class StreamChunkParserTest
|
|||
public void testBothParserAndInputFormatParseProperlyUsingInputFormat() throws IOException
|
||||
{
|
||||
final InputRowParser<ByteBuffer> parser = new StringInputRowParser(
|
||||
new NotConvertibleToInputFormatParseSpec(),
|
||||
new JSONParseSpec(
|
||||
TIMESTAMP_SPEC,
|
||||
DimensionsSpec.EMPTY,
|
||||
JSONPathSpec.DEFAULT,
|
||||
Collections.emptyMap()
|
||||
),
|
||||
StringUtils.UTF8_STRING
|
||||
);
|
||||
final TrackingJsonInputFormat inputFormat = new TrackingJsonInputFormat(
|
||||
|
@ -138,25 +147,6 @@ public class StreamChunkParserTest
|
|||
Assert.assertEquals("val2", Iterables.getOnlyElement(row.getDimension("met")));
|
||||
}
|
||||
|
||||
private static class NotConvertibleToInputFormatParseSpec extends JSONParseSpec
|
||||
{
|
||||
private NotConvertibleToInputFormatParseSpec()
|
||||
{
|
||||
super(
|
||||
TIMESTAMP_SPEC,
|
||||
DimensionsSpec.EMPTY,
|
||||
JSONPathSpec.DEFAULT,
|
||||
Collections.emptyMap()
|
||||
);
|
||||
}
|
||||
|
||||
@Override
|
||||
public InputFormat toInputFormat()
|
||||
{
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
private static class TrackingJsonInputFormat extends JsonInputFormat
|
||||
{
|
||||
private boolean used;
|
||||
|
|
|
@ -1027,9 +1027,7 @@ public class SeekableStreamSupervisorStateTest extends EasyMockSupport
|
|||
true,
|
||||
minimumMessageTime,
|
||||
maximumMessageTime,
|
||||
ioConfig.getInputFormat(
|
||||
getDataSchema().getParser() == null ? null : getDataSchema().getParser().getParseSpec()
|
||||
)
|
||||
ioConfig.getInputFormat()
|
||||
)
|
||||
{
|
||||
};
|
||||
|
|
Loading…
Reference in New Issue