mirror of https://github.com/apache/druid.git
Decorate sampling response with system fields if specified (#15536)
* * decorate sampling response with system fields if specified * * add unit test
This commit is contained in:
parent
f32dbd4131
commit
857693f5cf
|
@ -34,6 +34,7 @@ import org.apache.druid.java.util.common.parsers.CloseableIterator;
|
||||||
import java.io.File;
|
import java.io.File;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.util.function.Function;
|
import java.util.function.Function;
|
||||||
|
import java.util.stream.Collectors;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* InputSourceReader iterating multiple {@link InputEntity}s. This class could be used for
|
* InputSourceReader iterating multiple {@link InputEntity}s. This class could be used for
|
||||||
|
@ -86,9 +87,17 @@ public class InputEntityIteratingReader implements InputSourceReader
|
||||||
{
|
{
|
||||||
return createIterator(entity -> {
|
return createIterator(entity -> {
|
||||||
// InputEntityReader is stateful and so a new one should be created per entity.
|
// InputEntityReader is stateful and so a new one should be created per entity.
|
||||||
|
final Function<InputRow, InputRow> systemFieldDecorator = systemFieldDecoratorFactory.decorator(entity);
|
||||||
try {
|
try {
|
||||||
final InputEntityReader reader = inputFormat.createReader(inputRowSchema, entity, temporaryDirectory);
|
final InputEntityReader reader = inputFormat.createReader(inputRowSchema, entity, temporaryDirectory);
|
||||||
return reader.sample();
|
return reader.sample()
|
||||||
|
.map(i -> InputRowListPlusRawValues.ofList(i.getRawValuesList(),
|
||||||
|
i.getInputRows() == null
|
||||||
|
? null
|
||||||
|
: i.getInputRows().stream().map(
|
||||||
|
systemFieldDecorator).collect(Collectors.toList()),
|
||||||
|
i.getParseException()
|
||||||
|
));
|
||||||
}
|
}
|
||||||
catch (IOException e) {
|
catch (IOException e) {
|
||||||
throw new RuntimeException(e);
|
throw new RuntimeException(e);
|
||||||
|
|
|
@ -23,9 +23,12 @@ import com.google.common.collect.ImmutableList;
|
||||||
import com.google.common.collect.Iterables;
|
import com.google.common.collect.Iterables;
|
||||||
import org.apache.druid.data.input.ColumnsFilter;
|
import org.apache.druid.data.input.ColumnsFilter;
|
||||||
import org.apache.druid.data.input.InputRow;
|
import org.apache.druid.data.input.InputRow;
|
||||||
|
import org.apache.druid.data.input.InputRowListPlusRawValues;
|
||||||
import org.apache.druid.data.input.InputRowSchema;
|
import org.apache.druid.data.input.InputRowSchema;
|
||||||
import org.apache.druid.data.input.InputStats;
|
import org.apache.druid.data.input.InputStats;
|
||||||
|
import org.apache.druid.data.input.impl.systemfield.SystemField;
|
||||||
import org.apache.druid.data.input.impl.systemfield.SystemFieldDecoratorFactory;
|
import org.apache.druid.data.input.impl.systemfield.SystemFieldDecoratorFactory;
|
||||||
|
import org.apache.druid.data.input.impl.systemfield.SystemFields;
|
||||||
import org.apache.druid.java.util.common.CloseableIterators;
|
import org.apache.druid.java.util.common.CloseableIterators;
|
||||||
import org.apache.druid.java.util.common.DateTimes;
|
import org.apache.druid.java.util.common.DateTimes;
|
||||||
import org.apache.druid.java.util.common.StringUtils;
|
import org.apache.druid.java.util.common.StringUtils;
|
||||||
|
@ -44,6 +47,7 @@ import java.net.URISyntaxException;
|
||||||
import java.nio.charset.StandardCharsets;
|
import java.nio.charset.StandardCharsets;
|
||||||
import java.nio.file.Files;
|
import java.nio.file.Files;
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
|
import java.util.EnumSet;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
|
||||||
public class InputEntityIteratingReaderTest extends InitializedNullHandlingTest
|
public class InputEntityIteratingReaderTest extends InitializedNullHandlingTest
|
||||||
|
@ -110,6 +114,76 @@ public class InputEntityIteratingReaderTest extends InitializedNullHandlingTest
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testSampleWithSystemFields() throws IOException
|
||||||
|
{
|
||||||
|
final int numFiles = 5;
|
||||||
|
final List<File> files = new ArrayList<>();
|
||||||
|
for (int i = 0; i < numFiles; i++) {
|
||||||
|
final File file = temporaryFolder.newFile("test_" + i);
|
||||||
|
files.add(file);
|
||||||
|
try (Writer writer = Files.newBufferedWriter(file.toPath(), StandardCharsets.UTF_8)) {
|
||||||
|
writer.write(StringUtils.format("%d,%s,%d\n", 20190101 + i, "name_" + i, i));
|
||||||
|
writer.write(StringUtils.format("%d,%s,%d", 20190102 + i, "name_" + (i + 1), i + 1));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
LocalInputSource inputSource = new LocalInputSource(
|
||||||
|
temporaryFolder.getRoot(),
|
||||||
|
"test_*",
|
||||||
|
null,
|
||||||
|
new SystemFields(EnumSet.of(SystemField.URI, SystemField.PATH)));
|
||||||
|
final InputEntityIteratingReader reader = new InputEntityIteratingReader(
|
||||||
|
new InputRowSchema(
|
||||||
|
new TimestampSpec("time", "yyyyMMdd", null),
|
||||||
|
new DimensionsSpec(
|
||||||
|
DimensionsSpec.getDefaultSchemas(ImmutableList.of(
|
||||||
|
"time",
|
||||||
|
"name",
|
||||||
|
"score",
|
||||||
|
SystemField.URI.getFieldName(),
|
||||||
|
SystemField.PATH.getFieldName()
|
||||||
|
))
|
||||||
|
),
|
||||||
|
ColumnsFilter.all()
|
||||||
|
),
|
||||||
|
new CsvInputFormat(
|
||||||
|
ImmutableList.of("time", "name", "score"),
|
||||||
|
null,
|
||||||
|
null,
|
||||||
|
false,
|
||||||
|
0
|
||||||
|
),
|
||||||
|
CloseableIterators.withEmptyBaggage(
|
||||||
|
files.stream().flatMap(file -> ImmutableList.of(new FileEntity(file)).stream()).iterator()
|
||||||
|
),
|
||||||
|
SystemFieldDecoratorFactory.fromInputSource(inputSource),
|
||||||
|
temporaryFolder.newFolder()
|
||||||
|
);
|
||||||
|
|
||||||
|
try (CloseableIterator<InputRowListPlusRawValues> iterator = reader.sample()) {
|
||||||
|
int i = 0;
|
||||||
|
while (iterator.hasNext()) {
|
||||||
|
InputRow row = iterator.next().getInputRows().get(0);
|
||||||
|
Assert.assertEquals(DateTimes.of(StringUtils.format("2019-01-%02d", i + 1)), row.getTimestamp());
|
||||||
|
Assert.assertEquals(StringUtils.format("name_%d", i), Iterables.getOnlyElement(row.getDimension("name")));
|
||||||
|
Assert.assertEquals(Integer.toString(i), Iterables.getOnlyElement(row.getDimension("score")));
|
||||||
|
Assert.assertEquals(files.get(i).toURI().toString(), row.getDimension(SystemField.URI.getFieldName()).get(0));
|
||||||
|
Assert.assertEquals(files.get(i).getAbsolutePath(), row.getDimension(SystemField.PATH.getFieldName()).get(0));
|
||||||
|
|
||||||
|
Assert.assertTrue(iterator.hasNext());
|
||||||
|
row = iterator.next().getInputRows().get(0);
|
||||||
|
Assert.assertEquals(DateTimes.of(StringUtils.format("2019-01-%02d", i + 2)), row.getTimestamp());
|
||||||
|
Assert.assertEquals(StringUtils.format("name_%d", i + 1), Iterables.getOnlyElement(row.getDimension("name")));
|
||||||
|
Assert.assertEquals(Integer.toString(i + 1), Iterables.getOnlyElement(row.getDimension("score")));
|
||||||
|
Assert.assertEquals(files.get(i).toURI().toString(), row.getDimension(SystemField.URI.getFieldName()).get(0));
|
||||||
|
Assert.assertEquals(files.get(i).getAbsolutePath(), row.getDimension(SystemField.PATH.getFieldName()).get(0));
|
||||||
|
i++;
|
||||||
|
}
|
||||||
|
Assert.assertEquals(numFiles, i);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testIncorrectURI() throws IOException, URISyntaxException
|
public void testIncorrectURI() throws IOException, URISyntaxException
|
||||||
{
|
{
|
||||||
|
|
Loading…
Reference in New Issue