Fix expression result writing of arrays in Hadoop Ingestion (#15127)

This commit is contained in:
Pranav 2023-10-13 13:41:41 -07:00 committed by GitHub
parent 9ca10c7bd7
commit 4b0d1b3488
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 85 additions and 9 deletions

View File

@ -25,8 +25,10 @@ import com.google.common.primitives.Longs;
import org.apache.druid.common.config.NullHandling; import org.apache.druid.common.config.NullHandling;
import org.apache.druid.java.util.common.StringUtils; import org.apache.druid.java.util.common.StringUtils;
import org.apache.druid.java.util.common.parsers.ParseException; import org.apache.druid.java.util.common.parsers.ParseException;
import org.apache.druid.math.expr.Evals;
import javax.annotation.Nullable; import javax.annotation.Nullable;
import java.util.Arrays;
import java.util.Collections; import java.util.Collections;
import java.util.List; import java.util.List;
import java.util.Map; import java.util.Map;
@ -70,6 +72,8 @@ public final class Rows
} else if (inputValue instanceof byte[]) { } else if (inputValue instanceof byte[]) {
// convert byte[] to base64 encoded string // convert byte[] to base64 encoded string
return Collections.singletonList(StringUtils.encodeBase64String((byte[]) inputValue)); return Collections.singletonList(StringUtils.encodeBase64String((byte[]) inputValue));
} else if (inputValue instanceof Object[]) {
return Arrays.stream((Object[]) inputValue).map(Evals::asString).collect(Collectors.toList());
} else { } else {
return Collections.singletonList(String.valueOf(inputValue)); return Collections.singletonList(String.valueOf(inputValue));
} }

View File

@ -31,7 +31,6 @@ import org.apache.druid.math.expr.Expr;
import org.apache.druid.math.expr.ExprMacroTable; import org.apache.druid.math.expr.ExprMacroTable;
import org.apache.druid.math.expr.InputBindings; import org.apache.druid.math.expr.InputBindings;
import org.apache.druid.math.expr.Parser; import org.apache.druid.math.expr.Parser;
import org.apache.druid.segment.virtual.ExpressionSelectors;
import java.util.List; import java.util.List;
import java.util.Objects; import java.util.Objects;
@ -111,9 +110,7 @@ public class ExpressionTransform implements Transform
public List<String> evalDimension(Row row) public List<String> evalDimension(Row row)
{ {
try { try {
return Rows.objectToStrings( return Rows.objectToStrings(expr.eval(InputBindings.forRow(row)).valueOrDefault());
ExpressionSelectors.coerceEvalToObjectOrList(expr.eval(InputBindings.forRow(row)))
);
} }
catch (Throwable t) { catch (Throwable t) {
throw new ISE(t, "Could not transform dimension value for %s reason: %s", name, t.getMessage()); throw new ISE(t, "Could not transform dimension value for %s reason: %s", name, t.getMessage());

View File

@ -25,6 +25,8 @@ import org.apache.druid.common.config.NullHandling;
import org.apache.druid.data.input.InputRow; import org.apache.druid.data.input.InputRow;
import org.apache.druid.data.input.InputRowListPlusRawValues; import org.apache.druid.data.input.InputRowListPlusRawValues;
import org.apache.druid.data.input.MapBasedInputRow; import org.apache.druid.data.input.MapBasedInputRow;
import org.apache.druid.data.input.MapBasedRow;
import org.apache.druid.data.input.Row;
import org.apache.druid.java.util.common.DateTimes; import org.apache.druid.java.util.common.DateTimes;
import org.apache.druid.java.util.common.parsers.ParseException; import org.apache.druid.java.util.common.parsers.ParseException;
import org.apache.druid.query.expression.TestExprMacroTable; import org.apache.druid.query.expression.TestExprMacroTable;
@ -37,6 +39,8 @@ import org.junit.Rule;
import org.junit.Test; import org.junit.Test;
import org.junit.rules.ExpectedException; import org.junit.rules.ExpectedException;
import javax.annotation.Nullable;
import java.util.ArrayList;
import java.util.Arrays; import java.util.Arrays;
import java.util.List; import java.util.List;
import java.util.Map; import java.util.Map;
@ -385,7 +389,7 @@ public class TransformerTest extends InitializedNullHandlingTest
Assert.assertNotNull(actual); Assert.assertNotNull(actual);
Assert.assertEquals(ImmutableList.of("dim"), actual.getDimensions()); Assert.assertEquals(ImmutableList.of("dim"), actual.getDimensions());
Assert.assertArrayEquals(new Object[]{1L, 2L, null, 3L}, (Object[]) actual.getRaw("dim")); Assert.assertArrayEquals(new Object[]{1L, 2L, null, 3L}, (Object[]) actual.getRaw("dim"));
Assert.assertEquals(Arrays.asList("1", "2", "null", "3"), actual.getDimension("dim")); Assert.assertArrayEquals(new String[]{"1", "2", null, "3"}, actual.getDimension("dim").toArray());
Assert.assertEquals(row.getTimestamp(), actual.getTimestamp()); Assert.assertEquals(row.getTimestamp(), actual.getTimestamp());
} }
@ -412,9 +416,9 @@ public class TransformerTest extends InitializedNullHandlingTest
Assert.assertEquals(2.3, (Double) raw[1], 0.00001); Assert.assertEquals(2.3, (Double) raw[1], 0.00001);
Assert.assertNull(raw[2]); Assert.assertNull(raw[2]);
Assert.assertEquals(3.4, (Double) raw[3], 0.00001); Assert.assertEquals(3.4, (Double) raw[3], 0.00001);
Assert.assertEquals( Assert.assertArrayEquals(
Arrays.asList("1.2000000476837158", "2.299999952316284", "null", "3.4000000953674316"), new String[]{"1.2000000476837158", "2.299999952316284", null, "3.4000000953674316"},
actual.getDimension("dim") actual.getDimension("dim").toArray()
); );
Assert.assertEquals(row.getTimestamp(), actual.getTimestamp()); Assert.assertEquals(row.getTimestamp(), actual.getTimestamp());
} }
@ -441,7 +445,78 @@ public class TransformerTest extends InitializedNullHandlingTest
Assert.assertEquals(2.3, (Double) raw[1], 0.0); Assert.assertEquals(2.3, (Double) raw[1], 0.0);
Assert.assertNull(raw[2]); Assert.assertNull(raw[2]);
Assert.assertEquals(3.4, (Double) raw[3], 0.0); Assert.assertEquals(3.4, (Double) raw[3], 0.0);
Assert.assertEquals(Arrays.asList("1.2", "2.3", "null", "3.4"), actual.getDimension("dim")); Assert.assertArrayEquals(new String[]{"1.2", "2.3", null, "3.4"}, actual.getDimension("dim").toArray());
Assert.assertEquals(row.getTimestamp(), actual.getTimestamp()); Assert.assertEquals(row.getTimestamp(), actual.getTimestamp());
} }
@Test
public void testTransformWithExpr()
{
final Transformer transformer = new Transformer(
new TransformSpec(
null,
ImmutableList.of(
new ExpressionTransform("dim", "array_slice(dim, 0, 5)", TestExprMacroTable.INSTANCE),
new ExpressionTransform("dim1", "array_slice(dim, 0, 1)", TestExprMacroTable.INSTANCE)
)
)
);
final List<String> dimList = ImmutableList.of("a", "b", "c", "d", "e", "f", "g");
final MapBasedRow row = new MapBasedRow(
DateTimes.nowUtc(),
ImmutableMap.of("dim", dimList)
);
Assert.assertEquals(row.getDimension("dim"), dimList);
Assert.assertEquals(row.getRaw("dim"), dimList);
final InputRow actualTranformedRow = transformer.transform(new InputRow()
{
@Override
public List<String> getDimensions()
{
return new ArrayList<>(row.getEvent().keySet());
}
@Override
public long getTimestampFromEpoch()
{
return 0;
}
@Override
public DateTime getTimestamp()
{
return row.getTimestamp();
}
@Override
public List<String> getDimension(String dimension)
{
return row.getDimension(dimension);
}
@Nullable
@Override
public Object getRaw(String dimension)
{
return row.getRaw(dimension);
}
@Nullable
@Override
public Number getMetric(String metric)
{
return row.getMetric(metric);
}
@Override
public int compareTo(Row o)
{
return row.compareTo(o);
}
});
Assert.assertEquals(actualTranformedRow.getDimension("dim"), dimList.subList(0, 5));
Assert.assertArrayEquals(dimList.subList(0, 5).toArray(), (Object[]) actualTranformedRow.getRaw("dim"));
Assert.assertArrayEquals(new Object[]{"a"}, actualTranformedRow.getDimension("dim1").toArray());
}
} }