mirror of https://github.com/apache/nifi.git
NIFI-4102: If first line read does not match Grok expression, skip line
Signed-off-by: Matt Burgess <mattyb149@apache.org> This closes #1932
This commit is contained in:
parent
ba3372a1dc
commit
73e601bc55
|
@ -21,7 +21,6 @@ import java.io.BufferedReader;
|
|||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.io.InputStreamReader;
|
||||
import java.util.Collections;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
@ -70,19 +69,17 @@ public class GrokRecordReader implements RecordReader {
|
|||
|
||||
@Override
|
||||
public Record nextRecord() throws IOException, MalformedRecordException {
|
||||
final String line = nextLine == null ? reader.readLine() : nextLine;
|
||||
nextLine = null; // ensure that we don't process nextLine again
|
||||
if (line == null) {
|
||||
return null;
|
||||
}
|
||||
Map<String, Object> valueMap = null;
|
||||
while (valueMap == null || valueMap.isEmpty()) {
|
||||
final String line = nextLine == null ? reader.readLine() : nextLine;
|
||||
nextLine = null; // ensure that we don't process nextLine again
|
||||
if (line == null) {
|
||||
return null;
|
||||
}
|
||||
|
||||
final RecordSchema schema = getSchema();
|
||||
|
||||
final Match match = grok.match(line);
|
||||
match.captures();
|
||||
final Map<String, Object> valueMap = match.toMap();
|
||||
if (valueMap.isEmpty()) { // We were unable to match the pattern so return an empty Object array.
|
||||
return new MapRecord(schema, Collections.emptyMap());
|
||||
final Match match = grok.match(line);
|
||||
match.captures();
|
||||
valueMap = match.toMap();
|
||||
}
|
||||
|
||||
// Read the next line to see if it matches the pattern (in which case we will simply leave it for
|
||||
|
@ -149,7 +146,7 @@ public class GrokRecordReader implements RecordReader {
|
|||
|
||||
return new MapRecord(schema, values);
|
||||
} catch (final Exception e) {
|
||||
throw new MalformedRecordException("Found invalid log record and will skip it. Record: " + line, e);
|
||||
throw new MalformedRecordException("Found invalid log record and will skip it. Record: " + nextLine, e);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -226,4 +226,75 @@ public class TestGrokRecordReader {
|
|||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testSkipUnmatchedRecordFirstLine() throws GrokException, IOException, MalformedRecordException {
|
||||
final String nonMatchingRecord = "hello there";
|
||||
final String matchingRecord = "1 2 3 4 5";
|
||||
|
||||
final String input = nonMatchingRecord + "\n" + matchingRecord;
|
||||
final byte[] inputBytes = input.getBytes(StandardCharsets.UTF_8);
|
||||
|
||||
try (final InputStream in = new ByteArrayInputStream(inputBytes)) {
|
||||
final Grok grok = new Grok();
|
||||
grok.addPatternFromFile("src/main/resources/default-grok-patterns.txt");
|
||||
grok.compile("%{NUMBER:first} %{NUMBER:second} %{NUMBER:third} %{NUMBER:fourth} %{NUMBER:fifth}");
|
||||
|
||||
final RecordSchema schema = GrokReader.createRecordSchema(grok);
|
||||
final List<String> fieldNames = schema.getFieldNames();
|
||||
assertEquals(6, fieldNames.size());
|
||||
assertTrue(fieldNames.contains("first"));
|
||||
assertTrue(fieldNames.contains("second"));
|
||||
assertTrue(fieldNames.contains("third"));
|
||||
assertTrue(fieldNames.contains("fourth"));
|
||||
assertTrue(fieldNames.contains("fifth"));
|
||||
|
||||
final GrokRecordReader deserializer = new GrokRecordReader(in, grok, schema, false);
|
||||
final Record record = deserializer.nextRecord();
|
||||
|
||||
assertEquals("1", record.getValue("first"));
|
||||
assertEquals("2", record.getValue("second"));
|
||||
assertEquals("3", record.getValue("third"));
|
||||
assertEquals("4", record.getValue("fourth"));
|
||||
assertEquals("5", record.getValue("fifth"));
|
||||
|
||||
assertNull(deserializer.nextRecord());
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testSkipUnmatchedRecordMiddle() throws GrokException, IOException, MalformedRecordException {
|
||||
final String nonMatchingRecord = "hello there";
|
||||
final String matchingRecord = "1 2 3 4 5";
|
||||
|
||||
final String input = matchingRecord + "\n" + nonMatchingRecord + "\n" + matchingRecord;
|
||||
final byte[] inputBytes = input.getBytes(StandardCharsets.UTF_8);
|
||||
|
||||
try (final InputStream in = new ByteArrayInputStream(inputBytes)) {
|
||||
final Grok grok = new Grok();
|
||||
grok.addPatternFromFile("src/main/resources/default-grok-patterns.txt");
|
||||
grok.compile("%{NUMBER:first} %{NUMBER:second} %{NUMBER:third} %{NUMBER:fourth} %{NUMBER:fifth}");
|
||||
|
||||
final RecordSchema schema = GrokReader.createRecordSchema(grok);
|
||||
final List<String> fieldNames = schema.getFieldNames();
|
||||
assertEquals(6, fieldNames.size());
|
||||
assertTrue(fieldNames.contains("first"));
|
||||
assertTrue(fieldNames.contains("second"));
|
||||
assertTrue(fieldNames.contains("third"));
|
||||
assertTrue(fieldNames.contains("fourth"));
|
||||
assertTrue(fieldNames.contains("fifth"));
|
||||
|
||||
final GrokRecordReader deserializer = new GrokRecordReader(in, grok, schema, false);
|
||||
for (int i = 0; i < 2; i++) {
|
||||
final Record record = deserializer.nextRecord();
|
||||
|
||||
assertEquals("1", record.getValue("first"));
|
||||
assertEquals("2", record.getValue("second"));
|
||||
assertEquals("3", record.getValue("third"));
|
||||
assertEquals("4", record.getValue("fourth"));
|
||||
assertEquals("5", record.getValue("fifth"));
|
||||
}
|
||||
|
||||
assertNull(deserializer.nextRecord());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue