mirror of
https://github.com/apache/nifi.git
synced 2025-03-06 09:29:33 +00:00
NIFI-12238 Fix SplitText endline trimming with max fragment size (#7892)
This commit is contained in:
parent
574c2b2168
commit
91e4b453b4
@ -459,13 +459,6 @@ public class SplitText extends AbstractProcessor {
|
||||
while ((offsetInfo = demarcator.nextOffsetInfo()) != null) {
|
||||
lastCrlfLength = offsetInfo.getCrlfLength();
|
||||
|
||||
if (offsetInfo.getLength() == offsetInfo.getCrlfLength()) {
|
||||
trailingCrlfLength += offsetInfo.getCrlfLength();
|
||||
trailingLineCount++;
|
||||
} else if (offsetInfo.getLength() > offsetInfo.getCrlfLength()) {
|
||||
trailingCrlfLength = 0; // non-empty line came in, thus resetting counter
|
||||
}
|
||||
|
||||
if (length + offsetInfo.getLength() + startingLength > this.maxSplitSize) {
|
||||
if (length == 0) { // single line per split
|
||||
length += offsetInfo.getLength();
|
||||
@ -474,12 +467,19 @@ public class SplitText extends AbstractProcessor {
|
||||
remaningOffsetInfo = offsetInfo;
|
||||
}
|
||||
break;
|
||||
} else {
|
||||
length += offsetInfo.getLength();
|
||||
actualLineCount++;
|
||||
if (splitMaxLineCount > 0 && actualLineCount >= splitMaxLineCount) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (offsetInfo.getLength() == offsetInfo.getCrlfLength()) {
|
||||
trailingCrlfLength += offsetInfo.getCrlfLength();
|
||||
trailingLineCount++;
|
||||
} else if (offsetInfo.getLength() > offsetInfo.getCrlfLength()) {
|
||||
trailingCrlfLength = 0; // non-empty line came in, thus resetting counter
|
||||
}
|
||||
|
||||
length += offsetInfo.getLength();
|
||||
actualLineCount++;
|
||||
if (splitMaxLineCount > 0 && actualLineCount >= splitMaxLineCount) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -890,4 +890,25 @@ public class TestSplitText {
|
||||
splits.get(1).assertContentEquals("\n");
|
||||
}
|
||||
|
||||
/**
 * Runs SplitText with a two-line header, a 30 B maximum fragment size and
 * trailing-newline removal enabled, then checks how the input is divided.
 *
 * The input has two header lines, three data lines and a run of empty lines at
 * the end. The test expects three splits, one original FlowFile and no failures.
 * Every expected split starts with both header lines and none ends with a newline.
 */
@Test
|
||||
public void testMaxFragmentSizeWithTrimmedEndlines() {
|
||||
final TestRunner splitRunner = TestRunners.newTestRunner(new SplitText());
|
||||
// The first two lines of the input are treated as the header.
splitRunner.setProperty(SplitText.HEADER_LINE_COUNT, "2");
|
||||
// NOTE(review): a line split count of 0 presumably disables the line-count limit,
// leaving the fragment size as the only split criterion - confirm against SplitText.
splitRunner.setProperty(SplitText.LINE_SPLIT_COUNT, "0");
|
||||
splitRunner.setProperty(SplitText.FRAGMENT_MAX_SIZE, "30 B");
|
||||
splitRunner.setProperty(SplitText.REMOVE_TRAILING_NEWLINES, "true");
|
||||
|
||||
// Header (2 lines), one data line described by its own text as longer than the
// limit, two short data lines, then several consecutive empty lines.
splitRunner.enqueue("header1\nheader2\nline1 longer than limit\nline2\nline3\n\n\n\n\n");
|
||||
|
||||
splitRunner.run();
|
||||
splitRunner.assertTransferCount(SplitText.REL_SPLITS, 3);
|
||||
splitRunner.assertTransferCount(SplitText.REL_ORIGINAL, 1);
|
||||
splitRunner.assertTransferCount(SplitText.REL_FAILURE, 0);
|
||||
|
||||
final List<MockFlowFile> splits = splitRunner.getFlowFilesForRelationship(SplitText.REL_SPLITS);
|
||||
// The over-long data line is expected alone in the first split, with its newline removed.
splits.get(0).assertContentEquals("header1\nheader2\nline1 longer than limit");
|
||||
// The two short data lines are expected together, without the empty lines that follow them.
splits.get(1).assertContentEquals("header1\nheader2\nline2\nline3");
|
||||
// The last split is expected to hold only the header lines.
// NOTE(review): presumably this split comes from the trailing empty lines - confirm.
splits.get(2).assertContentEquals("header1\nheader2");
|
||||
}
|
||||
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user