mirror of
https://github.com/apache/nifi.git
synced 2025-03-06 09:29:33 +00:00
NIFI-12238 Fix SplitText endline trimming with max fragment size (#7892)
This commit is contained in:
parent
574c2b2168
commit
91e4b453b4
@ -459,13 +459,6 @@ public class SplitText extends AbstractProcessor {
|
|||||||
while ((offsetInfo = demarcator.nextOffsetInfo()) != null) {
|
while ((offsetInfo = demarcator.nextOffsetInfo()) != null) {
|
||||||
lastCrlfLength = offsetInfo.getCrlfLength();
|
lastCrlfLength = offsetInfo.getCrlfLength();
|
||||||
|
|
||||||
if (offsetInfo.getLength() == offsetInfo.getCrlfLength()) {
|
|
||||||
trailingCrlfLength += offsetInfo.getCrlfLength();
|
|
||||||
trailingLineCount++;
|
|
||||||
} else if (offsetInfo.getLength() > offsetInfo.getCrlfLength()) {
|
|
||||||
trailingCrlfLength = 0; // non-empty line came in, thus resetting counter
|
|
||||||
}
|
|
||||||
|
|
||||||
if (length + offsetInfo.getLength() + startingLength > this.maxSplitSize) {
|
if (length + offsetInfo.getLength() + startingLength > this.maxSplitSize) {
|
||||||
if (length == 0) { // single line per split
|
if (length == 0) { // single line per split
|
||||||
length += offsetInfo.getLength();
|
length += offsetInfo.getLength();
|
||||||
@ -474,14 +467,21 @@ public class SplitText extends AbstractProcessor {
|
|||||||
remaningOffsetInfo = offsetInfo;
|
remaningOffsetInfo = offsetInfo;
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
} else {
|
}
|
||||||
|
|
||||||
|
if (offsetInfo.getLength() == offsetInfo.getCrlfLength()) {
|
||||||
|
trailingCrlfLength += offsetInfo.getCrlfLength();
|
||||||
|
trailingLineCount++;
|
||||||
|
} else if (offsetInfo.getLength() > offsetInfo.getCrlfLength()) {
|
||||||
|
trailingCrlfLength = 0; // non-empty line came in, thus resetting counter
|
||||||
|
}
|
||||||
|
|
||||||
length += offsetInfo.getLength();
|
length += offsetInfo.getLength();
|
||||||
actualLineCount++;
|
actualLineCount++;
|
||||||
if (splitMaxLineCount > 0 && actualLineCount >= splitMaxLineCount) {
|
if (splitMaxLineCount > 0 && actualLineCount >= splitMaxLineCount) {
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
if (actualLineCount > 0) {
|
if (actualLineCount > 0) {
|
||||||
if (length - trailingCrlfLength >= lastCrlfLength) {
|
if (length - trailingCrlfLength >= lastCrlfLength) {
|
||||||
|
@ -890,4 +890,25 @@ public class TestSplitText {
|
|||||||
splits.get(1).assertContentEquals("\n");
|
splits.get(1).assertContentEquals("\n");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testMaxFragmentSizeWithTrimmedEndlines() {
|
||||||
|
final TestRunner splitRunner = TestRunners.newTestRunner(new SplitText());
|
||||||
|
splitRunner.setProperty(SplitText.HEADER_LINE_COUNT, "2");
|
||||||
|
splitRunner.setProperty(SplitText.LINE_SPLIT_COUNT, "0");
|
||||||
|
splitRunner.setProperty(SplitText.FRAGMENT_MAX_SIZE, "30 B");
|
||||||
|
splitRunner.setProperty(SplitText.REMOVE_TRAILING_NEWLINES, "true");
|
||||||
|
|
||||||
|
splitRunner.enqueue("header1\nheader2\nline1 longer than limit\nline2\nline3\n\n\n\n\n");
|
||||||
|
|
||||||
|
splitRunner.run();
|
||||||
|
splitRunner.assertTransferCount(SplitText.REL_SPLITS, 3);
|
||||||
|
splitRunner.assertTransferCount(SplitText.REL_ORIGINAL, 1);
|
||||||
|
splitRunner.assertTransferCount(SplitText.REL_FAILURE, 0);
|
||||||
|
|
||||||
|
final List<MockFlowFile> splits = splitRunner.getFlowFilesForRelationship(SplitText.REL_SPLITS);
|
||||||
|
splits.get(0).assertContentEquals("header1\nheader2\nline1 longer than limit");
|
||||||
|
splits.get(1).assertContentEquals("header1\nheader2\nline2\nline3");
|
||||||
|
splits.get(2).assertContentEquals("header1\nheader2");
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user