mirror of https://github.com/apache/nifi.git
NIFI-12731: Ensure state is updated in GetHBase whenever the session is committed
Signed-off-by: Pierre Villard <pierre.villard.fr@gmail.com> This closes #8346.
This commit is contained in:
parent
c6f1d771e8
commit
2f42b44efa
|
@ -140,7 +140,7 @@ public class GetHBase extends AbstractProcessor implements VisibilityFetchSuppor
|
||||||
.build();
|
.build();
|
||||||
|
|
||||||
private final AtomicReference<ScanResult> lastResult = new AtomicReference<>();
|
private final AtomicReference<ScanResult> lastResult = new AtomicReference<>();
|
||||||
private volatile List<Column> columns = new ArrayList<>();
|
private final List<Column> columns = new ArrayList<>();
|
||||||
private volatile String previousTable = null;
|
private volatile String previousTable = null;
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
@ -201,11 +201,11 @@ public class GetHBase extends AbstractProcessor implements VisibilityFetchSuppor
|
||||||
for (final String column : columns) {
|
for (final String column : columns) {
|
||||||
if (column.contains(":")) {
|
if (column.contains(":")) {
|
||||||
final String[] parts = column.split(":");
|
final String[] parts = column.split(":");
|
||||||
final byte[] cf = parts[0].getBytes(Charset.forName("UTF-8"));
|
final byte[] cf = parts[0].getBytes(StandardCharsets.UTF_8);
|
||||||
final byte[] cq = parts[1].getBytes(Charset.forName("UTF-8"));
|
final byte[] cq = parts[1].getBytes(StandardCharsets.UTF_8);
|
||||||
this.columns.add(new Column(cf, cq));
|
this.columns.add(new Column(cf, cq));
|
||||||
} else {
|
} else {
|
||||||
final byte[] cf = column.getBytes(Charset.forName("UTF-8"));
|
final byte[] cf = column.getBytes(StandardCharsets.UTF_8);
|
||||||
this.columns.add(new Column(cf, null));
|
this.columns.add(new Column(cf, null));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -307,11 +307,7 @@ public class GetHBase extends AbstractProcessor implements VisibilityFetchSuppor
|
||||||
final byte[] cellValue = Arrays.copyOfRange(cell.getValueArray(), cell.getValueOffset(), cell.getValueLength() + cell.getValueOffset());
|
final byte[] cellValue = Arrays.copyOfRange(cell.getValueArray(), cell.getValueOffset(), cell.getValueLength() + cell.getValueOffset());
|
||||||
|
|
||||||
final String rowHash = new String(rowValue, StandardCharsets.UTF_8);
|
final String rowHash = new String(rowValue, StandardCharsets.UTF_8);
|
||||||
Set<String> cellHashes = cellsMatchingTimestamp.get(rowHash);
|
Set<String> cellHashes = cellsMatchingTimestamp.computeIfAbsent(rowHash, k -> new HashSet<>());
|
||||||
if (cellHashes == null) {
|
|
||||||
cellHashes = new HashSet<>();
|
|
||||||
cellsMatchingTimestamp.put(rowHash, cellHashes);
|
|
||||||
}
|
|
||||||
cellHashes.add(new String(cellValue, StandardCharsets.UTF_8));
|
cellHashes.add(new String(cellValue, StandardCharsets.UTF_8));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -336,40 +332,11 @@ public class GetHBase extends AbstractProcessor implements VisibilityFetchSuppor
|
||||||
rowsPulledHolder.set(++rowsPulled);
|
rowsPulledHolder.set(++rowsPulled);
|
||||||
|
|
||||||
if (++rowsPulled % getBatchSize() == 0) {
|
if (++rowsPulled % getBatchSize() == 0) {
|
||||||
session.commitAsync();
|
updateStateAndCommit(session, latestTimestampHolder.get(), cellsMatchingTimestamp);
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
|
|
||||||
final ScanResult scanResults = new ScanResult(latestTimestampHolder.get(), cellsMatchingTimestamp);
|
updateStateAndCommit(session, latestTimestampHolder.get(), cellsMatchingTimestamp);
|
||||||
|
|
||||||
final ScanResult latestResult = lastResult.get();
|
|
||||||
if (latestResult == null || scanResults.getTimestamp() > latestResult.getTimestamp()) {
|
|
||||||
session.setState(scanResults.toFlatMap(), Scope.CLUSTER);
|
|
||||||
session.commitAsync(() -> updateScanResultsIfNewer(scanResults));
|
|
||||||
} else if (scanResults.getTimestamp() == latestResult.getTimestamp()) {
|
|
||||||
final Map<String, Set<String>> combinedResults = new HashMap<>(scanResults.getMatchingCells());
|
|
||||||
|
|
||||||
// copy the results of result.getMatchingCells() to combinedResults.
|
|
||||||
// do a deep copy because the Set may be modified below.
|
|
||||||
for (final Map.Entry<String, Set<String>> entry : scanResults.getMatchingCells().entrySet()) {
|
|
||||||
combinedResults.put(entry.getKey(), new HashSet<>(entry.getValue()));
|
|
||||||
}
|
|
||||||
|
|
||||||
// combined the results from 'lastResult'
|
|
||||||
for (final Map.Entry<String, Set<String>> entry : latestResult.getMatchingCells().entrySet()) {
|
|
||||||
final Set<String> existing = combinedResults.get(entry.getKey());
|
|
||||||
if (existing == null) {
|
|
||||||
combinedResults.put(entry.getKey(), new HashSet<>(entry.getValue()));
|
|
||||||
} else {
|
|
||||||
existing.addAll(entry.getValue());
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
final ScanResult scanResult = new ScanResult(scanResults.getTimestamp(), combinedResults);
|
|
||||||
session.setState(scanResult.toFlatMap(), Scope.CLUSTER);
|
|
||||||
|
|
||||||
session.commitAsync(() -> updateScanResultsIfNewer(scanResult));
|
|
||||||
}
|
|
||||||
} catch (final IOException e) {
|
} catch (final IOException e) {
|
||||||
getLogger().error("Failed to receive data from HBase due to {}", e);
|
getLogger().error("Failed to receive data from HBase due to {}", e);
|
||||||
session.rollback();
|
session.rollback();
|
||||||
|
@ -380,6 +347,39 @@ public class GetHBase extends AbstractProcessor implements VisibilityFetchSuppor
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private void updateStateAndCommit(final ProcessSession session, final long latestTimestamp, final Map<String, Set<String>> cellsMatchingTimestamp) throws IOException {
|
||||||
|
final ScanResult scanResults = new ScanResult(latestTimestamp, cellsMatchingTimestamp);
|
||||||
|
|
||||||
|
final ScanResult latestResult = lastResult.get();
|
||||||
|
if (latestResult == null || scanResults.getTimestamp() > latestResult.getTimestamp()) {
|
||||||
|
session.setState(scanResults.toFlatMap(), Scope.CLUSTER);
|
||||||
|
session.commitAsync(() -> updateScanResultsIfNewer(scanResults));
|
||||||
|
} else if (scanResults.getTimestamp() == latestResult.getTimestamp()) {
|
||||||
|
final Map<String, Set<String>> combinedResults = new HashMap<>(scanResults.getMatchingCells());
|
||||||
|
|
||||||
|
// copy the results of result.getMatchingCells() to combinedResults.
|
||||||
|
// do a deep copy because the Set may be modified below.
|
||||||
|
for (final Map.Entry<String, Set<String>> entry : scanResults.getMatchingCells().entrySet()) {
|
||||||
|
combinedResults.put(entry.getKey(), new HashSet<>(entry.getValue()));
|
||||||
|
}
|
||||||
|
|
||||||
|
// combined the results from 'lastResult'
|
||||||
|
for (final Map.Entry<String, Set<String>> entry : latestResult.getMatchingCells().entrySet()) {
|
||||||
|
final Set<String> existing = combinedResults.get(entry.getKey());
|
||||||
|
if (existing == null) {
|
||||||
|
combinedResults.put(entry.getKey(), new HashSet<>(entry.getValue()));
|
||||||
|
} else {
|
||||||
|
existing.addAll(entry.getValue());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
final ScanResult scanResult = new ScanResult(scanResults.getTimestamp(), combinedResults);
|
||||||
|
session.setState(scanResult.toFlatMap(), Scope.CLUSTER);
|
||||||
|
|
||||||
|
session.commitAsync(() -> updateScanResultsIfNewer(scanResult));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
private void updateScanResultsIfNewer(final ScanResult scanResult) {
|
private void updateScanResultsIfNewer(final ScanResult scanResult) {
|
||||||
lastResult.getAndUpdate(current -> (current == null || scanResult.getTimestamp() > current.getTimestamp()) ? scanResult : current);
|
lastResult.getAndUpdate(current -> (current == null || scanResult.getTimestamp() > current.getTimestamp()) ? scanResult : current);
|
||||||
}
|
}
|
||||||
|
@ -495,11 +495,7 @@ public class GetHBase extends AbstractProcessor implements VisibilityFetchSuppor
|
||||||
final String rowIndex = matcher.group(1);
|
final String rowIndex = matcher.group(1);
|
||||||
final String cellIndex = matcher.group(3);
|
final String cellIndex = matcher.group(3);
|
||||||
|
|
||||||
Set<String> cellHashes = rowIndexToMatchingCellHashes.get(rowIndex);
|
Set<String> cellHashes = rowIndexToMatchingCellHashes.computeIfAbsent(rowIndex, k -> new HashSet<>());
|
||||||
if (cellHashes == null) {
|
|
||||||
cellHashes = new HashSet<>();
|
|
||||||
rowIndexToMatchingCellHashes.put(rowIndex, cellHashes);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (cellIndex == null) {
|
if (cellIndex == null) {
|
||||||
// this provides a Row ID.
|
// this provides a Row ID.
|
||||||
|
|
|
@ -16,11 +16,13 @@
|
||||||
*/
|
*/
|
||||||
package org.apache.nifi.hbase.scan;
|
package org.apache.nifi.hbase.scan;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Handles a single row from an HBase scan.
|
* Handles a single row from an HBase scan.
|
||||||
*/
|
*/
|
||||||
public interface ResultHandler {
|
public interface ResultHandler {
|
||||||
|
|
||||||
void handle(byte[] row, ResultCell[] resultCells);
|
void handle(byte[] row, ResultCell[] resultCells) throws IOException;
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue