Get rid of inefficient Stream.count() (#12975)

This commit is contained in:
sabi0 2023-12-28 19:30:01 +01:00 committed by GitHub
parent 9c9949b2bc
commit 57b104e806
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 16 additions and 16 deletions

View File

@ -34,8 +34,8 @@ import java.util.stream.Stream;
* Title, Date, Dateline, Body
*/
public class ExtractReuters {
private Path reutersDir;
private Path outputDir;
private final Path reutersDir;
private final Path outputDir;
public ExtractReuters(Path reutersDir, Path outputDir) throws IOException {
this.reutersDir = reutersDir;
@ -45,8 +45,8 @@ public class ExtractReuters {
public void extract() throws IOException {
long count = 0;
Files.createDirectories(outputDir);
try(Stream<Path> files = Files.list(outputDir)) {
if (files.count() > 0) {
try (Stream<Path> files = Files.list(outputDir)) {
if (files.findAny().isPresent()) {
throw new IOException("The output directory must be empty: " + outputDir);
}
}
@ -65,9 +65,9 @@ public class ExtractReuters {
Pattern EXTRACTION_PATTERN =
Pattern.compile("<TITLE>(.*?)</TITLE>|<DATE>(.*?)</DATE>|<BODY>(.*?)</BODY>");
private static String[] META_CHARS = {"&", "<", ">", "\"", "'"};
private static final String[] META_CHARS = {"&", "<", ">", "\"", "'"};
private static String[] META_CHARS_SERIALIZATIONS = {"&amp;", "&lt;", "&gt;", "&quot;", "&apos;"};
private static final String[] META_CHARS_SERIALIZATIONS = {"&amp;", "&lt;", "&gt;", "&quot;", "&apos;"};
/** Override if you wish to change what is extracted */
protected void extractFile(Path sgmFile) throws IOException {
@ -80,7 +80,7 @@ public class ExtractReuters {
while ((line = reader.readLine()) != null) {
// when we see a closing reuters tag, flush the file
if (line.indexOf("</REUTERS") == -1) {
if (line.contains("</REUTERS") == false) {
// Replace the SGM escape sequences
buffer.append(line).append(' '); // accumulate the strings for now,

View File

@ -86,8 +86,8 @@ public final class FlattenGraphFilter extends TokenFilter {
}
/**
* Gathers up merged input positions into a single output position, only for the current
* "frontier" of nodes we've seen but can't yet output because they are not frozen.
* Gathers merged input positions into a single output position, only for the current "frontier"
* of nodes we've seen but can't yet output because they are not frozen.
*/
private static final class OutputNode implements RollingBuffer.Resettable {
private final List<Integer> inputNodes = new ArrayList<>();
@ -115,7 +115,7 @@ public final class FlattenGraphFilter extends TokenFilter {
}
private final RollingBuffer<InputNode> inputNodes =
new RollingBuffer<InputNode>() {
new RollingBuffer<>() {
@Override
protected InputNode newInstance() {
return new InputNode();
@ -123,7 +123,7 @@ public final class FlattenGraphFilter extends TokenFilter {
};
private final RollingBuffer<OutputNode> outputNodes =
new RollingBuffer<OutputNode>() {
new RollingBuffer<>() {
@Override
protected OutputNode newInstance() {
return new OutputNode();
@ -193,10 +193,10 @@ public final class FlattenGraphFilter extends TokenFilter {
+ " vs output.inputNodes.size()="
+ output.inputNodes.size();
InputNode inputNode = inputNodes.get(output.inputNodes.get(output.nextOut));
if (done && inputNode.tokens.size() == 0 && outputFrom >= outputNodes.getMaxPos()) {
if (done && inputNode.tokens.isEmpty() && outputFrom >= outputNodes.getMaxPos()) {
return false;
}
if (inputNode.tokens.size() == 0) {
if (inputNode.tokens.isEmpty()) {
assert inputNode.nextOut == 0;
// Hole dest nodes should never be merged since 1) we always
// assign them to a new output position, and 2) since they never
@ -210,7 +210,7 @@ public final class FlattenGraphFilter extends TokenFilter {
continue;
}
}
// Don't free from a hole src. Since no edge leaves here book keeping may be incorrect.
// Don't free from a hole src. Since no edge leaves here bookkeeping may be incorrect.
// Later output nodes may point to earlier input nodes. So we don't want to free them yet.
freeBefore(output);
continue;
@ -271,7 +271,7 @@ public final class FlattenGraphFilter extends TokenFilter {
* @param output target output node
*/
private void freeBefore(OutputNode output) {
/* We've released all of the tokens that end at the current output, so free all output nodes before this.
/* We've released all the tokens that end at the current output, so free all output nodes before this.
Input nodes are more complex. The second shingled tokens with alternate paths can appear later in the output graph
than some of their alternate path tokens. Because of this case we can only free from the minimum because
the minimum node will have come from before the second shingled token.
@ -283,7 +283,7 @@ public final class FlattenGraphFilter extends TokenFilter {
int freeBefore = Collections.min(output.inputNodes);
// This will catch a node being freed early if it is input to the next output.
// Could a freed early node be input to a later output?
assert outputNodes.get(outputFrom).inputNodes.stream().filter(n -> freeBefore > n).count() == 0
assert outputNodes.get(outputFrom).inputNodes.stream().noneMatch(n -> freeBefore > n)
: "FreeBefore " + freeBefore + " will free in use nodes";
inputNodes.freeBefore(freeBefore);
outputNodes.freeBefore(outputFrom);