Get rid of inefficient Stream.count() (#12975)

This commit is contained in:
sabi0 2023-12-28 19:30:01 +01:00 committed by GitHub
parent 9c9949b2bc
commit 57b104e806
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 16 additions and 16 deletions

View File

@@ -34,8 +34,8 @@ import java.util.stream.Stream;
* Title, Date, Dateline, Body * Title, Date, Dateline, Body
*/ */
public class ExtractReuters { public class ExtractReuters {
private Path reutersDir; private final Path reutersDir;
private Path outputDir; private final Path outputDir;
public ExtractReuters(Path reutersDir, Path outputDir) throws IOException { public ExtractReuters(Path reutersDir, Path outputDir) throws IOException {
this.reutersDir = reutersDir; this.reutersDir = reutersDir;
@@ -46,7 +46,7 @@ public class ExtractReuters {
long count = 0; long count = 0;
Files.createDirectories(outputDir); Files.createDirectories(outputDir);
try (Stream<Path> files = Files.list(outputDir)) { try (Stream<Path> files = Files.list(outputDir)) {
if (files.count() > 0) { if (files.findAny().isPresent()) {
throw new IOException("The output directory must be empty: " + outputDir); throw new IOException("The output directory must be empty: " + outputDir);
} }
} }
@@ -65,9 +65,9 @@ public class ExtractReuters {
Pattern EXTRACTION_PATTERN = Pattern EXTRACTION_PATTERN =
Pattern.compile("<TITLE>(.*?)</TITLE>|<DATE>(.*?)</DATE>|<BODY>(.*?)</BODY>"); Pattern.compile("<TITLE>(.*?)</TITLE>|<DATE>(.*?)</DATE>|<BODY>(.*?)</BODY>");
private static String[] META_CHARS = {"&", "<", ">", "\"", "'"}; private static final String[] META_CHARS = {"&", "<", ">", "\"", "'"};
private static String[] META_CHARS_SERIALIZATIONS = {"&amp;", "&lt;", "&gt;", "&quot;", "&apos;"}; private static final String[] META_CHARS_SERIALIZATIONS = {"&amp;", "&lt;", "&gt;", "&quot;", "&apos;"};
/** Override if you wish to change what is extracted */ /** Override if you wish to change what is extracted */
protected void extractFile(Path sgmFile) throws IOException { protected void extractFile(Path sgmFile) throws IOException {
@@ -80,7 +80,7 @@ public class ExtractReuters {
while ((line = reader.readLine()) != null) { while ((line = reader.readLine()) != null) {
// when we see a closing reuters tag, flush the file // when we see a closing reuters tag, flush the file
if (line.indexOf("</REUTERS") == -1) { if (line.contains("</REUTERS") == false) {
// Replace the SGM escape sequences // Replace the SGM escape sequences
buffer.append(line).append(' '); // accumulate the strings for now, buffer.append(line).append(' '); // accumulate the strings for now,

View File

@@ -86,8 +86,8 @@ public final class FlattenGraphFilter extends TokenFilter {
} }
/** /**
* Gathers up merged input positions into a single output position, only for the current * Gathers merged input positions into a single output position, only for the current "frontier"
* "frontier" of nodes we've seen but can't yet output because they are not frozen. * of nodes we've seen but can't yet output because they are not frozen.
*/ */
private static final class OutputNode implements RollingBuffer.Resettable { private static final class OutputNode implements RollingBuffer.Resettable {
private final List<Integer> inputNodes = new ArrayList<>(); private final List<Integer> inputNodes = new ArrayList<>();
@@ -115,7 +115,7 @@ public final class FlattenGraphFilter extends TokenFilter {
} }
private final RollingBuffer<InputNode> inputNodes = private final RollingBuffer<InputNode> inputNodes =
new RollingBuffer<InputNode>() { new RollingBuffer<>() {
@Override @Override
protected InputNode newInstance() { protected InputNode newInstance() {
return new InputNode(); return new InputNode();
@@ -123,7 +123,7 @@ public final class FlattenGraphFilter extends TokenFilter {
}; };
private final RollingBuffer<OutputNode> outputNodes = private final RollingBuffer<OutputNode> outputNodes =
new RollingBuffer<OutputNode>() { new RollingBuffer<>() {
@Override @Override
protected OutputNode newInstance() { protected OutputNode newInstance() {
return new OutputNode(); return new OutputNode();
@@ -193,10 +193,10 @@ public final class FlattenGraphFilter extends TokenFilter {
+ " vs output.inputNodes.size()=" + " vs output.inputNodes.size()="
+ output.inputNodes.size(); + output.inputNodes.size();
InputNode inputNode = inputNodes.get(output.inputNodes.get(output.nextOut)); InputNode inputNode = inputNodes.get(output.inputNodes.get(output.nextOut));
if (done && inputNode.tokens.size() == 0 && outputFrom >= outputNodes.getMaxPos()) { if (done && inputNode.tokens.isEmpty() && outputFrom >= outputNodes.getMaxPos()) {
return false; return false;
} }
if (inputNode.tokens.size() == 0) { if (inputNode.tokens.isEmpty()) {
assert inputNode.nextOut == 0; assert inputNode.nextOut == 0;
// Hole dest nodes should never be merged since 1) we always // Hole dest nodes should never be merged since 1) we always
// assign them to a new output position, and 2) since they never // assign them to a new output position, and 2) since they never
@@ -271,7 +271,7 @@ public final class FlattenGraphFilter extends TokenFilter {
* @param output target output node * @param output target output node
*/ */
private void freeBefore(OutputNode output) { private void freeBefore(OutputNode output) {
/* We've released all of the tokens that end at the current output, so free all output nodes before this. /* We've released all the tokens that end at the current output, so free all output nodes before this.
Input nodes are more complex. The second shingled tokens with alternate paths can appear later in the output graph Input nodes are more complex. The second shingled tokens with alternate paths can appear later in the output graph
than some of their alternate path tokens. Because of this case we can only free from the minimum because than some of their alternate path tokens. Because of this case we can only free from the minimum because
the minimum node will have come from before the second shingled token. the minimum node will have come from before the second shingled token.
@@ -283,7 +283,7 @@ public final class FlattenGraphFilter extends TokenFilter {
int freeBefore = Collections.min(output.inputNodes); int freeBefore = Collections.min(output.inputNodes);
// This will catch a node being freed early if it is input to the next output. // This will catch a node being freed early if it is input to the next output.
// Could a freed early node be input to a later output? // Could a freed early node be input to a later output?
assert outputNodes.get(outputFrom).inputNodes.stream().filter(n -> freeBefore > n).count() == 0 assert outputNodes.get(outputFrom).inputNodes.stream().noneMatch(n -> freeBefore > n)
: "FreeBefore " + freeBefore + " will free in use nodes"; : "FreeBefore " + freeBefore + " will free in use nodes";
inputNodes.freeBefore(freeBefore); inputNodes.freeBefore(freeBefore);
outputNodes.freeBefore(outputFrom); outputNodes.freeBefore(outputFrom);