mirror of https://github.com/apache/lucene.git
Get rid of inefficient Stream.count() (#12975)
This commit is contained in:
parent 9c9949b2bc
commit 57b104e806
@@ -34,8 +34,8 @@ import java.util.stream.Stream;
  * Title, Date, Dateline, Body
  */
 public class ExtractReuters {
-  private Path reutersDir;
-  private Path outputDir;
+  private final Path reutersDir;
+  private final Path outputDir;
 
   public ExtractReuters(Path reutersDir, Path outputDir) throws IOException {
     this.reutersDir = reutersDir;
@@ -45,8 +45,8 @@ public class ExtractReuters {
   public void extract() throws IOException {
     long count = 0;
     Files.createDirectories(outputDir);
-    try(Stream<Path> files = Files.list(outputDir)) {
-      if (files.count() > 0) {
+    try (Stream<Path> files = Files.list(outputDir)) {
+      if (files.findAny().isPresent()) {
         throw new IOException("The output directory must be empty: " + outputDir);
       }
     }
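For context, a minimal standalone sketch of the emptiness check this hunk switches to (the class and helper names below are illustrative, not from the patch): Files.list(...) returns a lazy stream, but Stream.count() must drain the whole directory listing to produce an exact count, whereas findAny() can stop after the first entry.

// Illustrative sketch, not part of the commit.
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.stream.Stream;

public class EmptyDirCheck { // hypothetical class name
  static void requireEmpty(Path dir) throws IOException { // hypothetical helper
    try (Stream<Path> files = Files.list(dir)) {
      if (files.findAny().isPresent()) { // short-circuits at the first entry
        throw new IOException("The output directory must be empty: " + dir);
      }
    }
  }

  public static void main(String[] args) throws IOException {
    requireEmpty(Path.of(args[0]));
  }
}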
@@ -65,9 +65,9 @@ public class ExtractReuters {
   Pattern EXTRACTION_PATTERN =
       Pattern.compile("<TITLE>(.*?)</TITLE>|<DATE>(.*?)</DATE>|<BODY>(.*?)</BODY>");
 
-  private static String[] META_CHARS = {"&", "<", ">", "\"", "'"};
+  private static final String[] META_CHARS = {"&", "<", ">", "\"", "'"};
 
-  private static String[] META_CHARS_SERIALIZATIONS = {"&amp;", "&lt;", "&gt;", "&quot;", "&apos;"};
+  private static final String[] META_CHARS_SERIALIZATIONS = {"&amp;", "&lt;", "&gt;", "&quot;", "&apos;"};
 
   /** Override if you wish to change what is extracted */
   protected void extractFile(Path sgmFile) throws IOException {
@@ -80,7 +80,7 @@ public class ExtractReuters {
       while ((line = reader.readLine()) != null) {
         // when we see a closing reuters tag, flush the file
 
-        if (line.indexOf("</REUTERS") == -1) {
+        if (line.contains("</REUTERS") == false) {
           // Replace the SGM escape sequences
 
           buffer.append(line).append(' '); // accumulate the strings for now,
@@ -86,8 +86,8 @@ public final class FlattenGraphFilter extends TokenFilter {
   }
 
   /**
-   * Gathers up merged input positions into a single output position, only for the current
-   * "frontier" of nodes we've seen but can't yet output because they are not frozen.
+   * Gathers merged input positions into a single output position, only for the current "frontier"
+   * of nodes we've seen but can't yet output because they are not frozen.
    */
   private static final class OutputNode implements RollingBuffer.Resettable {
     private final List<Integer> inputNodes = new ArrayList<>();
@@ -115,7 +115,7 @@ public final class FlattenGraphFilter extends TokenFilter {
   }
 
   private final RollingBuffer<InputNode> inputNodes =
-      new RollingBuffer<InputNode>() {
+      new RollingBuffer<>() {
        @Override
        protected InputNode newInstance() {
          return new InputNode();
@@ -123,7 +123,7 @@ public final class FlattenGraphFilter extends TokenFilter {
       };
 
   private final RollingBuffer<OutputNode> outputNodes =
-      new RollingBuffer<OutputNode>() {
+      new RollingBuffer<>() {
        @Override
        protected OutputNode newInstance() {
          return new OutputNode();
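Aside, not part of the diff: the two RollingBuffer hunks above compile because, since Java 9, the diamond operator may be used with anonymous classes when the inferred type is denotable, so the type argument no longer needs to be repeated. A minimal sketch using a made-up Supplier example:

import java.util.function.Supplier;

public class DiamondAnonymousDemo { // hypothetical demo class
  public static void main(String[] args) {
    // Before Java 9 this had to be written as new Supplier<String>() { ... }.
    Supplier<String> greeting =
        new Supplier<>() {
          @Override
          public String get() {
            return "hello";
          }
        };
    System.out.println(greeting.get()); // prints "hello"
  }
}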
@@ -193,10 +193,10 @@ public final class FlattenGraphFilter extends TokenFilter {
               + " vs output.inputNodes.size()="
               + output.inputNodes.size();
       InputNode inputNode = inputNodes.get(output.inputNodes.get(output.nextOut));
-      if (done && inputNode.tokens.size() == 0 && outputFrom >= outputNodes.getMaxPos()) {
+      if (done && inputNode.tokens.isEmpty() && outputFrom >= outputNodes.getMaxPos()) {
         return false;
       }
-      if (inputNode.tokens.size() == 0) {
+      if (inputNode.tokens.isEmpty()) {
         assert inputNode.nextOut == 0;
         // Hole dest nodes should never be merged since 1) we always
         // assign them to a new output position, and 2) since they never
@@ -210,7 +210,7 @@ public final class FlattenGraphFilter extends TokenFilter {
           continue;
         }
       }
-      // Don't free from a hole src. Since no edge leaves here book keeping may be incorrect.
+      // Don't free from a hole src. Since no edge leaves here bookkeeping may be incorrect.
       // Later output nodes may point to earlier input nodes. So we don't want to free them yet.
       freeBefore(output);
       continue;
@@ -271,7 +271,7 @@ public final class FlattenGraphFilter extends TokenFilter {
    * @param output target output node
    */
   private void freeBefore(OutputNode output) {
-    /* We've released all of the tokens that end at the current output, so free all output nodes before this.
+    /* We've released all the tokens that end at the current output, so free all output nodes before this.
     Input nodes are more complex. The second shingled tokens with alternate paths can appear later in the output graph
     than some of their alternate path tokens. Because of this case we can only free from the minimum because
     the minimum node will have come from before the second shingled token.
@@ -283,7 +283,7 @@ public final class FlattenGraphFilter extends TokenFilter {
     int freeBefore = Collections.min(output.inputNodes);
     // This will catch a node being freed early if it is input to the next output.
     // Could a freed early node be input to a later output?
-    assert outputNodes.get(outputFrom).inputNodes.stream().filter(n -> freeBefore > n).count() == 0
+    assert outputNodes.get(outputFrom).inputNodes.stream().noneMatch(n -> freeBefore > n)
         : "FreeBefore " + freeBefore + " will free in use nodes";
     inputNodes.freeBefore(freeBefore);
     outputNodes.freeBefore(outputFrom);
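For context, a small standalone sketch (made-up data, the class name NoneMatchDemo is illustrative) of the equivalence the last hunk relies on: filter(p).count() == 0 walks the whole stream to build an exact count, while noneMatch(p) returns as soon as one element satisfies the predicate.

import java.util.List;

public class NoneMatchDemo { // hypothetical demo class
  public static void main(String[] args) {
    List<Integer> inputNodes = List.of(5, 1, 7, 3); // made-up data
    int freeBefore = 4;

    // Exhaustive: counts every match even though one match is enough to decide.
    boolean okCounted = inputNodes.stream().filter(n -> freeBefore > n).count() == 0;

    // Short-circuiting: stops at the first node below freeBefore.
    boolean okNoneMatch = inputNodes.stream().noneMatch(n -> freeBefore > n);

    System.out.println(okCounted + " " + okNoneMatch); // prints "false false"
  }
}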