mirror of https://github.com/apache/lucene.git
improve broken link checker; fix some broken links
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1328962 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
422bed652e
commit
79894b9744
|
@ -1,3 +1,18 @@
|
||||||
|
# Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
# contributor license agreements. See the NOTICE file distributed with
|
||||||
|
# this work for additional information regarding copyright ownership.
|
||||||
|
# The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
# (the "License"); you may not use this file except in compliance with
|
||||||
|
# the License. You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
|
||||||
import traceback
|
import traceback
|
||||||
import os
|
import os
|
||||||
import sys
|
import sys
|
||||||
|
@ -62,6 +77,7 @@ class FindHyperlinks(HTMLParser):
|
||||||
self.printed = True
|
self.printed = True
|
||||||
|
|
||||||
def parse(baseURL, html):
|
def parse(baseURL, html):
|
||||||
|
global failures
|
||||||
parser = FindHyperlinks(baseURL)
|
parser = FindHyperlinks(baseURL)
|
||||||
try:
|
try:
|
||||||
parser.feed(html)
|
parser.feed(html)
|
||||||
|
@ -70,17 +86,22 @@ def parse(baseURL, html):
|
||||||
parser.printFile()
|
parser.printFile()
|
||||||
print ' WARNING: failed to parse:'
|
print ' WARNING: failed to parse:'
|
||||||
traceback.print_exc()
|
traceback.print_exc()
|
||||||
|
failures = True
|
||||||
return [], []
|
return [], []
|
||||||
|
|
||||||
#print ' %d links, %d anchors' % \
|
#print ' %d links, %d anchors' % \
|
||||||
# (len(parser.links), len(parser.anchors))
|
# (len(parser.links), len(parser.anchors))
|
||||||
return parser.links, parser.anchors
|
return parser.links, parser.anchors
|
||||||
|
|
||||||
|
failures = False
|
||||||
|
|
||||||
def checkAll(dirName):
|
def checkAll(dirName):
|
||||||
"""
|
"""
|
||||||
Checks *.html (recursively) under this directory.
|
Checks *.html (recursively) under this directory.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
global failures
|
||||||
|
|
||||||
# Find/parse all HTML files first
|
# Find/parse all HTML files first
|
||||||
print
|
print
|
||||||
print 'Crawl/parse...'
|
print 'Crawl/parse...'
|
||||||
|
@ -150,6 +171,13 @@ def checkAll(dirName):
|
||||||
print
|
print
|
||||||
print fullPath
|
print fullPath
|
||||||
print ' BROKEN ANCHOR: %s' % origLink
|
print ' BROKEN ANCHOR: %s' % origLink
|
||||||
|
|
||||||
|
failures = failures or printed
|
||||||
|
|
||||||
|
if failures:
|
||||||
|
sys.exit(1)
|
||||||
|
else:
|
||||||
|
sys.exit(0)
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
checkAll(sys.argv[1])
|
checkAll(sys.argv[1])
|
||||||
|
|
|
@ -44,7 +44,7 @@ import org.apache.lucene.util.Version;
|
||||||
*/
|
*/
|
||||||
public final class CommonGramsFilter extends TokenFilter {
|
public final class CommonGramsFilter extends TokenFilter {
|
||||||
|
|
||||||
static final String GRAM_TYPE = "gram";
|
public static final String GRAM_TYPE = "gram";
|
||||||
private static final char SEPARATOR = '_';
|
private static final char SEPARATOR = '_';
|
||||||
|
|
||||||
private final CharArraySet commonWords;
|
private final CharArraySet commonWords;
|
||||||
|
|
|
@ -23,7 +23,7 @@ This package allows to benchmark search quality of a Lucene application.
|
||||||
<p>
|
<p>
|
||||||
In order to use this package you should provide:
|
In order to use this package you should provide:
|
||||||
<ul>
|
<ul>
|
||||||
<li>A <a href="../../search/Searcher.html">searcher</a>.</li>
|
<li>A <a href="{@docRoot}/../core/org/apache/lucene/search/IndexSearcher.html">IndexSearcher</a>.</li>
|
||||||
<li><a href="QualityQuery.html">Quality queries</a>.</li>
|
<li><a href="QualityQuery.html">Quality queries</a>.</li>
|
||||||
<li><a href="Judge.html">Judging object</a>.</li>
|
<li><a href="Judge.html">Judging object</a>.</li>
|
||||||
<li><a href="utils/SubmissionReport.html">Reporting object</a>.</li>
|
<li><a href="utils/SubmissionReport.html">Reporting object</a>.</li>
|
||||||
|
|
|
@ -397,7 +397,7 @@ public abstract class DirectoryReader extends BaseCompositeReader<AtomicReader>
|
||||||
*
|
*
|
||||||
* <p>If instead this reader is a near real-time reader
|
* <p>If instead this reader is a near real-time reader
|
||||||
* (ie, obtained by a call to {@link
|
* (ie, obtained by a call to {@link
|
||||||
* IndexWriter#getReader}, or by calling {@link #openIfChanged}
|
* IndexReader#open(IndexWriter,boolean)}, or by calling {@link #openIfChanged}
|
||||||
* on a near real-time reader), then this method checks if
|
* on a near real-time reader), then this method checks if
|
||||||
* either a new commit has occurred, or any new
|
* either a new commit has occurred, or any new
|
||||||
* uncommitted changes have taken place via the writer.
|
* uncommitted changes have taken place via the writer.
|
||||||
|
|
|
@ -2517,7 +2517,7 @@ public class IndexWriter implements Closeable, TwoPhaseCommit {
|
||||||
/** Commits all changes to the index, specifying a
|
/** Commits all changes to the index, specifying a
|
||||||
* commitUserData Map (String -> String). This just
|
* commitUserData Map (String -> String). This just
|
||||||
* calls {@link #prepareCommit(Map)} (if you didn't
|
* calls {@link #prepareCommit(Map)} (if you didn't
|
||||||
* already call it) and then {@link #finishCommit}.
|
* already call it) and then {@link #commit}.
|
||||||
*
|
*
|
||||||
* <p><b>NOTE</b>: if this method hits an OutOfMemoryError
|
* <p><b>NOTE</b>: if this method hits an OutOfMemoryError
|
||||||
* you should immediately close the writer. See <a
|
* you should immediately close the writer. See <a
|
||||||
|
@ -3719,13 +3719,14 @@ public class IndexWriter implements Closeable, TwoPhaseCommit {
|
||||||
directory.makeLock(IndexWriter.WRITE_LOCK_NAME).release();
|
directory.makeLock(IndexWriter.WRITE_LOCK_NAME).release();
|
||||||
}
|
}
|
||||||
|
|
||||||
/** If {@link #getReader} has been called (ie, this writer
|
/** If {@link IndexReader#open(IndexWriter,boolean)} has
|
||||||
* is in near real-time mode), then after a merge
|
* been called (ie, this writer is in near real-time
|
||||||
* completes, this class can be invoked to warm the
|
* mode), then after a merge completes, this class can be
|
||||||
* reader on the newly merged segment, before the merge
|
* invoked to warm the reader on the newly merged
|
||||||
* commits. This is not required for near real-time
|
* segment, before the merge commits. This is not
|
||||||
* search, but will reduce search latency on opening a
|
* required for near real-time search, but will reduce
|
||||||
* new near real-time reader after a merge completes.
|
* search latency on opening a new near real-time reader
|
||||||
|
* after a merge completes.
|
||||||
*
|
*
|
||||||
* @lucene.experimental
|
* @lucene.experimental
|
||||||
*
|
*
|
||||||
|
|
|
@ -43,7 +43,7 @@ import org.apache.lucene.store.IOContext.Context;
|
||||||
* to the provided Directory instance.
|
* to the provided Directory instance.
|
||||||
*
|
*
|
||||||
* <p>See <a
|
* <p>See <a
|
||||||
* href="../../../../../contrib-misc/overview-summary.html#NativeUnixDirectory">Overview</a>
|
* href="{@docRoot}/overview-summary.html#NativeUnixDirectory">Overview</a>
|
||||||
* for more details.
|
* for more details.
|
||||||
*
|
*
|
||||||
* <p>To use this you must compile
|
* <p>To use this you must compile
|
||||||
|
|
|
@ -88,7 +88,7 @@ public abstract class QueryParserBase {
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Initializes a query parser. Called by the QueryParser constructor
|
/** Initializes a query parser. Called by the QueryParser constructor
|
||||||
* @param matchVersion Lucene version to match. See <a href="#version">above</a>.
|
* @param matchVersion Lucene version to match. See <a href="QueryParser.html#version">here</a>.
|
||||||
* @param f the default field for query terms.
|
* @param f the default field for query terms.
|
||||||
* @param a used to find terms in the query text.
|
* @param a used to find terms in the query text.
|
||||||
*/
|
*/
|
||||||
|
|
|
@ -20,7 +20,7 @@ import org.apache.lucene.queryparser.surround.query.SrndTruncQuery;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* This class is generated by JavaCC. The only method that clients should need
|
* This class is generated by JavaCC. The only method that clients should need
|
||||||
* to call is <a href="#parse">parse()</a>.
|
* to call is {@link #parse}.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
public class QueryParser implements QueryParserConstants {
|
public class QueryParser implements QueryParserConstants {
|
||||||
|
|
|
@ -49,7 +49,7 @@ import org.apache.lucene.queryparser.surround.query.SrndTruncQuery;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* This class is generated by JavaCC. The only method that clients should need
|
* This class is generated by JavaCC. The only method that clients should need
|
||||||
* to call is <a href="#parse">parse()</a>.
|
* to call is {@link #parse}.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
public class QueryParser {
|
public class QueryParser {
|
||||||
|
|
Loading…
Reference in New Issue