improve broken link checker; fix some broken links

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1328962 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Michael McCandless 2012-04-22 20:08:53 +00:00
parent 422bed652e
commit 79894b9744
9 changed files with 44 additions and 15 deletions

View File

@ -1,3 +1,18 @@
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import traceback
import os
import sys
@ -62,6 +77,7 @@ class FindHyperlinks(HTMLParser):
self.printed = True
def parse(baseURL, html):
global failures
parser = FindHyperlinks(baseURL)
try:
parser.feed(html)
@ -70,17 +86,22 @@ def parse(baseURL, html):
parser.printFile()
print ' WARNING: failed to parse:'
traceback.print_exc()
failures = True
return [], []
#print ' %d links, %d anchors' % \
# (len(parser.links), len(parser.anchors))
return parser.links, parser.anchors
failures = False
def checkAll(dirName):
"""
Checks *.html (recursively) under this directory.
"""
global failures
# Find/parse all HTML files first
print
print 'Crawl/parse...'
@ -150,6 +171,13 @@ def checkAll(dirName):
print
print fullPath
print ' BROKEN ANCHOR: %s' % origLink
failures = failures or printed
if failures:
sys.exit(1)
else:
sys.exit(0)
if __name__ == '__main__':
checkAll(sys.argv[1])

View File

@ -44,7 +44,7 @@ import org.apache.lucene.util.Version;
*/
public final class CommonGramsFilter extends TokenFilter {
static final String GRAM_TYPE = "gram";
public static final String GRAM_TYPE = "gram";
private static final char SEPARATOR = '_';
private final CharArraySet commonWords;

View File

@ -23,7 +23,7 @@ This package allows to benchmark search quality of a Lucene application.
<p>
In order to use this package you should provide:
<ul>
<li>A <a href="../../search/Searcher.html">searcher</a>.</li>
<li>A <a href="{@docRoot}/../core/org/apache/lucene/search/IndexSearcher.html">IndexSearcher</a>.</li>
<li><a href="QualityQuery.html">Quality queries</a>.</li>
<li><a href="Judge.html">Judging object</a>.</li>
<li><a href="utils/SubmissionReport.html">Reporting object</a>.</li>

View File

@ -397,7 +397,7 @@ public abstract class DirectoryReader extends BaseCompositeReader<AtomicReader>
*
* <p>If instead this reader is a near real-time reader
* (ie, obtained by a call to {@link
* IndexWriter#getReader}, or by calling {@link #openIfChanged}
* IndexReader#open(IndexWriter,boolean)}, or by calling {@link #openIfChanged}
* on a near real-time reader), then this method checks if
* either a new commit has occurred, or any new
* uncommitted changes have taken place via the writer.

View File

@ -2517,7 +2517,7 @@ public class IndexWriter implements Closeable, TwoPhaseCommit {
/** Commits all changes to the index, specifying a
* commitUserData Map (String -> String). This just
* calls {@link #prepareCommit(Map)} (if you didn't
* already call it) and then {@link #finishCommit}.
* already call it) and then {@link #commit}.
*
* <p><b>NOTE</b>: if this method hits an OutOfMemoryError
* you should immediately close the writer. See <a
@ -3719,13 +3719,14 @@ public class IndexWriter implements Closeable, TwoPhaseCommit {
directory.makeLock(IndexWriter.WRITE_LOCK_NAME).release();
}
/** If {@link #getReader} has been called (ie, this writer
* is in near real-time mode), then after a merge
* completes, this class can be invoked to warm the
* reader on the newly merged segment, before the merge
* commits. This is not required for near real-time
* search, but will reduce search latency on opening a
* new near real-time reader after a merge completes.
/** If {@link IndexReader#open(IndexWriter,boolean)} has
* been called (ie, this writer is in near real-time
* mode), then after a merge completes, this class can be
* invoked to warm the reader on the newly merged
* segment, before the merge commits. This is not
* required for near real-time search, but will reduce
* search latency on opening a new near real-time reader
* after a merge completes.
*
* @lucene.experimental
*

View File

@ -43,7 +43,7 @@ import org.apache.lucene.store.IOContext.Context;
* to the provided Directory instance.
*
* <p>See <a
* href="../../../../../contrib-misc/overview-summary.html#NativeUnixDirectory">Overview</a>
* href="{@docRoot}/overview-summary.html#NativeUnixDirectory">Overview</a>
* for more details.
*
* <p>To use this you must compile

View File

@ -88,7 +88,7 @@ public abstract class QueryParserBase {
}
/** Initializes a query parser. Called by the QueryParser constructor
* @param matchVersion Lucene version to match. See <a href="#version">above</a>.
* @param matchVersion Lucene version to match. See <a href="QueryParser.html#version">here</a>.
* @param f the default field for query terms.
* @param a used to find terms in the query text.
*/

View File

@ -20,7 +20,7 @@ import org.apache.lucene.queryparser.surround.query.SrndTruncQuery;
/**
* This class is generated by JavaCC. The only method that clients should need
* to call is <a href="#parse">parse()</a>.
* to call is {@link #parse}.
*/
public class QueryParser implements QueryParserConstants {

View File

@ -49,7 +49,7 @@ import org.apache.lucene.queryparser.surround.query.SrndTruncQuery;
/**
* This class is generated by JavaCC. The only method that clients should need
* to call is <a href="#parse">parse()</a>.
* to call is {@link #parse}.
*/
public class QueryParser {