mirror of
https://github.com/apache/lucene.git
synced 2025-03-02 06:19:27 +00:00
LUCENE-3983: HTMLStripCharFilter: Stop upcasing HTML character entity names at class initialization time; instead, provide hard-coded upcased versions for a small set of them.
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1340169 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
0b1d814c94
commit
5efed3447e
@ -62,8 +62,16 @@ CharacterEntities = ( "AElig" | "Aacute" | "Acirc" | "Agrave" | "Alpha"
|
||||
| "weierp" | "xi" | "yacute" | "yen" | "yuml" | "zeta"
|
||||
| "zwj" | "zwnj" )
|
||||
%{
|
||||
private static final Set<String> upperCaseVariantsAccepted
|
||||
= new HashSet<String>(Arrays.asList("quot","copy","gt","lt","reg","amp"));
|
||||
private static final Map<String,String> upperCaseVariantsAccepted
|
||||
= new HashMap<String,String>();
|
||||
static {
|
||||
upperCaseVariantsAccepted.put("quot", "QUOT");
|
||||
upperCaseVariantsAccepted.put("copy", "COPY");
|
||||
upperCaseVariantsAccepted.put("gt", "GT");
|
||||
upperCaseVariantsAccepted.put("lt", "LT");
|
||||
upperCaseVariantsAccepted.put("reg", "REG");
|
||||
upperCaseVariantsAccepted.put("amp", "AMP");
|
||||
}
|
||||
private static final CharArrayMap<Character> entityValues
|
||||
= new CharArrayMap<Character>(Version.LUCENE_40, 253, false);
|
||||
static {
|
||||
@ -145,8 +153,9 @@ CharacterEntities = ( "AElig" | "Aacute" | "Acirc" | "Agrave" | "Alpha"
|
||||
for (int i = 0 ; i < entities.length ; i += 2) {
|
||||
Character value = entities[i + 1].charAt(0);
|
||||
entityValues.put(entities[i], value);
|
||||
if (upperCaseVariantsAccepted.contains(entities[i])) {
|
||||
entityValues.put(entities[i].toUpperCase(), value);
|
||||
String upperCaseVariant = upperCaseVariantsAccepted.get(entities[i]);
|
||||
if (upperCaseVariant != null) {
|
||||
entityValues.put(upperCaseVariant, value);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -1,4 +1,4 @@
|
||||
/* The following code was generated by JFlex 1.5.0-SNAPSHOT on 3/24/12 4:50 PM */
|
||||
/* The following code was generated by JFlex 1.5.0-SNAPSHOT on 5/18/12 12:24 PM */
|
||||
|
||||
package org.apache.lucene.analysis.charfilter;
|
||||
|
||||
@ -21,7 +21,8 @@ package org.apache.lucene.analysis.charfilter;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Arrays;
|
||||
import java.util.HashSet;
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
|
||||
import org.apache.lucene.util.Version;
|
||||
@ -39,8 +40,8 @@ import org.apache.lucene.analysis.util.OpenStringBuilder;
|
||||
/**
|
||||
* This class is a scanner generated by
|
||||
* <a href="http://www.jflex.de/">JFlex</a> 1.5.0-SNAPSHOT
|
||||
* on 3/24/12 4:50 PM from the specification file
|
||||
* <tt>C:/cygwin/home/s/svn/lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/charfilter/HTMLStripCharFilter.jflex</tt>
|
||||
* on 5/18/12 12:24 PM from the specification file
|
||||
* <tt>C:/svn/lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/charfilter/HTMLStripCharFilter.jflex</tt>
|
||||
*/
|
||||
public final class HTMLStripCharFilter extends BaseCharFilter {
|
||||
|
||||
@ -30522,8 +30523,16 @@ public final class HTMLStripCharFilter extends BaseCharFilter {
|
||||
private boolean zzEOFDone;
|
||||
|
||||
/* user code: */
|
||||
private static final Set<String> upperCaseVariantsAccepted
|
||||
= new HashSet<String>(Arrays.asList("quot","copy","gt","lt","reg","amp"));
|
||||
private static final Map<String,String> upperCaseVariantsAccepted
|
||||
= new HashMap<String,String>();
|
||||
static {
|
||||
upperCaseVariantsAccepted.put("quot", "QUOT");
|
||||
upperCaseVariantsAccepted.put("copy", "COPY");
|
||||
upperCaseVariantsAccepted.put("gt", "GT");
|
||||
upperCaseVariantsAccepted.put("lt", "LT");
|
||||
upperCaseVariantsAccepted.put("reg", "REG");
|
||||
upperCaseVariantsAccepted.put("amp", "AMP");
|
||||
}
|
||||
private static final CharArrayMap<Character> entityValues
|
||||
= new CharArrayMap<Character>(Version.LUCENE_40, 253, false);
|
||||
static {
|
||||
@ -30605,8 +30614,9 @@ public final class HTMLStripCharFilter extends BaseCharFilter {
|
||||
for (int i = 0 ; i < entities.length ; i += 2) {
|
||||
Character value = entities[i + 1].charAt(0);
|
||||
entityValues.put(entities[i], value);
|
||||
if (upperCaseVariantsAccepted.contains(entities[i])) {
|
||||
entityValues.put(entities[i].toUpperCase(), value);
|
||||
String upperCaseVariant = upperCaseVariantsAccepted.get(entities[i]);
|
||||
if (upperCaseVariant != null) {
|
||||
entityValues.put(upperCaseVariant, value);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -19,7 +19,8 @@ package org.apache.lucene.analysis.charfilter;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Arrays;
|
||||
import java.util.HashSet;
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
|
||||
import org.apache.lucene.util.Version;
|
||||
|
@ -50,8 +50,16 @@ def main():
|
||||
print output_line, ')'
|
||||
|
||||
print '%{'
|
||||
print ' private static final Set<String> upperCaseVariantsAccepted'
|
||||
print ' = new HashSet<String>(Arrays.asList("quot","copy","gt","lt","reg","amp"));'
|
||||
print ' private static final Map<String,String> upperCaseVariantsAccepted'
|
||||
print ' = new HashMap<String,String>();'
|
||||
print ' static {'
|
||||
print ' upperCaseVariantsAccepted.put("quot", "QUOT");'
|
||||
print ' upperCaseVariantsAccepted.put("copy", "COPY");'
|
||||
print ' upperCaseVariantsAccepted.put("gt", "GT");'
|
||||
print ' upperCaseVariantsAccepted.put("lt", "LT");'
|
||||
print ' upperCaseVariantsAccepted.put("reg", "REG");'
|
||||
print ' upperCaseVariantsAccepted.put("amp", "AMP");'
|
||||
print ' }'
|
||||
print ' private static final CharArrayMap<Character> entityValues'
|
||||
print ' = new CharArrayMap<Character>(Version.LUCENE_40, %i, false);' % len(keys)
|
||||
print ' static {'
|
||||
@ -68,8 +76,9 @@ def main():
|
||||
print ' for (int i = 0 ; i < entities.length ; i += 2) {'
|
||||
print ' Character value = entities[i + 1].charAt(0);'
|
||||
print ' entityValues.put(entities[i], value);'
|
||||
print ' if (upperCaseVariantsAccepted.contains(entities[i])) {'
|
||||
print ' entityValues.put(entities[i].toUpperCase(), value);'
|
||||
print ' String upperCaseVariant = upperCaseVariantsAccepted.get(entities[i]);'
|
||||
print ' if (upperCaseVariant != null) {'
|
||||
print ' entityValues.put(upperCaseVariant, value);'
|
||||
print ' }'
|
||||
print ' }'
|
||||
print " }"
|
||||
|
Loading…
x
Reference in New Issue
Block a user