|
|
|
@ -1,4 +1,4 @@
|
|
|
|
|
/* The following code was generated by JFlex 1.5.0-SNAPSHOT on 7/26/12 6:22 PM */
|
|
|
|
|
/* The following code was generated by JFlex 1.5.0-SNAPSHOT on 8/6/12 11:57 AM */
|
|
|
|
|
|
|
|
|
|
package org.apache.lucene.analysis.charfilter;
|
|
|
|
|
|
|
|
|
@ -40,8 +40,8 @@ import org.apache.lucene.analysis.util.OpenStringBuilder;
|
|
|
|
|
/**
|
|
|
|
|
* This class is a scanner generated by
|
|
|
|
|
* <a href="http://www.jflex.de/">JFlex</a> 1.5.0-SNAPSHOT
|
|
|
|
|
* on 7/26/12 6:22 PM from the specification file
|
|
|
|
|
* <tt>C:/svn/lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/charfilter/HTMLStripCharFilter.jflex</tt>
|
|
|
|
|
* on 8/6/12 11:57 AM from the specification file
|
|
|
|
|
* <tt>/home/rmuir/workspace/lucene-trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/charfilter/HTMLStripCharFilter.jflex</tt>
|
|
|
|
|
*/
|
|
|
|
|
public final class HTMLStripCharFilter extends BaseCharFilter {
|
|
|
|
|
|
|
|
|
@ -31255,6 +31255,93 @@ public final class HTMLStripCharFilter extends BaseCharFilter {
|
|
|
|
|
{ yybegin(STYLE);
|
|
|
|
|
}
|
|
|
|
|
case 55: break;
|
|
|
|
|
case 27:
|
|
|
|
|
{ // add (previously matched input length) + (this match length) - (substitution length)
|
|
|
|
|
cumulativeDiff += inputSegment.length() + yylength() - 1;
|
|
|
|
|
// position the correction at (already output length) + (substitution length)
|
|
|
|
|
addOffCorrectMap(outputCharCount + 1, cumulativeDiff);
|
|
|
|
|
inputSegment.clear();
|
|
|
|
|
yybegin(YYINITIAL);
|
|
|
|
|
return BLOCK_LEVEL_START_TAG_REPLACEMENT;
|
|
|
|
|
}
|
|
|
|
|
case 56: break;
|
|
|
|
|
case 30:
|
|
|
|
|
{ int length = yylength();
|
|
|
|
|
inputSegment.write(zzBuffer, zzStartRead, length);
|
|
|
|
|
entitySegment.clear();
|
|
|
|
|
char ch = entityValues.get(zzBuffer, zzStartRead, length).charValue();
|
|
|
|
|
entitySegment.append(ch);
|
|
|
|
|
outputSegment = entitySegment;
|
|
|
|
|
yybegin(CHARACTER_REFERENCE_TAIL);
|
|
|
|
|
}
|
|
|
|
|
case 57: break;
|
|
|
|
|
case 48:
|
|
|
|
|
{ inputSegment.clear();
|
|
|
|
|
yybegin(YYINITIAL);
|
|
|
|
|
// add (previously matched input length) -- current match and substitution handled below
|
|
|
|
|
cumulativeDiff += yychar - inputStart;
|
|
|
|
|
// position the offset correction at (already output length) -- substitution handled below
|
|
|
|
|
int offsetCorrectionPos = outputCharCount;
|
|
|
|
|
int returnValue;
|
|
|
|
|
if (escapeSTYLE) {
|
|
|
|
|
inputSegment.write(zzBuffer, zzStartRead, yylength());
|
|
|
|
|
outputSegment = inputSegment;
|
|
|
|
|
returnValue = outputSegment.nextChar();
|
|
|
|
|
} else {
|
|
|
|
|
// add (this match length) - (substitution length)
|
|
|
|
|
cumulativeDiff += yylength() - 1;
|
|
|
|
|
// add (substitution length)
|
|
|
|
|
++offsetCorrectionPos;
|
|
|
|
|
returnValue = STYLE_REPLACEMENT;
|
|
|
|
|
}
|
|
|
|
|
addOffCorrectMap(offsetCorrectionPos, cumulativeDiff);
|
|
|
|
|
return returnValue;
|
|
|
|
|
}
|
|
|
|
|
case 58: break;
|
|
|
|
|
case 8:
|
|
|
|
|
{ inputSegment.write(zzBuffer, zzStartRead, yylength());
|
|
|
|
|
if (null != escapedTags
|
|
|
|
|
&& escapedTags.contains(zzBuffer, zzStartRead, yylength())) {
|
|
|
|
|
yybegin(START_TAG_TAIL_INCLUDE);
|
|
|
|
|
} else {
|
|
|
|
|
yybegin(START_TAG_TAIL_SUBSTITUTE);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
case 59: break;
|
|
|
|
|
case 2:
|
|
|
|
|
{ inputStart = yychar;
|
|
|
|
|
inputSegment.clear();
|
|
|
|
|
inputSegment.append('<');
|
|
|
|
|
yybegin(LEFT_ANGLE_BRACKET);
|
|
|
|
|
}
|
|
|
|
|
case 60: break;
|
|
|
|
|
case 44:
|
|
|
|
|
{ restoreState = STYLE_COMMENT; yybegin(SERVER_SIDE_INCLUDE);
|
|
|
|
|
}
|
|
|
|
|
case 61: break;
|
|
|
|
|
case 21:
|
|
|
|
|
{ previousRestoreState = restoreState;
|
|
|
|
|
restoreState = SERVER_SIDE_INCLUDE;
|
|
|
|
|
yybegin(SINGLE_QUOTED_STRING);
|
|
|
|
|
}
|
|
|
|
|
case 62: break;
|
|
|
|
|
case 11:
|
|
|
|
|
{ inputSegment.write(zzBuffer, zzStartRead, yylength());
|
|
|
|
|
yybegin(LEFT_ANGLE_BRACKET_SPACE);
|
|
|
|
|
}
|
|
|
|
|
case 63: break;
|
|
|
|
|
case 35:
|
|
|
|
|
{ yybegin(SCRIPT);
|
|
|
|
|
}
|
|
|
|
|
case 64: break;
|
|
|
|
|
case 42:
|
|
|
|
|
{ restoreState = COMMENT; yybegin(SERVER_SIDE_INCLUDE);
|
|
|
|
|
}
|
|
|
|
|
case 65: break;
|
|
|
|
|
case 10:
|
|
|
|
|
{ inputSegment.append('!'); yybegin(BANG);
|
|
|
|
|
}
|
|
|
|
|
case 66: break;
|
|
|
|
|
case 51:
|
|
|
|
|
{ // Handle paired UTF-16 surrogates.
|
|
|
|
|
String surrogatePair = yytext();
|
|
|
|
@ -31288,13 +31375,331 @@ public final class HTMLStripCharFilter extends BaseCharFilter {
|
|
|
|
|
inputSegment.append('#');
|
|
|
|
|
yybegin(NUMERIC_CHARACTER);
|
|
|
|
|
}
|
|
|
|
|
case 56: break;
|
|
|
|
|
case 21:
|
|
|
|
|
case 67: break;
|
|
|
|
|
case 4:
|
|
|
|
|
{ yypushback(1);
|
|
|
|
|
outputSegment = inputSegment;
|
|
|
|
|
outputSegment.restart();
|
|
|
|
|
yybegin(YYINITIAL);
|
|
|
|
|
return outputSegment.nextChar();
|
|
|
|
|
}
|
|
|
|
|
case 68: break;
|
|
|
|
|
case 43:
|
|
|
|
|
{ restoreState = SCRIPT_COMMENT; yybegin(SERVER_SIDE_INCLUDE);
|
|
|
|
|
}
|
|
|
|
|
case 69: break;
|
|
|
|
|
case 52:
|
|
|
|
|
{ // Handle paired UTF-16 surrogates.
|
|
|
|
|
String surrogatePair = yytext();
|
|
|
|
|
char highSurrogate = '\u0000';
|
|
|
|
|
try { // High surrogates are in decimal range [55296, 56319]
|
|
|
|
|
highSurrogate = (char)Integer.parseInt(surrogatePair.substring(1, 6));
|
|
|
|
|
} catch(Exception e) { // should never happen
|
|
|
|
|
assert false: "Exception parsing high surrogate '"
|
|
|
|
|
+ surrogatePair.substring(1, 6) + "'";
|
|
|
|
|
}
|
|
|
|
|
if (Character.isHighSurrogate(highSurrogate)) {
|
|
|
|
|
outputSegment = entitySegment;
|
|
|
|
|
outputSegment.clear();
|
|
|
|
|
try {
|
|
|
|
|
outputSegment.unsafeWrite
|
|
|
|
|
((char)Integer.parseInt(surrogatePair.substring(10, 14), 16));
|
|
|
|
|
} catch(Exception e) { // should never happen
|
|
|
|
|
assert false: "Exception parsing low surrogate '"
|
|
|
|
|
+ surrogatePair.substring(10, 14) + "'";
|
|
|
|
|
}
|
|
|
|
|
// add (previously matched input length) + (this match length) - (substitution length)
|
|
|
|
|
cumulativeDiff += inputSegment.length() + yylength() - 2;
|
|
|
|
|
// position the correction at (already output length) + (substitution length)
|
|
|
|
|
addOffCorrectMap(outputCharCount + 2, cumulativeDiff);
|
|
|
|
|
inputSegment.clear();
|
|
|
|
|
yybegin(YYINITIAL);
|
|
|
|
|
return highSurrogate;
|
|
|
|
|
}
|
|
|
|
|
yypushback(surrogatePair.length() - 1); // Consume only '#'
|
|
|
|
|
inputSegment.append('#');
|
|
|
|
|
yybegin(NUMERIC_CHARACTER);
|
|
|
|
|
}
|
|
|
|
|
case 70: break;
|
|
|
|
|
case 28:
|
|
|
|
|
{ restoreState = STYLE_COMMENT; yybegin(SINGLE_QUOTED_STRING);
|
|
|
|
|
}
|
|
|
|
|
case 71: break;
|
|
|
|
|
case 50:
|
|
|
|
|
{ // Handle paired UTF-16 surrogates.
|
|
|
|
|
outputSegment = entitySegment;
|
|
|
|
|
outputSegment.clear();
|
|
|
|
|
String surrogatePair = yytext();
|
|
|
|
|
char highSurrogate = '\u0000';
|
|
|
|
|
try {
|
|
|
|
|
highSurrogate = (char)Integer.parseInt(surrogatePair.substring(2, 6), 16);
|
|
|
|
|
} catch(Exception e) { // should never happen
|
|
|
|
|
assert false: "Exception parsing high surrogate '"
|
|
|
|
|
+ surrogatePair.substring(2, 6) + "'";
|
|
|
|
|
}
|
|
|
|
|
try {
|
|
|
|
|
outputSegment.unsafeWrite
|
|
|
|
|
((char)Integer.parseInt(surrogatePair.substring(10, 14), 16));
|
|
|
|
|
} catch(Exception e) { // should never happen
|
|
|
|
|
assert false: "Exception parsing low surrogate '"
|
|
|
|
|
+ surrogatePair.substring(10, 14) + "'";
|
|
|
|
|
}
|
|
|
|
|
// add (previously matched input length) + (this match length) - (substitution length)
|
|
|
|
|
cumulativeDiff += inputSegment.length() + yylength() - 2;
|
|
|
|
|
// position the correction at (already output length) + (substitution length)
|
|
|
|
|
addOffCorrectMap(outputCharCount + 2, cumulativeDiff);
|
|
|
|
|
inputSegment.clear();
|
|
|
|
|
yybegin(YYINITIAL);
|
|
|
|
|
return highSurrogate;
|
|
|
|
|
}
|
|
|
|
|
case 72: break;
|
|
|
|
|
case 16:
|
|
|
|
|
{ restoreState = SCRIPT_COMMENT; yybegin(SINGLE_QUOTED_STRING);
|
|
|
|
|
}
|
|
|
|
|
case 73: break;
|
|
|
|
|
case 22:
|
|
|
|
|
{ previousRestoreState = restoreState;
|
|
|
|
|
restoreState = SERVER_SIDE_INCLUDE;
|
|
|
|
|
yybegin(SINGLE_QUOTED_STRING);
|
|
|
|
|
yybegin(DOUBLE_QUOTED_STRING);
|
|
|
|
|
}
|
|
|
|
|
case 57: break;
|
|
|
|
|
case 74: break;
|
|
|
|
|
case 26:
|
|
|
|
|
{ // add (previously matched input length) + (this match length) [ - (substitution length) = 0 ]
|
|
|
|
|
cumulativeDiff += inputSegment.length() + yylength();
|
|
|
|
|
// position the correction at (already output length) [ + (substitution length) = 0 ]
|
|
|
|
|
addOffCorrectMap(outputCharCount, cumulativeDiff);
|
|
|
|
|
inputSegment.clear();
|
|
|
|
|
outputSegment = inputSegment;
|
|
|
|
|
yybegin(YYINITIAL);
|
|
|
|
|
}
|
|
|
|
|
case 75: break;
|
|
|
|
|
case 20:
|
|
|
|
|
{ inputSegment.write(zzBuffer, zzStartRead, yylength());
|
|
|
|
|
}
|
|
|
|
|
case 76: break;
|
|
|
|
|
case 47:
|
|
|
|
|
{ // add (previously matched input length) + (this match length) [ - (substitution length) = 0 ]
|
|
|
|
|
cumulativeDiff += inputSegment.length() + yylength();
|
|
|
|
|
// position the correction at (already output length) [ + (substitution length) = 0 ]
|
|
|
|
|
addOffCorrectMap(outputCharCount, cumulativeDiff);
|
|
|
|
|
inputSegment.clear();
|
|
|
|
|
yybegin(CDATA);
|
|
|
|
|
}
|
|
|
|
|
case 77: break;
|
|
|
|
|
case 33:
|
|
|
|
|
{ yybegin(YYINITIAL);
|
|
|
|
|
if (escapeBR) {
|
|
|
|
|
inputSegment.write(zzBuffer, zzStartRead, yylength());
|
|
|
|
|
outputSegment = inputSegment;
|
|
|
|
|
return outputSegment.nextChar();
|
|
|
|
|
} else {
|
|
|
|
|
// add (previously matched input length) + (this match length) - (substitution length)
|
|
|
|
|
cumulativeDiff += inputSegment.length() + yylength() - 1;
|
|
|
|
|
// position the correction at (already output length) + (substitution length)
|
|
|
|
|
addOffCorrectMap(outputCharCount + 1, cumulativeDiff);
|
|
|
|
|
inputSegment.reset();
|
|
|
|
|
return BR_START_TAG_REPLACEMENT;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
case 78: break;
|
|
|
|
|
case 23:
|
|
|
|
|
{ yybegin(restoreState); restoreState = previousRestoreState;
|
|
|
|
|
}
|
|
|
|
|
case 79: break;
|
|
|
|
|
case 32:
|
|
|
|
|
{ yybegin(COMMENT);
|
|
|
|
|
}
|
|
|
|
|
case 80: break;
|
|
|
|
|
case 24:
|
|
|
|
|
{ inputSegment.write(zzBuffer, zzStartRead, yylength());
|
|
|
|
|
outputSegment = inputSegment;
|
|
|
|
|
yybegin(YYINITIAL);
|
|
|
|
|
return outputSegment.nextChar();
|
|
|
|
|
}
|
|
|
|
|
case 81: break;
|
|
|
|
|
case 3:
|
|
|
|
|
{ inputStart = yychar;
|
|
|
|
|
inputSegment.clear();
|
|
|
|
|
inputSegment.append('&');
|
|
|
|
|
yybegin(AMPERSAND);
|
|
|
|
|
}
|
|
|
|
|
case 82: break;
|
|
|
|
|
case 46:
|
|
|
|
|
{ yybegin(SCRIPT);
|
|
|
|
|
if (escapeSCRIPT) {
|
|
|
|
|
inputSegment.write(zzBuffer, zzStartRead, yylength());
|
|
|
|
|
outputSegment = inputSegment;
|
|
|
|
|
inputStart += 1 + yylength();
|
|
|
|
|
return outputSegment.nextChar();
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
case 83: break;
|
|
|
|
|
case 14:
|
|
|
|
|
{ // add (previously matched input length) + (this match length) [ - (substitution length) = 0 ]
|
|
|
|
|
cumulativeDiff += inputSegment.length() + yylength();
|
|
|
|
|
// position the correction at (already output length) [ + (substitution length) = 0 ]
|
|
|
|
|
addOffCorrectMap(outputCharCount, cumulativeDiff);
|
|
|
|
|
inputSegment.clear();
|
|
|
|
|
yybegin(YYINITIAL);
|
|
|
|
|
}
|
|
|
|
|
case 84: break;
|
|
|
|
|
case 6:
|
|
|
|
|
{ int matchLength = yylength();
|
|
|
|
|
inputSegment.write(zzBuffer, zzStartRead, matchLength);
|
|
|
|
|
if (matchLength <= 7) { // 0x10FFFF = 1114111: max 7 decimal chars
|
|
|
|
|
String decimalCharRef = yytext();
|
|
|
|
|
int codePoint = 0;
|
|
|
|
|
try {
|
|
|
|
|
codePoint = Integer.parseInt(decimalCharRef);
|
|
|
|
|
} catch(Exception e) {
|
|
|
|
|
assert false: "Exception parsing code point '" + decimalCharRef + "'";
|
|
|
|
|
}
|
|
|
|
|
if (codePoint <= 0x10FFFF) {
|
|
|
|
|
outputSegment = entitySegment;
|
|
|
|
|
outputSegment.clear();
|
|
|
|
|
if (codePoint >= Character.MIN_SURROGATE
|
|
|
|
|
&& codePoint <= Character.MAX_SURROGATE) {
|
|
|
|
|
outputSegment.unsafeWrite(REPLACEMENT_CHARACTER);
|
|
|
|
|
} else {
|
|
|
|
|
outputSegment.setLength
|
|
|
|
|
(Character.toChars(codePoint, outputSegment.getArray(), 0));
|
|
|
|
|
}
|
|
|
|
|
yybegin(CHARACTER_REFERENCE_TAIL);
|
|
|
|
|
} else {
|
|
|
|
|
outputSegment = inputSegment;
|
|
|
|
|
yybegin(YYINITIAL);
|
|
|
|
|
return outputSegment.nextChar();
|
|
|
|
|
}
|
|
|
|
|
} else {
|
|
|
|
|
outputSegment = inputSegment;
|
|
|
|
|
yybegin(YYINITIAL);
|
|
|
|
|
return outputSegment.nextChar();
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
case 85: break;
|
|
|
|
|
case 34:
|
|
|
|
|
{ // add (previously matched input length) + (this match length) [ - (substitution length) = 0]
|
|
|
|
|
cumulativeDiff += yychar - inputStart + yylength();
|
|
|
|
|
// position the correction at (already output length) [ + (substitution length) = 0]
|
|
|
|
|
addOffCorrectMap(outputCharCount, cumulativeDiff);
|
|
|
|
|
inputSegment.clear();
|
|
|
|
|
yybegin(YYINITIAL);
|
|
|
|
|
}
|
|
|
|
|
case 86: break;
|
|
|
|
|
case 5:
|
|
|
|
|
{ inputSegment.append('#'); yybegin(NUMERIC_CHARACTER);
|
|
|
|
|
}
|
|
|
|
|
case 87: break;
|
|
|
|
|
case 13:
|
|
|
|
|
{ inputSegment.append(zzBuffer[zzStartRead]);
|
|
|
|
|
}
|
|
|
|
|
case 88: break;
|
|
|
|
|
case 18:
|
|
|
|
|
{ inputSegment.write(zzBuffer, zzStartRead, yylength());
|
|
|
|
|
if (null != escapedTags
|
|
|
|
|
&& escapedTags.contains(zzBuffer, zzStartRead, yylength())) {
|
|
|
|
|
yybegin(END_TAG_TAIL_INCLUDE);
|
|
|
|
|
} else {
|
|
|
|
|
yybegin(END_TAG_TAIL_SUBSTITUTE);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
case 89: break;
|
|
|
|
|
case 40:
|
|
|
|
|
{ yybegin(SCRIPT_COMMENT);
|
|
|
|
|
}
|
|
|
|
|
case 90: break;
|
|
|
|
|
case 37:
|
|
|
|
|
{ // add (this match length) [ - (substitution length) = 0 ]
|
|
|
|
|
cumulativeDiff += yylength();
|
|
|
|
|
// position the correction at (already output length) [ + (substitution length) = 0 ]
|
|
|
|
|
addOffCorrectMap(outputCharCount, cumulativeDiff);
|
|
|
|
|
yybegin(YYINITIAL);
|
|
|
|
|
}
|
|
|
|
|
case 91: break;
|
|
|
|
|
case 12:
|
|
|
|
|
{ inputSegment.append('/'); yybegin(LEFT_ANGLE_BRACKET_SLASH);
|
|
|
|
|
}
|
|
|
|
|
case 92: break;
|
|
|
|
|
case 9:
|
|
|
|
|
{ inputSegment.write(zzBuffer, zzStartRead, yylength());
|
|
|
|
|
if (null != escapedTags
|
|
|
|
|
&& escapedTags.contains(zzBuffer, zzStartRead, yylength())) {
|
|
|
|
|
yybegin(START_TAG_TAIL_INCLUDE);
|
|
|
|
|
} else {
|
|
|
|
|
yybegin(START_TAG_TAIL_EXCLUDE);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
case 93: break;
|
|
|
|
|
case 49:
|
|
|
|
|
{ inputSegment.clear();
|
|
|
|
|
yybegin(YYINITIAL);
|
|
|
|
|
// add (previously matched input length) -- current match and substitution handled below
|
|
|
|
|
cumulativeDiff += yychar - inputStart;
|
|
|
|
|
// position at (already output length) -- substitution handled below
|
|
|
|
|
int offsetCorrectionPos = outputCharCount;
|
|
|
|
|
int returnValue;
|
|
|
|
|
if (escapeSCRIPT) {
|
|
|
|
|
inputSegment.write(zzBuffer, zzStartRead, yylength());
|
|
|
|
|
outputSegment = inputSegment;
|
|
|
|
|
returnValue = outputSegment.nextChar();
|
|
|
|
|
} else {
|
|
|
|
|
// add (this match length) - (substitution length)
|
|
|
|
|
cumulativeDiff += yylength() - 1;
|
|
|
|
|
// add (substitution length)
|
|
|
|
|
++offsetCorrectionPos;
|
|
|
|
|
returnValue = SCRIPT_REPLACEMENT;
|
|
|
|
|
}
|
|
|
|
|
addOffCorrectMap(offsetCorrectionPos, cumulativeDiff);
|
|
|
|
|
return returnValue;
|
|
|
|
|
}
|
|
|
|
|
case 94: break;
|
|
|
|
|
case 29:
|
|
|
|
|
{ restoreState = STYLE_COMMENT; yybegin(DOUBLE_QUOTED_STRING);
|
|
|
|
|
}
|
|
|
|
|
case 95: break;
|
|
|
|
|
case 17:
|
|
|
|
|
{ restoreState = SCRIPT_COMMENT; yybegin(DOUBLE_QUOTED_STRING);
|
|
|
|
|
}
|
|
|
|
|
case 96: break;
|
|
|
|
|
case 45:
|
|
|
|
|
{ yybegin(STYLE);
|
|
|
|
|
if (escapeSTYLE) {
|
|
|
|
|
inputSegment.write(zzBuffer, zzStartRead, yylength());
|
|
|
|
|
outputSegment = inputSegment;
|
|
|
|
|
inputStart += 1 + yylength();
|
|
|
|
|
return outputSegment.nextChar();
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
case 97: break;
|
|
|
|
|
case 7:
|
|
|
|
|
{ // add (previously matched input length) + (this match length) - (substitution length)
|
|
|
|
|
cumulativeDiff += inputSegment.length() + yylength() - outputSegment.length();
|
|
|
|
|
// position the correction at (already output length) + (substitution length)
|
|
|
|
|
addOffCorrectMap(outputCharCount + outputSegment.length(), cumulativeDiff);
|
|
|
|
|
yybegin(YYINITIAL);
|
|
|
|
|
return outputSegment.nextChar();
|
|
|
|
|
}
|
|
|
|
|
case 98: break;
|
|
|
|
|
case 19:
|
|
|
|
|
{ inputSegment.write(zzBuffer, zzStartRead, yylength());
|
|
|
|
|
if (null != escapedTags
|
|
|
|
|
&& escapedTags.contains(zzBuffer, zzStartRead, yylength())) {
|
|
|
|
|
yybegin(END_TAG_TAIL_INCLUDE);
|
|
|
|
|
} else {
|
|
|
|
|
yybegin(END_TAG_TAIL_EXCLUDE);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
case 99: break;
|
|
|
|
|
case 25:
|
|
|
|
|
{ // add (previously matched input length) + (this match length) - (substitution length)
|
|
|
|
|
cumulativeDiff += inputSegment.length() + yylength() - 1;
|
|
|
|
|
// position the correction at (already output length) + (substitution length)
|
|
|
|
|
addOffCorrectMap(outputCharCount + 1, cumulativeDiff);
|
|
|
|
|
inputSegment.clear();
|
|
|
|
|
yybegin(YYINITIAL);
|
|
|
|
|
return BLOCK_LEVEL_END_TAG_REPLACEMENT;
|
|
|
|
|
}
|
|
|
|
|
case 100: break;
|
|
|
|
|
case 31:
|
|
|
|
|
{ int matchLength = yylength();
|
|
|
|
|
inputSegment.write(zzBuffer, zzStartRead, matchLength);
|
|
|
|
@ -31329,66 +31734,7 @@ public final class HTMLStripCharFilter extends BaseCharFilter {
|
|
|
|
|
return outputSegment.nextChar();
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
case 58: break;
|
|
|
|
|
case 19:
|
|
|
|
|
{ inputSegment.write(zzBuffer, zzStartRead, yylength());
|
|
|
|
|
if (null != escapedTags
|
|
|
|
|
&& escapedTags.contains(zzBuffer, zzStartRead, yylength())) {
|
|
|
|
|
yybegin(END_TAG_TAIL_INCLUDE);
|
|
|
|
|
} else {
|
|
|
|
|
yybegin(END_TAG_TAIL_EXCLUDE);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
case 59: break;
|
|
|
|
|
case 2:
|
|
|
|
|
{ inputStart = yychar;
|
|
|
|
|
inputSegment.clear();
|
|
|
|
|
inputSegment.append('<');
|
|
|
|
|
yybegin(LEFT_ANGLE_BRACKET);
|
|
|
|
|
}
|
|
|
|
|
case 60: break;
|
|
|
|
|
case 27:
|
|
|
|
|
{ // add (previously matched input length) + (this match length) - (substitution length)
|
|
|
|
|
cumulativeDiff += inputSegment.length() + yylength() - 1;
|
|
|
|
|
// position the correction at (already output length) + (substitution length)
|
|
|
|
|
addOffCorrectMap(outputCharCount + 1, cumulativeDiff);
|
|
|
|
|
inputSegment.clear();
|
|
|
|
|
yybegin(YYINITIAL);
|
|
|
|
|
return BLOCK_LEVEL_START_TAG_REPLACEMENT;
|
|
|
|
|
}
|
|
|
|
|
case 61: break;
|
|
|
|
|
case 44:
|
|
|
|
|
{ restoreState = STYLE_COMMENT; yybegin(SERVER_SIDE_INCLUDE);
|
|
|
|
|
}
|
|
|
|
|
case 62: break;
|
|
|
|
|
case 35:
|
|
|
|
|
{ yybegin(SCRIPT);
|
|
|
|
|
}
|
|
|
|
|
case 63: break;
|
|
|
|
|
case 42:
|
|
|
|
|
{ restoreState = COMMENT; yybegin(SERVER_SIDE_INCLUDE);
|
|
|
|
|
}
|
|
|
|
|
case 64: break;
|
|
|
|
|
case 10:
|
|
|
|
|
{ inputSegment.append('!'); yybegin(BANG);
|
|
|
|
|
}
|
|
|
|
|
case 65: break;
|
|
|
|
|
case 33:
|
|
|
|
|
{ yybegin(YYINITIAL);
|
|
|
|
|
if (escapeBR) {
|
|
|
|
|
inputSegment.write(zzBuffer, zzStartRead, yylength());
|
|
|
|
|
outputSegment = inputSegment;
|
|
|
|
|
return outputSegment.nextChar();
|
|
|
|
|
} else {
|
|
|
|
|
// add (previously matched input length) + (this match length) - (substitution length)
|
|
|
|
|
cumulativeDiff += inputSegment.length() + yylength() - 1;
|
|
|
|
|
// position the correction at (already output length) + (substitution length)
|
|
|
|
|
addOffCorrectMap(outputCharCount + 1, cumulativeDiff);
|
|
|
|
|
inputSegment.reset();
|
|
|
|
|
return BR_START_TAG_REPLACEMENT;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
case 66: break;
|
|
|
|
|
case 101: break;
|
|
|
|
|
case 53:
|
|
|
|
|
{ // Handle paired UTF-16 surrogates.
|
|
|
|
|
String surrogatePair = yytext();
|
|
|
|
@ -31424,288 +31770,7 @@ public final class HTMLStripCharFilter extends BaseCharFilter {
|
|
|
|
|
inputSegment.append('#');
|
|
|
|
|
yybegin(NUMERIC_CHARACTER);
|
|
|
|
|
}
|
|
|
|
|
case 67: break;
|
|
|
|
|
case 43:
|
|
|
|
|
{ restoreState = SCRIPT_COMMENT; yybegin(SERVER_SIDE_INCLUDE);
|
|
|
|
|
}
|
|
|
|
|
case 68: break;
|
|
|
|
|
case 30:
|
|
|
|
|
{ int length = yylength();
|
|
|
|
|
inputSegment.write(zzBuffer, zzStartRead, length);
|
|
|
|
|
entitySegment.clear();
|
|
|
|
|
char ch = entityValues.get(zzBuffer, zzStartRead, length).charValue();
|
|
|
|
|
entitySegment.append(ch);
|
|
|
|
|
outputSegment = entitySegment;
|
|
|
|
|
yybegin(CHARACTER_REFERENCE_TAIL);
|
|
|
|
|
}
|
|
|
|
|
case 69: break;
|
|
|
|
|
case 28:
|
|
|
|
|
{ restoreState = STYLE_COMMENT; yybegin(SINGLE_QUOTED_STRING);
|
|
|
|
|
}
|
|
|
|
|
case 70: break;
|
|
|
|
|
case 3:
|
|
|
|
|
{ inputStart = yychar;
|
|
|
|
|
inputSegment.clear();
|
|
|
|
|
inputSegment.append('&');
|
|
|
|
|
yybegin(AMPERSAND);
|
|
|
|
|
}
|
|
|
|
|
case 71: break;
|
|
|
|
|
case 16:
|
|
|
|
|
{ restoreState = SCRIPT_COMMENT; yybegin(SINGLE_QUOTED_STRING);
|
|
|
|
|
}
|
|
|
|
|
case 72: break;
|
|
|
|
|
case 52:
|
|
|
|
|
{ // Handle paired UTF-16 surrogates.
|
|
|
|
|
String surrogatePair = yytext();
|
|
|
|
|
char highSurrogate = '\u0000';
|
|
|
|
|
try { // High surrogates are in decimal range [55296, 56319]
|
|
|
|
|
highSurrogate = (char)Integer.parseInt(surrogatePair.substring(1, 6));
|
|
|
|
|
} catch(Exception e) { // should never happen
|
|
|
|
|
assert false: "Exception parsing high surrogate '"
|
|
|
|
|
+ surrogatePair.substring(1, 6) + "'";
|
|
|
|
|
}
|
|
|
|
|
if (Character.isHighSurrogate(highSurrogate)) {
|
|
|
|
|
outputSegment = entitySegment;
|
|
|
|
|
outputSegment.clear();
|
|
|
|
|
try {
|
|
|
|
|
outputSegment.unsafeWrite
|
|
|
|
|
((char)Integer.parseInt(surrogatePair.substring(10, 14), 16));
|
|
|
|
|
} catch(Exception e) { // should never happen
|
|
|
|
|
assert false: "Exception parsing low surrogate '"
|
|
|
|
|
+ surrogatePair.substring(10, 14) + "'";
|
|
|
|
|
}
|
|
|
|
|
// add (previously matched input length) + (this match length) - (substitution length)
|
|
|
|
|
cumulativeDiff += inputSegment.length() + yylength() - 2;
|
|
|
|
|
// position the correction at (already output length) + (substitution length)
|
|
|
|
|
addOffCorrectMap(outputCharCount + 2, cumulativeDiff);
|
|
|
|
|
inputSegment.clear();
|
|
|
|
|
yybegin(YYINITIAL);
|
|
|
|
|
return highSurrogate;
|
|
|
|
|
}
|
|
|
|
|
yypushback(surrogatePair.length() - 1); // Consume only '#'
|
|
|
|
|
inputSegment.append('#');
|
|
|
|
|
yybegin(NUMERIC_CHARACTER);
|
|
|
|
|
}
|
|
|
|
|
case 73: break;
|
|
|
|
|
case 6:
|
|
|
|
|
{ int matchLength = yylength();
|
|
|
|
|
inputSegment.write(zzBuffer, zzStartRead, matchLength);
|
|
|
|
|
if (matchLength <= 7) { // 0x10FFFF = 1114111: max 7 decimal chars
|
|
|
|
|
String decimalCharRef = yytext();
|
|
|
|
|
int codePoint = 0;
|
|
|
|
|
try {
|
|
|
|
|
codePoint = Integer.parseInt(decimalCharRef);
|
|
|
|
|
} catch(Exception e) {
|
|
|
|
|
assert false: "Exception parsing code point '" + decimalCharRef + "'";
|
|
|
|
|
}
|
|
|
|
|
if (codePoint <= 0x10FFFF) {
|
|
|
|
|
outputSegment = entitySegment;
|
|
|
|
|
outputSegment.clear();
|
|
|
|
|
if (codePoint >= Character.MIN_SURROGATE
|
|
|
|
|
&& codePoint <= Character.MAX_SURROGATE) {
|
|
|
|
|
outputSegment.unsafeWrite(REPLACEMENT_CHARACTER);
|
|
|
|
|
} else {
|
|
|
|
|
outputSegment.setLength
|
|
|
|
|
(Character.toChars(codePoint, outputSegment.getArray(), 0));
|
|
|
|
|
}
|
|
|
|
|
yybegin(CHARACTER_REFERENCE_TAIL);
|
|
|
|
|
} else {
|
|
|
|
|
outputSegment = inputSegment;
|
|
|
|
|
yybegin(YYINITIAL);
|
|
|
|
|
return outputSegment.nextChar();
|
|
|
|
|
}
|
|
|
|
|
} else {
|
|
|
|
|
outputSegment = inputSegment;
|
|
|
|
|
yybegin(YYINITIAL);
|
|
|
|
|
return outputSegment.nextChar();
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
case 74: break;
|
|
|
|
|
case 37:
|
|
|
|
|
{ // add (this match length) [ - (substitution length) = 0 ]
|
|
|
|
|
cumulativeDiff += yylength();
|
|
|
|
|
// position the correction at (already output length) [ + (substitution length) = 0 ]
|
|
|
|
|
addOffCorrectMap(outputCharCount, cumulativeDiff);
|
|
|
|
|
yybegin(YYINITIAL);
|
|
|
|
|
}
|
|
|
|
|
case 75: break;
|
|
|
|
|
case 8:
|
|
|
|
|
{ inputSegment.write(zzBuffer, zzStartRead, yylength());
|
|
|
|
|
if (null != escapedTags
|
|
|
|
|
&& escapedTags.contains(zzBuffer, zzStartRead, yylength())) {
|
|
|
|
|
yybegin(START_TAG_TAIL_INCLUDE);
|
|
|
|
|
} else {
|
|
|
|
|
yybegin(START_TAG_TAIL_SUBSTITUTE);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
case 76: break;
|
|
|
|
|
case 46:
|
|
|
|
|
{ yybegin(SCRIPT);
|
|
|
|
|
if (escapeSCRIPT) {
|
|
|
|
|
inputSegment.write(zzBuffer, zzStartRead, yylength());
|
|
|
|
|
outputSegment = inputSegment;
|
|
|
|
|
inputStart += 1 + yylength();
|
|
|
|
|
return outputSegment.nextChar();
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
case 77: break;
|
|
|
|
|
case 11:
|
|
|
|
|
{ inputSegment.write(zzBuffer, zzStartRead, yylength());
|
|
|
|
|
yybegin(LEFT_ANGLE_BRACKET_SPACE);
|
|
|
|
|
}
|
|
|
|
|
case 78: break;
|
|
|
|
|
case 20:
|
|
|
|
|
{ inputSegment.write(zzBuffer, zzStartRead, yylength());
|
|
|
|
|
}
|
|
|
|
|
case 79: break;
|
|
|
|
|
case 34:
|
|
|
|
|
{ // add (previously matched input length) + (this match length) [ - (substitution length) = 0]
|
|
|
|
|
cumulativeDiff += yychar - inputStart + yylength();
|
|
|
|
|
// position the correction at (already output length) [ + (substitution length) = 0]
|
|
|
|
|
addOffCorrectMap(outputCharCount, cumulativeDiff);
|
|
|
|
|
inputSegment.clear();
|
|
|
|
|
yybegin(YYINITIAL);
|
|
|
|
|
}
|
|
|
|
|
case 80: break;
|
|
|
|
|
case 23:
|
|
|
|
|
{ yybegin(restoreState); restoreState = previousRestoreState;
|
|
|
|
|
}
|
|
|
|
|
case 81: break;
|
|
|
|
|
case 32:
|
|
|
|
|
{ yybegin(COMMENT);
|
|
|
|
|
}
|
|
|
|
|
case 82: break;
|
|
|
|
|
case 14:
|
|
|
|
|
{ // add (previously matched input length) + (this match length) [ - (substitution length) = 0 ]
|
|
|
|
|
cumulativeDiff += inputSegment.length() + yylength();
|
|
|
|
|
// position the correction at (already output length) [ + (substitution length) = 0 ]
|
|
|
|
|
addOffCorrectMap(outputCharCount, cumulativeDiff);
|
|
|
|
|
inputSegment.clear();
|
|
|
|
|
yybegin(YYINITIAL);
|
|
|
|
|
}
|
|
|
|
|
case 83: break;
|
|
|
|
|
case 18:
|
|
|
|
|
{ inputSegment.write(zzBuffer, zzStartRead, yylength());
|
|
|
|
|
if (null != escapedTags
|
|
|
|
|
&& escapedTags.contains(zzBuffer, zzStartRead, yylength())) {
|
|
|
|
|
yybegin(END_TAG_TAIL_INCLUDE);
|
|
|
|
|
} else {
|
|
|
|
|
yybegin(END_TAG_TAIL_SUBSTITUTE);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
case 84: break;
|
|
|
|
|
case 25:
|
|
|
|
|
{ // add (previously matched input length) + (this match length) - (substitution length)
|
|
|
|
|
cumulativeDiff += inputSegment.length() + yylength() - 1;
|
|
|
|
|
// position the correction at (already output length) + (substitution length)
|
|
|
|
|
addOffCorrectMap(outputCharCount + 1, cumulativeDiff);
|
|
|
|
|
inputSegment.clear();
|
|
|
|
|
yybegin(YYINITIAL);
|
|
|
|
|
return BLOCK_LEVEL_END_TAG_REPLACEMENT;
|
|
|
|
|
}
|
|
|
|
|
case 85: break;
|
|
|
|
|
case 7:
|
|
|
|
|
{ // add (previously matched input length) + (this match length) - (substitution length)
|
|
|
|
|
cumulativeDiff += inputSegment.length() + yylength() - outputSegment.length();
|
|
|
|
|
// position the correction at (already output length) + (substitution length)
|
|
|
|
|
addOffCorrectMap(outputCharCount + outputSegment.length(), cumulativeDiff);
|
|
|
|
|
yybegin(YYINITIAL);
|
|
|
|
|
return outputSegment.nextChar();
|
|
|
|
|
}
|
|
|
|
|
case 86: break;
|
|
|
|
|
case 48:
|
|
|
|
|
{ inputSegment.clear();
|
|
|
|
|
yybegin(YYINITIAL);
|
|
|
|
|
// add (previously matched input length) -- current match and substitution handled below
|
|
|
|
|
cumulativeDiff += yychar - inputStart;
|
|
|
|
|
// position the offset correction at (already output length) -- substitution handled below
|
|
|
|
|
int offsetCorrectionPos = outputCharCount;
|
|
|
|
|
int returnValue;
|
|
|
|
|
if (escapeSTYLE) {
|
|
|
|
|
inputSegment.write(zzBuffer, zzStartRead, yylength());
|
|
|
|
|
outputSegment = inputSegment;
|
|
|
|
|
returnValue = outputSegment.nextChar();
|
|
|
|
|
} else {
|
|
|
|
|
// add (this match length) - (substitution length)
|
|
|
|
|
cumulativeDiff += yylength() - 1;
|
|
|
|
|
// add (substitution length)
|
|
|
|
|
++offsetCorrectionPos;
|
|
|
|
|
returnValue = STYLE_REPLACEMENT;
|
|
|
|
|
}
|
|
|
|
|
addOffCorrectMap(offsetCorrectionPos, cumulativeDiff);
|
|
|
|
|
return returnValue;
|
|
|
|
|
}
|
|
|
|
|
case 87: break;
|
|
|
|
|
case 5:
|
|
|
|
|
{ inputSegment.append('#'); yybegin(NUMERIC_CHARACTER);
|
|
|
|
|
}
|
|
|
|
|
case 88: break;
|
|
|
|
|
case 26:
|
|
|
|
|
{ // add (previously matched input length) + (this match length) [ - (substitution length) = 0 ]
|
|
|
|
|
cumulativeDiff += inputSegment.length() + yylength();
|
|
|
|
|
// position the correction at (already output length) [ + (substitution length) = 0 ]
|
|
|
|
|
addOffCorrectMap(outputCharCount, cumulativeDiff);
|
|
|
|
|
inputSegment.clear();
|
|
|
|
|
outputSegment = inputSegment;
|
|
|
|
|
yybegin(YYINITIAL);
|
|
|
|
|
}
|
|
|
|
|
case 89: break;
|
|
|
|
|
case 13:
|
|
|
|
|
{ inputSegment.append(zzBuffer[zzStartRead]);
|
|
|
|
|
}
|
|
|
|
|
case 90: break;
|
|
|
|
|
case 50:
|
|
|
|
|
{ // Handle paired UTF-16 surrogates.
|
|
|
|
|
outputSegment = entitySegment;
|
|
|
|
|
outputSegment.clear();
|
|
|
|
|
String surrogatePair = yytext();
|
|
|
|
|
char highSurrogate = '\u0000';
|
|
|
|
|
try {
|
|
|
|
|
highSurrogate = (char)Integer.parseInt(surrogatePair.substring(2, 6), 16);
|
|
|
|
|
} catch(Exception e) { // should never happen
|
|
|
|
|
assert false: "Exception parsing high surrogate '"
|
|
|
|
|
+ surrogatePair.substring(2, 6) + "'";
|
|
|
|
|
}
|
|
|
|
|
try {
|
|
|
|
|
outputSegment.unsafeWrite
|
|
|
|
|
((char)Integer.parseInt(surrogatePair.substring(10, 14), 16));
|
|
|
|
|
} catch(Exception e) { // should never happen
|
|
|
|
|
assert false: "Exception parsing low surrogate '"
|
|
|
|
|
+ surrogatePair.substring(10, 14) + "'";
|
|
|
|
|
}
|
|
|
|
|
// add (previously matched input length) + (this match length) - (substitution length)
|
|
|
|
|
cumulativeDiff += inputSegment.length() + yylength() - 2;
|
|
|
|
|
// position the correction at (already output length) + (substitution length)
|
|
|
|
|
addOffCorrectMap(outputCharCount + 2, cumulativeDiff);
|
|
|
|
|
inputSegment.clear();
|
|
|
|
|
yybegin(YYINITIAL);
|
|
|
|
|
return highSurrogate;
|
|
|
|
|
}
|
|
|
|
|
case 91: break;
|
|
|
|
|
case 40:
|
|
|
|
|
{ yybegin(SCRIPT_COMMENT);
|
|
|
|
|
}
|
|
|
|
|
case 92: break;
|
|
|
|
|
case 45:
|
|
|
|
|
{ yybegin(STYLE);
|
|
|
|
|
if (escapeSTYLE) {
|
|
|
|
|
inputSegment.write(zzBuffer, zzStartRead, yylength());
|
|
|
|
|
outputSegment = inputSegment;
|
|
|
|
|
inputStart += 1 + yylength();
|
|
|
|
|
return outputSegment.nextChar();
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
case 93: break;
|
|
|
|
|
case 22:
|
|
|
|
|
{ previousRestoreState = restoreState;
|
|
|
|
|
restoreState = SERVER_SIDE_INCLUDE;
|
|
|
|
|
yybegin(DOUBLE_QUOTED_STRING);
|
|
|
|
|
}
|
|
|
|
|
case 94: break;
|
|
|
|
|
case 12:
|
|
|
|
|
{ inputSegment.append('/'); yybegin(LEFT_ANGLE_BRACKET_SLASH);
|
|
|
|
|
}
|
|
|
|
|
case 95: break;
|
|
|
|
|
case 102: break;
|
|
|
|
|
case 36:
|
|
|
|
|
{ yybegin(YYINITIAL);
|
|
|
|
|
if (escapeBR) {
|
|
|
|
@ -31721,83 +31786,18 @@ public final class HTMLStripCharFilter extends BaseCharFilter {
|
|
|
|
|
return BR_END_TAG_REPLACEMENT;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
case 96: break;
|
|
|
|
|
case 24:
|
|
|
|
|
{ inputSegment.write(zzBuffer, zzStartRead, yylength());
|
|
|
|
|
outputSegment = inputSegment;
|
|
|
|
|
yybegin(YYINITIAL);
|
|
|
|
|
return outputSegment.nextChar();
|
|
|
|
|
}
|
|
|
|
|
case 97: break;
|
|
|
|
|
case 47:
|
|
|
|
|
{ // add (previously matched input length) + (this match length) [ - (substitution length) = 0 ]
|
|
|
|
|
cumulativeDiff += inputSegment.length() + yylength();
|
|
|
|
|
// position the correction at (already output length) [ + (substitution length) = 0 ]
|
|
|
|
|
addOffCorrectMap(outputCharCount, cumulativeDiff);
|
|
|
|
|
inputSegment.clear();
|
|
|
|
|
yybegin(CDATA);
|
|
|
|
|
}
|
|
|
|
|
case 98: break;
|
|
|
|
|
case 29:
|
|
|
|
|
{ restoreState = STYLE_COMMENT; yybegin(DOUBLE_QUOTED_STRING);
|
|
|
|
|
}
|
|
|
|
|
case 99: break;
|
|
|
|
|
case 17:
|
|
|
|
|
{ restoreState = SCRIPT_COMMENT; yybegin(DOUBLE_QUOTED_STRING);
|
|
|
|
|
}
|
|
|
|
|
case 100: break;
|
|
|
|
|
case 9:
|
|
|
|
|
{ inputSegment.write(zzBuffer, zzStartRead, yylength());
|
|
|
|
|
if (null != escapedTags
|
|
|
|
|
&& escapedTags.contains(zzBuffer, zzStartRead, yylength())) {
|
|
|
|
|
yybegin(START_TAG_TAIL_INCLUDE);
|
|
|
|
|
} else {
|
|
|
|
|
yybegin(START_TAG_TAIL_EXCLUDE);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
case 101: break;
|
|
|
|
|
case 49:
|
|
|
|
|
{ inputSegment.clear();
|
|
|
|
|
yybegin(YYINITIAL);
|
|
|
|
|
// add (previously matched input length) -- current match and substitution handled below
|
|
|
|
|
cumulativeDiff += yychar - inputStart;
|
|
|
|
|
// position at (already output length) -- substitution handled below
|
|
|
|
|
int offsetCorrectionPos = outputCharCount;
|
|
|
|
|
int returnValue;
|
|
|
|
|
if (escapeSCRIPT) {
|
|
|
|
|
inputSegment.write(zzBuffer, zzStartRead, yylength());
|
|
|
|
|
outputSegment = inputSegment;
|
|
|
|
|
returnValue = outputSegment.nextChar();
|
|
|
|
|
} else {
|
|
|
|
|
// add (this match length) - (substitution length)
|
|
|
|
|
cumulativeDiff += yylength() - 1;
|
|
|
|
|
// add (substitution length)
|
|
|
|
|
++offsetCorrectionPos;
|
|
|
|
|
returnValue = SCRIPT_REPLACEMENT;
|
|
|
|
|
}
|
|
|
|
|
addOffCorrectMap(offsetCorrectionPos, cumulativeDiff);
|
|
|
|
|
return returnValue;
|
|
|
|
|
}
|
|
|
|
|
case 102: break;
|
|
|
|
|
case 103: break;
|
|
|
|
|
case 38:
|
|
|
|
|
{ yybegin(restoreState);
|
|
|
|
|
}
|
|
|
|
|
case 103: break;
|
|
|
|
|
case 104: break;
|
|
|
|
|
case 41:
|
|
|
|
|
{ yybegin(STYLE_COMMENT);
|
|
|
|
|
}
|
|
|
|
|
case 104: break;
|
|
|
|
|
case 105: break;
|
|
|
|
|
case 1:
|
|
|
|
|
{ return zzBuffer[zzStartRead];
|
|
|
|
|
}
|
|
|
|
|
case 105: break;
|
|
|
|
|
case 4:
|
|
|
|
|
{ yypushback(1);
|
|
|
|
|
outputSegment = inputSegment;
|
|
|
|
|
outputSegment.restart();
|
|
|
|
|
yybegin(YYINITIAL);
|
|
|
|
|
return outputSegment.nextChar();
|
|
|
|
|
}
|
|
|
|
|
case 106: break;
|
|
|
|
|
default:
|
|
|
|
|
if (zzInput == YYEOF && zzStartRead == zzCurrentPos) {
|
|
|
|
|