LUCENE-2847: Added ASL2 license to supplementary macros generator, and to the generated file, and set svn:eol-style to native for both of them.

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1056014 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Steven Rowe 2011-01-06 19:15:21 +00:00
parent fbfb07d904
commit 8d7d57abdc
2 changed files with 163 additions and 105 deletions

View File

@ -1,105 +1,125 @@
ALetterSupp = (
([\ud80d][\uDC00-\uDC2E])
| ([\ud80c][\uDC00-\uDFFF])
| ([\ud809][\uDC00-\uDC62])
| ([\ud808][\uDC00-\uDF6E])
| ([\ud81a][\uDC00-\uDE38])
| ([\ud804][\uDC03-\uDC37\uDC83-\uDCAF])
| ([\ud835][\uDC00-\uDC54\uDC56-\uDC9C\uDC9E\uDC9F\uDCA2\uDCA5\uDCA6\uDCA9-\uDCAC\uDCAE-\uDCB9\uDCBB\uDCBD-\uDCC3\uDCC5-\uDD05\uDD07-\uDD0A\uDD0D-\uDD14\uDD16-\uDD1C\uDD1E-\uDD39\uDD3B-\uDD3E\uDD40-\uDD44\uDD46\uDD4A-\uDD50\uDD52-\uDEA5\uDEA8-\uDEC0\uDEC2-\uDEDA\uDEDC-\uDEFA\uDEFC-\uDF14\uDF16-\uDF34\uDF36-\uDF4E\uDF50-\uDF6E\uDF70-\uDF88\uDF8A-\uDFA8\uDFAA-\uDFC2\uDFC4-\uDFCB])
| ([\ud801][\uDC00-\uDC9D])
| ([\ud800][\uDC00-\uDC0B\uDC0D-\uDC26\uDC28-\uDC3A\uDC3C\uDC3D\uDC3F-\uDC4D\uDC50-\uDC5D\uDC80-\uDCFA\uDD40-\uDD74\uDE80-\uDE9C\uDEA0-\uDED0\uDF00-\uDF1E\uDF30-\uDF4A\uDF80-\uDF9D\uDFA0-\uDFC3\uDFC8-\uDFCF\uDFD1-\uDFD5])
| ([\ud803][\uDC00-\uDC48])
| ([\ud802][\uDC00-\uDC05\uDC08\uDC0A-\uDC35\uDC37\uDC38\uDC3C\uDC3F-\uDC55\uDD00-\uDD15\uDD20-\uDD39\uDE00\uDE10-\uDE13\uDE15-\uDE17\uDE19-\uDE33\uDE60-\uDE7C\uDF00-\uDF35\uDF40-\uDF55\uDF60-\uDF72])
)
FormatSupp = (
([\ud804][\uDCBD])
| ([\ud834][\uDD73-\uDD7A])
| ([\udb40][\uDC01\uDC20-\uDC7F])
)
ExtendSupp = (
([\ud804][\uDC00-\uDC02\uDC38-\uDC46\uDC80-\uDC82\uDCB0-\uDCBA])
| ([\ud834][\uDD65-\uDD69\uDD6D-\uDD72\uDD7B-\uDD82\uDD85-\uDD8B\uDDAA-\uDDAD\uDE42-\uDE44])
| ([\ud800][\uDDFD])
| ([\udb40][\uDD00-\uDDEF])
| ([\ud802][\uDE01-\uDE03\uDE05\uDE06\uDE0C-\uDE0F\uDE38-\uDE3A\uDE3F])
)
NumericSupp = (
([\ud804][\uDC66-\uDC6F])
| ([\ud835][\uDFCE-\uDFFF])
| ([\ud801][\uDCA0-\uDCA9])
)
KatakanaSupp = (
([\ud82c][\uDC00])
)
MidLetterSupp = (
[]
)
MidNumSupp = (
[]
)
MidNumLetSupp = (
[]
)
// Defined once: the generated output previously emitted this macro twice in a
// row with identical bodies. The empty class [] matches no supplementary
// code points.
ExtendNumLetSupp = (
[]
)
ComplexContextSupp = (
[]
)
HanSupp = (
([\ud87e][\uDC00-\uDE1D])
| ([\ud86b][\uDC00-\uDFFF])
| ([\ud86a][\uDC00-\uDFFF])
| ([\ud869][\uDC00-\uDED6\uDF00-\uDFFF])
| ([\ud868][\uDC00-\uDFFF])
| ([\ud86e][\uDC00-\uDC1D])
| ([\ud86d][\uDC00-\uDF34\uDF40-\uDFFF])
| ([\ud86c][\uDC00-\uDFFF])
| ([\ud863][\uDC00-\uDFFF])
| ([\ud862][\uDC00-\uDFFF])
| ([\ud861][\uDC00-\uDFFF])
| ([\ud860][\uDC00-\uDFFF])
| ([\ud867][\uDC00-\uDFFF])
| ([\ud866][\uDC00-\uDFFF])
| ([\ud865][\uDC00-\uDFFF])
| ([\ud864][\uDC00-\uDFFF])
| ([\ud858][\uDC00-\uDFFF])
| ([\ud859][\uDC00-\uDFFF])
| ([\ud85a][\uDC00-\uDFFF])
| ([\ud85b][\uDC00-\uDFFF])
| ([\ud85c][\uDC00-\uDFFF])
| ([\ud85d][\uDC00-\uDFFF])
| ([\ud85e][\uDC00-\uDFFF])
| ([\ud85f][\uDC00-\uDFFF])
| ([\ud850][\uDC00-\uDFFF])
| ([\ud851][\uDC00-\uDFFF])
| ([\ud852][\uDC00-\uDFFF])
| ([\ud853][\uDC00-\uDFFF])
| ([\ud854][\uDC00-\uDFFF])
| ([\ud855][\uDC00-\uDFFF])
| ([\ud856][\uDC00-\uDFFF])
| ([\ud857][\uDC00-\uDFFF])
| ([\ud849][\uDC00-\uDFFF])
| ([\ud848][\uDC00-\uDFFF])
| ([\ud84b][\uDC00-\uDFFF])
| ([\ud84a][\uDC00-\uDFFF])
| ([\ud84d][\uDC00-\uDFFF])
| ([\ud84c][\uDC00-\uDFFF])
| ([\ud84f][\uDC00-\uDFFF])
| ([\ud84e][\uDC00-\uDFFF])
| ([\ud841][\uDC00-\uDFFF])
| ([\ud840][\uDC00-\uDFFF])
| ([\ud843][\uDC00-\uDFFF])
| ([\ud842][\uDC00-\uDFFF])
| ([\ud845][\uDC00-\uDFFF])
| ([\ud844][\uDC00-\uDFFF])
| ([\ud847][\uDC00-\uDFFF])
| ([\ud846][\uDC00-\uDFFF])
)
HiraganaSupp = (
([\ud83c][\uDE00])
| ([\ud82c][\uDC01])
)
/*
* Copyright 2010 The Apache Software Foundation.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
// Generated using ICU4J 4.6.0.0 on Thursday, January 6, 2011 7:02:52 PM UTC
// by org.apache.lucene.analysis.icu.GenerateJFlexSupplementaryMacros
ALetterSupp = (
([\ud80d][\uDC00-\uDC2E])
| ([\ud80c][\uDC00-\uDFFF])
| ([\ud809][\uDC00-\uDC62])
| ([\ud808][\uDC00-\uDF6E])
| ([\ud81a][\uDC00-\uDE38])
| ([\ud804][\uDC03-\uDC37\uDC83-\uDCAF])
| ([\ud835][\uDC00-\uDC54\uDC56-\uDC9C\uDC9E\uDC9F\uDCA2\uDCA5\uDCA6\uDCA9-\uDCAC\uDCAE-\uDCB9\uDCBB\uDCBD-\uDCC3\uDCC5-\uDD05\uDD07-\uDD0A\uDD0D-\uDD14\uDD16-\uDD1C\uDD1E-\uDD39\uDD3B-\uDD3E\uDD40-\uDD44\uDD46\uDD4A-\uDD50\uDD52-\uDEA5\uDEA8-\uDEC0\uDEC2-\uDEDA\uDEDC-\uDEFA\uDEFC-\uDF14\uDF16-\uDF34\uDF36-\uDF4E\uDF50-\uDF6E\uDF70-\uDF88\uDF8A-\uDFA8\uDFAA-\uDFC2\uDFC4-\uDFCB])
| ([\ud801][\uDC00-\uDC9D])
| ([\ud800][\uDC00-\uDC0B\uDC0D-\uDC26\uDC28-\uDC3A\uDC3C\uDC3D\uDC3F-\uDC4D\uDC50-\uDC5D\uDC80-\uDCFA\uDD40-\uDD74\uDE80-\uDE9C\uDEA0-\uDED0\uDF00-\uDF1E\uDF30-\uDF4A\uDF80-\uDF9D\uDFA0-\uDFC3\uDFC8-\uDFCF\uDFD1-\uDFD5])
| ([\ud803][\uDC00-\uDC48])
| ([\ud802][\uDC00-\uDC05\uDC08\uDC0A-\uDC35\uDC37\uDC38\uDC3C\uDC3F-\uDC55\uDD00-\uDD15\uDD20-\uDD39\uDE00\uDE10-\uDE13\uDE15-\uDE17\uDE19-\uDE33\uDE60-\uDE7C\uDF00-\uDF35\uDF40-\uDF55\uDF60-\uDF72])
)
FormatSupp = (
([\ud804][\uDCBD])
| ([\ud834][\uDD73-\uDD7A])
| ([\udb40][\uDC01\uDC20-\uDC7F])
)
ExtendSupp = (
([\ud804][\uDC00-\uDC02\uDC38-\uDC46\uDC80-\uDC82\uDCB0-\uDCBA])
| ([\ud834][\uDD65-\uDD69\uDD6D-\uDD72\uDD7B-\uDD82\uDD85-\uDD8B\uDDAA-\uDDAD\uDE42-\uDE44])
| ([\ud800][\uDDFD])
| ([\udb40][\uDD00-\uDDEF])
| ([\ud802][\uDE01-\uDE03\uDE05\uDE06\uDE0C-\uDE0F\uDE38-\uDE3A\uDE3F])
)
NumericSupp = (
([\ud804][\uDC66-\uDC6F])
| ([\ud835][\uDFCE-\uDFFF])
| ([\ud801][\uDCA0-\uDCA9])
)
KatakanaSupp = (
([\ud82c][\uDC00])
)
MidLetterSupp = (
[]
)
MidNumSupp = (
[]
)
MidNumLetSupp = (
[]
)
// Defined once: the generated output previously emitted this macro twice in a
// row with identical bodies. The empty class [] matches no supplementary
// code points.
ExtendNumLetSupp = (
[]
)
ComplexContextSupp = (
[]
)
HanSupp = (
([\ud87e][\uDC00-\uDE1D])
| ([\ud86b][\uDC00-\uDFFF])
| ([\ud86a][\uDC00-\uDFFF])
| ([\ud869][\uDC00-\uDED6\uDF00-\uDFFF])
| ([\ud868][\uDC00-\uDFFF])
| ([\ud86e][\uDC00-\uDC1D])
| ([\ud86d][\uDC00-\uDF34\uDF40-\uDFFF])
| ([\ud86c][\uDC00-\uDFFF])
| ([\ud863][\uDC00-\uDFFF])
| ([\ud862][\uDC00-\uDFFF])
| ([\ud861][\uDC00-\uDFFF])
| ([\ud860][\uDC00-\uDFFF])
| ([\ud867][\uDC00-\uDFFF])
| ([\ud866][\uDC00-\uDFFF])
| ([\ud865][\uDC00-\uDFFF])
| ([\ud864][\uDC00-\uDFFF])
| ([\ud858][\uDC00-\uDFFF])
| ([\ud859][\uDC00-\uDFFF])
| ([\ud85a][\uDC00-\uDFFF])
| ([\ud85b][\uDC00-\uDFFF])
| ([\ud85c][\uDC00-\uDFFF])
| ([\ud85d][\uDC00-\uDFFF])
| ([\ud85e][\uDC00-\uDFFF])
| ([\ud85f][\uDC00-\uDFFF])
| ([\ud850][\uDC00-\uDFFF])
| ([\ud851][\uDC00-\uDFFF])
| ([\ud852][\uDC00-\uDFFF])
| ([\ud853][\uDC00-\uDFFF])
| ([\ud854][\uDC00-\uDFFF])
| ([\ud855][\uDC00-\uDFFF])
| ([\ud856][\uDC00-\uDFFF])
| ([\ud857][\uDC00-\uDFFF])
| ([\ud849][\uDC00-\uDFFF])
| ([\ud848][\uDC00-\uDFFF])
| ([\ud84b][\uDC00-\uDFFF])
| ([\ud84a][\uDC00-\uDFFF])
| ([\ud84d][\uDC00-\uDFFF])
| ([\ud84c][\uDC00-\uDFFF])
| ([\ud84f][\uDC00-\uDFFF])
| ([\ud84e][\uDC00-\uDFFF])
| ([\ud841][\uDC00-\uDFFF])
| ([\ud840][\uDC00-\uDFFF])
| ([\ud843][\uDC00-\uDFFF])
| ([\ud842][\uDC00-\uDFFF])
| ([\ud845][\uDC00-\uDFFF])
| ([\ud844][\uDC00-\uDFFF])
| ([\ud847][\uDC00-\uDFFF])
| ([\ud846][\uDC00-\uDFFF])
)
HiraganaSupp = (
([\ud83c][\uDE00])
| ([\ud82c][\uDC01])
)

View File

@ -17,16 +17,46 @@ package org.apache.lucene.analysis.icu;
* limitations under the License.
*/
import java.text.DateFormat;
import java.util.Date;
import java.util.HashMap;
import java.util.Locale;
import java.util.TimeZone;
import com.ibm.icu.text.UnicodeSet;
import com.ibm.icu.text.UnicodeSetIterator;
import com.ibm.icu.util.VersionInfo;
/** creates a macro to augment jflex's unicode wordbreak support for > BMP */
public class GenerateJFlexSupplementaryMacros {
// Set covering the Basic Multilingual Plane, U+0000 through U+FFFF.
private static final UnicodeSet BMP = new UnicodeSet("[\u0000-\uFFFF]");
// Platform line separator, read once from the "line.separator" system property;
// used to terminate every line of the license text below.
private static final String NL = System.getProperty("line.separator");
// Formatter for the generation timestamp: FULL date style, FULL time style, US locale.
private static final DateFormat DATE_FORMAT = DateFormat.getDateTimeInstance
(DateFormat.FULL, DateFormat.FULL, Locale.US);
static {
// Format timestamps in UTC instead of the default time zone of the machine
// running the generator.
DATE_FORMAT.setTimeZone(TimeZone.getTimeZone("UTC"));
}
// Apache License 2.0 notice as a block comment, each line terminated with NL,
// followed by one blank line. Printed first by outputHeader().
private static final String APACHE_LICENSE
= "/*" + NL
+ " * Copyright 2010 The Apache Software Foundation." + NL
+ " *" + NL
+ " * Licensed under the Apache License, Version 2.0 (the \"License\");" + NL
+ " * you may not use this file except in compliance with the License." + NL
+ " * You may obtain a copy of the License at" + NL
+ " *" + NL
+ " * http://www.apache.org/licenses/LICENSE-2.0" + NL
+ " *" + NL
+ " * Unless required by applicable law or agreed to in writing, software" + NL
+ " * distributed under the License is distributed on an \"AS IS\" BASIS," + NL
+ " * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied." + NL
+ " * See the License for the specific language governing permissions and" + NL
+ " * limitations under the License." + NL
+ " */" + NL + NL;
public static void main(String args[]) throws Exception {
outputHeader();
outputMacro("ALetterSupp", "[:WordBreak=ALetter:]");
outputMacro("FormatSupp", "[:WordBreak=Format:]");
outputMacro("ExtendSupp", "[:WordBreak=Extend:]");
@ -42,6 +72,14 @@ public class GenerateJFlexSupplementaryMacros {
outputMacro("HiraganaSupp", "[:Script=Hiragana:]");
}
/**
 * Writes the file header to standard output: the Apache license notice,
 * a "Generated using ICU4J &lt;version&gt; on &lt;timestamp&gt;" line, a
 * "by &lt;generator class name&gt;" line, and two trailing line separators.
 */
static void outputHeader() {
  final String icuVersion = VersionInfo.ICU_VERSION.toString();
  final String timestamp = DATE_FORMAT.format(new Date());
  final String generatorName = GenerateJFlexSupplementaryMacros.class.getName();

  System.out.print(APACHE_LICENSE);
  System.out.println("// Generated using ICU4J " + icuVersion + " on " + timestamp);
  System.out.println("// by " + generatorName);

  // Two separators after the stamp, before the first macro.
  System.out.print(NL);
  System.out.print(NL);
}
// we have to carefully output the possibilities as compact utf-16
// range expressions, or jflex will OOM!
static void outputMacro(String name, String pattern) {