mirror of https://github.com/apache/lucene.git
LUCENE-9699: Support German-like compound words (#2248)
This commit is contained in:
parent
38ec2602ce
commit
a176308aa6
|
@ -141,8 +141,9 @@ public class Dictionary {
|
||||||
char keepcase;
|
char keepcase;
|
||||||
char needaffix;
|
char needaffix;
|
||||||
char forbiddenword;
|
char forbiddenword;
|
||||||
char onlyincompound;
|
char onlyincompound, compoundBegin, compoundMiddle, compoundEnd, compoundPermit;
|
||||||
int compoundMin = 3;
|
boolean checkCompoundCase;
|
||||||
|
int compoundMin = 3, compoundMax = Integer.MAX_VALUE;
|
||||||
List<CompoundRule> compoundRules; // nullable
|
List<CompoundRule> compoundRules; // nullable
|
||||||
|
|
||||||
// ignored characters (dictionary, affix, inputs)
|
// ignored characters (dictionary, affix, inputs)
|
||||||
|
@ -377,8 +378,20 @@ public class Dictionary {
|
||||||
forbiddenword = flagParsingStrategy.parseFlag(singleArgument(reader, line));
|
forbiddenword = flagParsingStrategy.parseFlag(singleArgument(reader, line));
|
||||||
} else if ("COMPOUNDMIN".equals(firstWord)) {
|
} else if ("COMPOUNDMIN".equals(firstWord)) {
|
||||||
compoundMin = Math.max(1, Integer.parseInt(singleArgument(reader, line)));
|
compoundMin = Math.max(1, Integer.parseInt(singleArgument(reader, line)));
|
||||||
|
} else if ("COMPOUNDWORDMAX".equals(firstWord)) {
|
||||||
|
compoundMax = Math.max(1, Integer.parseInt(singleArgument(reader, line)));
|
||||||
} else if ("COMPOUNDRULE".equals(firstWord)) {
|
} else if ("COMPOUNDRULE".equals(firstWord)) {
|
||||||
compoundRules = parseCompoundRules(reader, Integer.parseInt(singleArgument(reader, line)));
|
compoundRules = parseCompoundRules(reader, Integer.parseInt(singleArgument(reader, line)));
|
||||||
|
} else if ("COMPOUNDBEGIN".equals(firstWord)) {
|
||||||
|
compoundBegin = flagParsingStrategy.parseFlag(singleArgument(reader, line));
|
||||||
|
} else if ("COMPOUNDMIDDLE".equals(firstWord)) {
|
||||||
|
compoundMiddle = flagParsingStrategy.parseFlag(singleArgument(reader, line));
|
||||||
|
} else if ("COMPOUNDEND".equals(firstWord)) {
|
||||||
|
compoundEnd = flagParsingStrategy.parseFlag(singleArgument(reader, line));
|
||||||
|
} else if ("COMPOUNDPERMITFLAG".equals(firstWord)) {
|
||||||
|
compoundPermit = flagParsingStrategy.parseFlag(singleArgument(reader, line));
|
||||||
|
} else if ("CHECKCOMPOUNDCASE".equals(firstWord)) {
|
||||||
|
checkCompoundCase = true;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1303,10 +1316,6 @@ public class Dictionary {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
boolean hasCompounding() {
|
|
||||||
return compoundRules != null;
|
|
||||||
}
|
|
||||||
|
|
||||||
boolean hasFlag(int entryId, char flag, BytesRef scratch) {
|
boolean hasFlag(int entryId, char flag, BytesRef scratch) {
|
||||||
return flag != FLAG_UNSET && hasFlag(decodeFlags(entryId, scratch), flag);
|
return flag != FLAG_UNSET && hasFlag(decodeFlags(entryId, scratch), flag);
|
||||||
}
|
}
|
||||||
|
|
|
@ -87,18 +87,54 @@ public class SpellChecker {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!stemmer.doStem(wordChars, length, caseVariant).isEmpty()) {
|
if (hasStems(wordChars, 0, length, caseVariant, WordContext.SIMPLE_WORD)) {
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (dictionary.hasCompounding()) {
|
if (dictionary.compoundRules != null
|
||||||
return checkCompounds(wordChars, 0, length, new ArrayList<>());
|
&& checkCompoundRules(wordChars, 0, length, new ArrayList<>())) {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
return dictionary.compoundBegin > 0 && checkCompounds(wordChars, 0, length, caseVariant, 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
private boolean hasStems(
|
||||||
|
char[] chars, int offset, int length, boolean caseVariant, WordContext context) {
|
||||||
|
return !stemmer.doStem(chars, offset, length, caseVariant, context).isEmpty();
|
||||||
|
}
|
||||||
|
|
||||||
|
private boolean checkCompounds(
|
||||||
|
char[] chars, int offset, int length, boolean caseVariant, int depth) {
|
||||||
|
if (depth > dictionary.compoundMax - 2) return false;
|
||||||
|
|
||||||
|
int limit = length - dictionary.compoundMin + 1;
|
||||||
|
for (int breakPos = dictionary.compoundMin; breakPos < limit; breakPos++) {
|
||||||
|
WordContext context = depth == 0 ? WordContext.COMPOUND_BEGIN : WordContext.COMPOUND_MIDDLE;
|
||||||
|
int breakOffset = offset + breakPos;
|
||||||
|
if (checkCompoundCase(chars, breakOffset)
|
||||||
|
&& hasStems(chars, offset, breakPos, caseVariant, context)) {
|
||||||
|
int remainingLength = length - breakPos;
|
||||||
|
if (hasStems(chars, breakOffset, remainingLength, caseVariant, WordContext.COMPOUND_END)) {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (checkCompounds(chars, breakOffset, remainingLength, caseVariant, depth + 1)) {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
private boolean checkCompounds(char[] wordChars, int offset, int length, List<IntsRef> words) {
|
private boolean checkCompoundCase(char[] chars, int breakPos) {
|
||||||
|
if (!dictionary.checkCompoundCase) return true;
|
||||||
|
return Character.isUpperCase(chars[breakPos - 1]) == Character.isUpperCase(chars[breakPos]);
|
||||||
|
}
|
||||||
|
|
||||||
|
private boolean checkCompoundRules(
|
||||||
|
char[] wordChars, int offset, int length, List<IntsRef> words) {
|
||||||
if (words.size() >= 100) return false;
|
if (words.size() >= 100) return false;
|
||||||
|
|
||||||
int limit = length - dictionary.compoundMin + 1;
|
int limit = length - dictionary.compoundMin + 1;
|
||||||
|
@ -113,7 +149,7 @@ public class SpellChecker {
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (checkCompounds(wordChars, offset + breakPos, length - breakPos, words)) {
|
if (checkCompoundRules(wordChars, offset + breakPos, length - breakPos, words)) {
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -132,8 +168,7 @@ public class SpellChecker {
|
||||||
|
|
||||||
words.add(forms);
|
words.add(forms);
|
||||||
boolean result =
|
boolean result =
|
||||||
dictionary.compoundRules != null
|
dictionary.compoundRules.stream().anyMatch(r -> r.fullyMatches(words, scratch));
|
||||||
&& dictionary.compoundRules.stream().anyMatch(r -> r.fullyMatches(words, scratch));
|
|
||||||
words.remove(words.size() - 1);
|
words.remove(words.size() - 1);
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
|
@ -94,18 +94,18 @@ final class Stemmer {
|
||||||
}
|
}
|
||||||
|
|
||||||
WordCase wordCase = caseOf(word, length);
|
WordCase wordCase = caseOf(word, length);
|
||||||
List<CharsRef> list = doStem(word, length, false);
|
List<CharsRef> list = doStem(word, 0, length, false, WordContext.SIMPLE_WORD);
|
||||||
if (wordCase == WordCase.UPPER) {
|
if (wordCase == WordCase.UPPER) {
|
||||||
caseFoldTitle(word, length);
|
caseFoldTitle(word, length);
|
||||||
char[] aposCase = capitalizeAfterApostrophe(titleBuffer, length);
|
char[] aposCase = capitalizeAfterApostrophe(titleBuffer, length);
|
||||||
if (aposCase != null) {
|
if (aposCase != null) {
|
||||||
list.addAll(doStem(aposCase, length, true));
|
list.addAll(doStem(aposCase, 0, length, true, WordContext.SIMPLE_WORD));
|
||||||
}
|
}
|
||||||
list.addAll(doStem(titleBuffer, length, true));
|
list.addAll(doStem(titleBuffer, 0, length, true, WordContext.SIMPLE_WORD));
|
||||||
}
|
}
|
||||||
if (wordCase == WordCase.UPPER || wordCase == WordCase.TITLE) {
|
if (wordCase == WordCase.UPPER || wordCase == WordCase.TITLE) {
|
||||||
caseFoldLower(wordCase == WordCase.UPPER ? titleBuffer : word, length);
|
caseFoldLower(wordCase == WordCase.UPPER ? titleBuffer : word, length);
|
||||||
list.addAll(doStem(lowerBuffer, length, true));
|
list.addAll(doStem(lowerBuffer, 0, length, true, WordContext.SIMPLE_WORD));
|
||||||
}
|
}
|
||||||
return list;
|
return list;
|
||||||
}
|
}
|
||||||
|
@ -158,9 +158,10 @@ final class Stemmer {
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
|
||||||
List<CharsRef> doStem(char[] word, int length, boolean caseVariant) {
|
List<CharsRef> doStem(
|
||||||
|
char[] word, int offset, int length, boolean caseVariant, WordContext context) {
|
||||||
List<CharsRef> stems = new ArrayList<>();
|
List<CharsRef> stems = new ArrayList<>();
|
||||||
IntsRef forms = dictionary.lookupWord(word, 0, length);
|
IntsRef forms = dictionary.lookupWord(word, offset, length);
|
||||||
if (forms != null) {
|
if (forms != null) {
|
||||||
for (int i = 0; i < forms.length; i += formStep) {
|
for (int i = 0; i < forms.length; i += formStep) {
|
||||||
char[] wordFlags = dictionary.decodeFlags(forms.ints[forms.offset + i], scratch);
|
char[] wordFlags = dictionary.decodeFlags(forms.ints[forms.offset + i], scratch);
|
||||||
|
@ -172,15 +173,32 @@ final class Stemmer {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
// we can't add this form, it only belongs inside a compound word
|
// we can't add this form, it only belongs inside a compound word
|
||||||
if (Dictionary.hasFlag(wordFlags, dictionary.onlyincompound)) {
|
if (!context.isCompound() && Dictionary.hasFlag(wordFlags, dictionary.onlyincompound)) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
stems.add(newStem(word, 0, length, forms, i));
|
if (context.isCompound()
|
||||||
|
&& !Dictionary.hasFlag(wordFlags, context.requiredFlag(dictionary))) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
stems.add(newStem(word, offset, length, forms, i));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
try {
|
try {
|
||||||
stems.addAll(
|
stems.addAll(
|
||||||
stem(word, 0, length, -1, (char) 0, -1, 0, true, true, false, false, caseVariant));
|
stem(
|
||||||
|
word,
|
||||||
|
offset,
|
||||||
|
length,
|
||||||
|
context,
|
||||||
|
-1,
|
||||||
|
(char) 0,
|
||||||
|
-1,
|
||||||
|
0,
|
||||||
|
true,
|
||||||
|
true,
|
||||||
|
false,
|
||||||
|
false,
|
||||||
|
caseVariant));
|
||||||
} catch (IOException bogus) {
|
} catch (IOException bogus) {
|
||||||
throw new RuntimeException(bogus);
|
throw new RuntimeException(bogus);
|
||||||
}
|
}
|
||||||
|
@ -287,6 +305,7 @@ final class Stemmer {
|
||||||
char[] word,
|
char[] word,
|
||||||
int offset,
|
int offset,
|
||||||
int length,
|
int length,
|
||||||
|
WordContext context,
|
||||||
int previous,
|
int previous,
|
||||||
char prevFlag,
|
char prevFlag,
|
||||||
int prefixId,
|
int prefixId,
|
||||||
|
@ -328,7 +347,7 @@ final class Stemmer {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (isAffixCompatible(prefix, prevFlag, recursionDepth, false)) {
|
if (isAffixCompatible(prefix, prevFlag, recursionDepth, true, false, context)) {
|
||||||
char[] strippedWord = stripAffix(word, offset, length, i, prefix, true);
|
char[] strippedWord = stripAffix(word, offset, length, i, prefix, true);
|
||||||
if (strippedWord == null) {
|
if (strippedWord == null) {
|
||||||
continue;
|
continue;
|
||||||
|
@ -340,6 +359,7 @@ final class Stemmer {
|
||||||
strippedWord,
|
strippedWord,
|
||||||
pureAffix ? offset + i : 0,
|
pureAffix ? offset + i : 0,
|
||||||
pureAffix ? length - i : strippedWord.length,
|
pureAffix ? length - i : strippedWord.length,
|
||||||
|
context,
|
||||||
prefix,
|
prefix,
|
||||||
-1,
|
-1,
|
||||||
recursionDepth,
|
recursionDepth,
|
||||||
|
@ -378,7 +398,8 @@ final class Stemmer {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (isAffixCompatible(suffix, prevFlag, recursionDepth, previousWasPrefix)) {
|
if (isAffixCompatible(
|
||||||
|
suffix, prevFlag, recursionDepth, false, previousWasPrefix, context)) {
|
||||||
char[] strippedWord = stripAffix(word, offset, length, length - i, suffix, false);
|
char[] strippedWord = stripAffix(word, offset, length, length - i, suffix, false);
|
||||||
if (strippedWord == null) {
|
if (strippedWord == null) {
|
||||||
continue;
|
continue;
|
||||||
|
@ -390,6 +411,7 @@ final class Stemmer {
|
||||||
strippedWord,
|
strippedWord,
|
||||||
pureAffix ? offset : 0,
|
pureAffix ? offset : 0,
|
||||||
pureAffix ? i : strippedWord.length,
|
pureAffix ? i : strippedWord.length,
|
||||||
|
context,
|
||||||
suffix,
|
suffix,
|
||||||
prefixId,
|
prefixId,
|
||||||
recursionDepth,
|
recursionDepth,
|
||||||
|
@ -442,18 +464,31 @@ final class Stemmer {
|
||||||
}
|
}
|
||||||
|
|
||||||
private boolean isAffixCompatible(
|
private boolean isAffixCompatible(
|
||||||
int affix, char prevFlag, int recursionDepth, boolean previousWasPrefix) {
|
int affix,
|
||||||
|
char prevFlag,
|
||||||
|
int recursionDepth,
|
||||||
|
boolean isPrefix,
|
||||||
|
boolean previousWasPrefix,
|
||||||
|
WordContext context) {
|
||||||
int append = dictionary.affixData(affix, Dictionary.AFFIX_APPEND);
|
int append = dictionary.affixData(affix, Dictionary.AFFIX_APPEND);
|
||||||
|
|
||||||
|
if (context.isCompound() && dictionary.compoundPermit > 0) {
|
||||||
|
WordContext allowed = isPrefix ? WordContext.COMPOUND_BEGIN : WordContext.COMPOUND_END;
|
||||||
|
if (context != allowed && !dictionary.hasFlag(append, dictionary.compoundPermit, scratch)) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
if (recursionDepth == 0) {
|
if (recursionDepth == 0) {
|
||||||
// check if affix is allowed in a non-compound word
|
// check if affix is allowed in a non-compound word
|
||||||
return !dictionary.hasFlag(append, dictionary.onlyincompound, scratch);
|
return context.isCompound()
|
||||||
|
|| !dictionary.hasFlag(append, dictionary.onlyincompound, scratch);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (isCrossProduct(affix)) {
|
if (isCrossProduct(affix)) {
|
||||||
// cross check incoming continuation class (flag of previous affix) against list.
|
// cross check incoming continuation class (flag of previous affix) against list.
|
||||||
char[] appendFlags = dictionary.decodeFlags(append, scratch);
|
char[] appendFlags = dictionary.decodeFlags(append, scratch);
|
||||||
if (!Dictionary.hasFlag(appendFlags, dictionary.onlyincompound)) {
|
if (context.isCompound() || !Dictionary.hasFlag(appendFlags, dictionary.onlyincompound)) {
|
||||||
return previousWasPrefix || Dictionary.hasFlag(appendFlags, prevFlag);
|
return previousWasPrefix || Dictionary.hasFlag(appendFlags, prevFlag);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -491,8 +526,9 @@ final class Stemmer {
|
||||||
/**
|
/**
|
||||||
* Applies the affix rule to the given word, producing a list of stems if any are found
|
* Applies the affix rule to the given word, producing a list of stems if any are found
|
||||||
*
|
*
|
||||||
* @param strippedWord Word the affix has been removed and the strip added
|
* @param strippedWord Char array containing the word with the affix removed and the strip added
|
||||||
* @param length valid length of stripped word
|
* @param offset where the word actually starts in the array
|
||||||
|
* @param length the length of the stripped word
|
||||||
* @param affix HunspellAffix representing the affix rule itself
|
* @param affix HunspellAffix representing the affix rule itself
|
||||||
* @param prefixId when we already stripped a prefix, we can't simply recurse and check the
|
* @param prefixId when we already stripped a prefix, we can't simply recurse and check the
|
||||||
* suffix, unless both are compatible so we must check dictionary form against both to add it
|
* suffix, unless both are compatible so we must check dictionary form against both to add it
|
||||||
|
@ -505,6 +541,7 @@ final class Stemmer {
|
||||||
char[] strippedWord,
|
char[] strippedWord,
|
||||||
int offset,
|
int offset,
|
||||||
int length,
|
int length,
|
||||||
|
WordContext context,
|
||||||
int affix,
|
int affix,
|
||||||
int prefixId,
|
int prefixId,
|
||||||
int recursionDepth,
|
int recursionDepth,
|
||||||
|
@ -546,10 +583,15 @@ final class Stemmer {
|
||||||
if (!acceptCase(caseVariant, wordFlags)) {
|
if (!acceptCase(caseVariant, wordFlags)) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
// we aren't decompounding (yet)
|
if (!context.isCompound() && Dictionary.hasFlag(wordFlags, dictionary.onlyincompound)) {
|
||||||
if (Dictionary.hasFlag(wordFlags, dictionary.onlyincompound)) {
|
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
if (context.isCompound()) {
|
||||||
|
char cFlag = context.requiredFlag(dictionary);
|
||||||
|
if (!Dictionary.hasFlag(wordFlags, cFlag) && !isFlagAppendedByAffix(affix, cFlag)) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
}
|
||||||
stems.add(newStem(strippedWord, offset, length, forms, i));
|
stems.add(newStem(strippedWord, offset, length, forms, i));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -594,6 +636,7 @@ final class Stemmer {
|
||||||
strippedWord,
|
strippedWord,
|
||||||
offset,
|
offset,
|
||||||
length,
|
length,
|
||||||
|
context,
|
||||||
affix,
|
affix,
|
||||||
flag,
|
flag,
|
||||||
prefixId,
|
prefixId,
|
||||||
|
|
|
@ -0,0 +1,41 @@
|
||||||
|
/*
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
package org.apache.lucene.analysis.hunspell;
|
||||||
|
|
||||||
|
enum WordContext {
|
||||||
|
SIMPLE_WORD,
|
||||||
|
COMPOUND_BEGIN,
|
||||||
|
COMPOUND_MIDDLE,
|
||||||
|
COMPOUND_END;
|
||||||
|
|
||||||
|
boolean isCompound() {
|
||||||
|
return this != SIMPLE_WORD;
|
||||||
|
}
|
||||||
|
|
||||||
|
char requiredFlag(Dictionary dictionary) {
|
||||||
|
switch (this) {
|
||||||
|
case COMPOUND_BEGIN:
|
||||||
|
return dictionary.compoundBegin;
|
||||||
|
case COMPOUND_MIDDLE:
|
||||||
|
return dictionary.compoundMiddle;
|
||||||
|
case COMPOUND_END:
|
||||||
|
return dictionary.compoundEnd;
|
||||||
|
default:
|
||||||
|
return Dictionary.FLAG_UNSET;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
|
@ -80,6 +80,10 @@ public class SpellCheckerTest extends StemmerTestBase {
|
||||||
doTest("compoundrule8");
|
doTest("compoundrule8");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public void testGermanCompounding() throws Exception {
|
||||||
|
doTest("germancompounding");
|
||||||
|
}
|
||||||
|
|
||||||
protected void doTest(String name) throws Exception {
|
protected void doTest(String name) throws Exception {
|
||||||
InputStream affixStream =
|
InputStream affixStream =
|
||||||
Objects.requireNonNull(getClass().getResourceAsStream(name + ".aff"), name);
|
Objects.requireNonNull(getClass().getResourceAsStream(name + ".aff"), name);
|
||||||
|
|
|
@ -0,0 +1,91 @@
|
||||||
|
# German compounding
|
||||||
|
|
||||||
|
# handle special casing of German sharp s
|
||||||
|
|
||||||
|
CHECKSHARPS
|
||||||
|
|
||||||
|
# compound flags
|
||||||
|
|
||||||
|
COMPOUNDBEGIN U
|
||||||
|
COMPOUNDMIDDLE V
|
||||||
|
COMPOUNDEND W
|
||||||
|
|
||||||
|
# Prefixes are allowed at the beginning of compounds,
|
||||||
|
# suffixes are allowed at the end of compounds by default:
|
||||||
|
# (prefix)?(root)+(affix)?
|
||||||
|
# Affixes with COMPOUNDPERMITFLAG may be inside of compounds.
|
||||||
|
COMPOUNDPERMITFLAG P
|
||||||
|
|
||||||
|
# for German linking morphemes (Fuge-element)
|
||||||
|
# Hint: ONLYINCOMPOUND is not required everywhere, but the
|
||||||
|
# checking will be a little faster with it.
|
||||||
|
|
||||||
|
ONLYINCOMPOUND X
|
||||||
|
|
||||||
|
# forbid uppercase characters at compound word bounds
|
||||||
|
CHECKCOMPOUNDCASE
|
||||||
|
|
||||||
|
# for handling Fuge-elements with dashes (Arbeits-)
|
||||||
|
# dash will be a special word
|
||||||
|
|
||||||
|
COMPOUNDMIN 1
|
||||||
|
WORDCHARS -
|
||||||
|
|
||||||
|
# compound settings and linking morpheme for `Arbeit'
|
||||||
|
|
||||||
|
SFX A Y 3
|
||||||
|
SFX A 0 s/UPX .
|
||||||
|
SFX A 0 s/VPDX .
|
||||||
|
SFX A 0 0/WXD .
|
||||||
|
|
||||||
|
SFX B Y 2
|
||||||
|
SFX B 0 0/UPX .
|
||||||
|
SFX B 0 0/VWXDP .
|
||||||
|
|
||||||
|
# a suffix for `Computer'
|
||||||
|
|
||||||
|
SFX C Y 1
|
||||||
|
SFX C 0 n/WD .
|
||||||
|
|
||||||
|
# to forbid exceptions (*Arbeitsnehmer)
|
||||||
|
|
||||||
|
FORBIDDENWORD Z
|
||||||
|
|
||||||
|
# dash prefix for compounds with dash (Arbeits-Computer)
|
||||||
|
|
||||||
|
PFX - Y 1
|
||||||
|
PFX - 0 -/P .
|
||||||
|
|
||||||
|
# decapitalizing prefix
|
||||||
|
# circumfix for positioning in compounds
|
||||||
|
|
||||||
|
PFX D Y 29
|
||||||
|
PFX D A a/PX A
|
||||||
|
PFX D Ä ä/PX Ä
|
||||||
|
PFX D B b/PX B
|
||||||
|
PFX D C c/PX C
|
||||||
|
PFX D D d/PX D
|
||||||
|
PFX D E e/PX E
|
||||||
|
PFX D F f/PX F
|
||||||
|
PFX D G g/PX G
|
||||||
|
PFX D H h/PX H
|
||||||
|
PFX D I i/PX I
|
||||||
|
PFX D J j/PX J
|
||||||
|
PFX D K k/PX K
|
||||||
|
PFX D L l/PX L
|
||||||
|
PFX D M m/PX M
|
||||||
|
PFX D N n/PX N
|
||||||
|
PFX D O o/PX O
|
||||||
|
PFX D Ö ö/PX Ö
|
||||||
|
PFX D P p/PX P
|
||||||
|
PFX D Q q/PX Q
|
||||||
|
PFX D R r/PX R
|
||||||
|
PFX D S s/PX S
|
||||||
|
PFX D T t/PX T
|
||||||
|
PFX D U u/PX U
|
||||||
|
PFX D Ü ü/PX Ü
|
||||||
|
PFX D V v/PX V
|
||||||
|
PFX D W w/PX W
|
||||||
|
PFX D X x/PX X
|
||||||
|
PFX D Y y/PX Y
|
||||||
|
PFX D Z z/PX Z
|
|
@ -0,0 +1,5 @@
|
||||||
|
4
|
||||||
|
Arbeit/A-
|
||||||
|
Computer/BC-
|
||||||
|
-/W
|
||||||
|
Arbeitsnehmer/Z
|
|
@ -0,0 +1,20 @@
|
||||||
|
Computer
|
||||||
|
Computern
|
||||||
|
Arbeit
|
||||||
|
Arbeits-
|
||||||
|
Computerarbeit
|
||||||
|
Computerarbeits-
|
||||||
|
Arbeitscomputer
|
||||||
|
Computercomputer
|
||||||
|
Computercomputern
|
||||||
|
Arbeitscomputern
|
||||||
|
Computerarbeitscomputer
|
||||||
|
Computerarbeitscomputern
|
||||||
|
Arbeitscomputercomputer
|
||||||
|
Computercomputerarbeit
|
||||||
|
Arbeitscomputerarbeit
|
||||||
|
Arbeitsarbeitsarbeit
|
||||||
|
Computerarbeitsarbeit
|
||||||
|
Computerarbeits-Computer
|
||||||
|
Computerarbeits-Computern
|
||||||
|
Computer-Arbeit
|
|
@ -0,0 +1,50 @@
|
||||||
|
computer
|
||||||
|
computern
|
||||||
|
arbeit
|
||||||
|
Arbeits
|
||||||
|
arbeits
|
||||||
|
ComputerArbeit
|
||||||
|
ComputernArbeit
|
||||||
|
Computernarbeit
|
||||||
|
ComputerArbeits
|
||||||
|
Arbeitcomputer
|
||||||
|
Arbeitcomputern
|
||||||
|
ArbeitsComputer
|
||||||
|
ArbeitsComputern
|
||||||
|
Computerarbeitcomputer
|
||||||
|
ComputerArbeitcomputer
|
||||||
|
ComputerArbeitscomputer
|
||||||
|
Computerarbeitcomputern
|
||||||
|
ComputerArbeitcomputern
|
||||||
|
ComputerArbeitscomputern
|
||||||
|
Arbeitscomputerarbeits
|
||||||
|
Arbeitscomputernarbeits
|
||||||
|
Computerarbeits-computer
|
||||||
|
Arbeitsnehmer
|
||||||
|
computers
|
||||||
|
computern
|
||||||
|
computernarbeit
|
||||||
|
computernArbeit
|
||||||
|
computerArbeit
|
||||||
|
computerArbeits
|
||||||
|
arbeitcomputer
|
||||||
|
arbeitsComputer
|
||||||
|
computerarbeitcomputer
|
||||||
|
computerArbeitcomputer
|
||||||
|
computerArbeitscomputer
|
||||||
|
arbeitscomputerarbeits
|
||||||
|
computerarbeits-computer
|
||||||
|
arbeitsnehmer
|
||||||
|
computernarbeit
|
||||||
|
computernArbeit
|
||||||
|
arbeits-
|
||||||
|
computerarbeit
|
||||||
|
computerarbeits-
|
||||||
|
arbeitscomputer
|
||||||
|
arbeitscomputern
|
||||||
|
computerarbeitscomputer
|
||||||
|
computerarbeitscomputern
|
||||||
|
computerarbeitscomputers
|
||||||
|
arbeitscomputerarbeit
|
||||||
|
computerarbeits-Computer
|
||||||
|
computerarbeits-Computern
|
Loading…
Reference in New Issue