make sure we are consistent for a given field name when we randomly enable term vector options

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/lucene5611@1591009 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Michael McCandless 2014-04-29 15:56:46 +00:00
parent c4b632ef7f
commit 3b52e277ce
2 changed files with 56 additions and 37 deletions

View File

@ -291,13 +291,13 @@ public class AttributeSource {
* <p>
* The caller must pass in a Class&lt;? extends Attribute&gt; value.
*
* @returns instance of the passed in Attribute, or {@code null} if this AttributeSource
* does not contain the Attribute. It is recommended to always use
* {@link #addAttribute} even in consumers of TokenStreams, because you cannot
* know if a specific TokenStream really uses a specific Attribute.
* {@link #addAttribute} will automatically make the attribute available.
* If you want to only use the attribute, if it is available (to optimize
* consuming), use {@link #hasAttribute}.
* @return instance of the passed in Attribute, or {@code null} if this AttributeSource
* does not contain the Attribute. It is recommended to always use
* {@link #addAttribute} even in consumers of TokenStreams, because you cannot
* know if a specific TokenStream really uses a specific Attribute.
* {@link #addAttribute} will automatically make the attribute available.
* If you want to only use the attribute, if it is available (to optimize
* consuming), use {@link #hasAttribute}.
*/
public final <T extends Attribute> T getAttribute(Class<T> attClass) {
return attClass.cast(attributes.get(attClass));

View File

@ -603,8 +603,7 @@ public abstract class LuceneTestCase extends Assert {
.around(new TestRuleFieldCacheSanity())
.around(parentChainCallRule);
private static final Map<String,FieldType> fieldTermVectorOptions = new HashMap<String,FieldType>();
private static final Set<String> fieldNamesSeen = Collections.newSetFromMap(new ConcurrentHashMap<String,Boolean>());
private static final Map<String,FieldType> fieldToType = new HashMap<String,FieldType>();
// -----------------------------------------------------------------
// Suite and test case setup/ cleanup.
@ -624,8 +623,7 @@ public abstract class LuceneTestCase extends Assert {
@After
public void tearDown() throws Exception {
parentChainCallRule.teardownCalled = true;
fieldTermVectorOptions.clear();
fieldNamesSeen.clear();
fieldToType.clear();
}
@ -1176,11 +1174,42 @@ public abstract class LuceneTestCase extends Assert {
return newField(random(), name, value, type);
}
public static Field newField(Random random, String name, String value, FieldType type) {
/** Returns a FieldType derived from newType but whose
* term vector options match the old type */
private static FieldType mergeTermVectorOptions(FieldType newType, FieldType oldType) {
if (newType.indexed() && oldType.storeTermVectors() == true && newType.storeTermVectors() == false) {
newType = new FieldType(newType);
newType.setStoreTermVectors(oldType.storeTermVectors());
newType.setStoreTermVectorPositions(oldType.storeTermVectorPositions());
newType.setStoreTermVectorOffsets(oldType.storeTermVectorOffsets());
newType.setStoreTermVectorPayloads(oldType.storeTermVectorPayloads());
newType.freeze();
}
return newType;
}
// TODO: if we can pull out the "make term vector options
// consistent across all instances of the same field name"
// write-once schema sort of helper class then we can
// remove the sync here. We can also fold the random
// "enable norms" (now commented out, below) into that:
public synchronized static Field newField(Random random, String name, String value, FieldType type) {
// Defeat any consumers that illegally rely on intern'd
// strings (we removed this from Lucene a while back):
name = new String(name);
if (usually(random) || !type.indexed()) {
FieldType prevType = fieldToType.get(name);
if (usually(random) || !type.indexed() || prevType != null) {
// most of the time, don't modify the params
fieldNamesSeen.add(name);
if (prevType == null) {
fieldToType.put(name, new FieldType(type));
} else {
type = mergeTermVectorOptions(type, prevType);
}
return new Field(name, value, type);
}
@ -1195,37 +1224,27 @@ public abstract class LuceneTestCase extends Assert {
// Randomly turn on term vector options, but always do
// so consistently for the same field name:
if (!newType.storeTermVectors() && fieldNamesSeen.contains(name) == false && random.nextBoolean()) {
FieldType prev;
synchronized(fieldTermVectorOptions) {
prev = fieldTermVectorOptions.get(name);
if (prev == null) {
newType.setStoreTermVectors(true);
if (!newType.storeTermVectorPositions()) {
newType.setStoreTermVectorPositions(random.nextBoolean());
if (!newType.storeTermVectors() && random.nextBoolean()) {
newType.setStoreTermVectors(true);
if (!newType.storeTermVectorPositions()) {
newType.setStoreTermVectorPositions(random.nextBoolean());
if (newType.storeTermVectorPositions()) {
if (!newType.storeTermVectorPayloads()) {
newType.setStoreTermVectorPayloads(random.nextBoolean());
}
if (!newType.storeTermVectorOffsets()) {
newType.setStoreTermVectorOffsets(random.nextBoolean());
}
}
if (newType.storeTermVectorPositions()) {
if (!newType.storeTermVectorPayloads()) {
newType.setStoreTermVectorPayloads(random.nextBoolean());
}
if (!newType.storeTermVectorOffsets()) {
newType.setStoreTermVectorOffsets(random.nextBoolean());
}
fieldTermVectorOptions.put(name, prev);
}
}
if (prev != null) {
newType.setStoreTermVectors(prev.storeTermVectors());
newType.setStoreTermVectorOffsets(prev.storeTermVectorOffsets());
newType.setStoreTermVectorPositions(prev.storeTermVectorPositions());
newType.setStoreTermVectorPayloads(prev.storeTermVectorPayloads());
if (VERBOSE) {
System.out.println("NOTE: LuceneTestCase: upgrade name=" + name + " type=" + newType);
}
System.out.println(" LTC: upgrade name=" + name + " type=" + newType);
}
newType.freeze();
fieldToType.put(name, newType);
// TODO: we need to do this, but smarter, ie, most of
// the time we set the same value for a given field but