diff --git a/pep-3106.txt b/pep-3106.txt index d9f2f53d8..cbc43859f 100644 --- a/pep-3106.txt +++ b/pep-3106.txt @@ -11,5 +11,241 @@ Post-History: Abstract +======== - Stub to reserve the PEP number. +This PEP proposes to change the .keys(), .values() and .items() +methods of the built-in dict type to return a set-like or +multiset-like object whose contents are derived from the underlying +dictionary rather than a list which is a copy of the keys, etc.; and +to remove the .iterkeys(), .itervalues() and .iteritems() methods. + +The approach is inspired by that taken in the Java Collections +Framework [1]_. + +Introduction +============ + +It has long been the plan to change the .keys(), .values() and +.items() methods of the built-in dict type to return a more +lightweight object than a list, and to get rid of .iterkeys(), +.itervalues() and .iteritems(). The idea is that code that currently +(in 2.x) reads:: + + for x in d.iterkeys(): ... + +should be rewritten as:: + + for x in d.keys(): ... + +and code that currently reads:: + + a = d.keys() # assume we really want a list here + +should be rewritten as:: + + a = list(d.keys()) + +There are (at least) two ways to accomplish this. The original plan +was to simply let .keys(), .values() and .items() return an iterator, +i.e. exactly what iterkeys(), itervalues() and iteritems() return +in Python 2.x. However, the Java Collections Framework [1]_ suggests +that a better solution is possible: the methods return objects with +set behavior (for .keys() and .items()) or multiset behavior (for +.values()) that do not contain copies of the keys, values or items, +but rather reference the underlying dict and pull their values out of +the dict as needed. + +The advantage of this approach is that one can still write code like +this:: + + a = d.keys() + for x in a: ... + for x in a: ... 
+ +Effectively, iter(d.keys()) in Python 3.0 does what d.iterkeys() does +in Python 2.x; but in most contexts we don't have to write the iter() +call because it is implied by a for-loop. + +The objects returned by the .keys() and .items() methods behave like +sets with limited mutability; they allow removing elements, but not +adding them. Removing an item from these sets removes it from the +underlying dict. The object returned by the .values() method behaves +like a multiset (Java calls this a Collection). It does not allow +removing elements, because a value might occur multiple times and the +implementation wouldn't know which key to remove from the underlying +dict. (The Java Collections Framework has a way around this by +removing from an iterator, but I see no practical use case for that +functionality.) + +Because of the set behavior, it will be possible to check whether two +dicts have the same keys by simply testing:: + + if a.keys() == b.keys(): ... + +and similarly for values. (Two multisets are deemed equal if they +have the same elements with the same cardinalities, +e.g. the multiset {1, 2, 2} is equal to the multiset {2, 1, 2} but +differs from the multiset {1, 2}.) + +These operations are thread-safe only to the extent that using them in +a thread-unsafe way may cause an exception but will not cause +corruption of the internal representation. + +As in Python 2.x, mutating a dict while iterating over it using an +iterator has an undefined effect and will in most cases raise a +RuntimeError exception. (This is similar to the guarantees made by +the Java Collections Framework.) + +The objects returned by .keys() and .items() are fully interoperable +with instances of the built-in set and frozenset types; for example:: + + set(d.keys()) == d.keys() + +is guaranteed to be True (except when d is being modified +simultaneously by another thread). 
+ + +Specification +============= + +I'll try pseudo-code to specify the semantics:: + + class dict: + + # Omitting all other dict methods for brevity + + def keys(self): + return d_keys(self) + + def items(self): + return d_items(self) + + def values(self): + return d_values(self) + + class d_keys: + + def __init__(self, d): + self.__d = d + + def __len__(self): + return len(self.__d) + + def __contains__(self, key): + return key in self.__d + + def __iter__(self): + for key in self.__d: + yield key + + def remove(self, key): + del self.__d[key] + + def discard(self, key): + if key in self: + self.remove(key) + + def pop(self): + return self.__d.popitem()[0] + + def clear(self): + self.__d.clear() + + def copy(self): + return set(self) + + # The following operations should be implemented to be + # compatible with sets; this can be done by exploiting + # the above primitive operations: + # + # <, <=, ==, !=, >=, > (returning a bool) + # &, |, ^, - (returning a new, real set object) + # &=, -= (updating in place and returning self; but not |=, ^=) + # + # as well as their method counterparts (.union(), etc.). + + class d_items: + + def __init__(self, d): + self.__d = d + + def __len__(self): + return len(self.__d) + + def __contains__(self, (key, value)): + return key in self.__d and self.__d[key] == value + + def __iter__(self): + for key in self.__d: + yield key, self.__d[key] + + def remove(self, (key, value)): + del self.__d[key] + + def discard(self, item): + if item in self: + self.remove(item) + + def pop(self): + return self.__d.popitem() + + def clear(self): + self.__d.clear() + + def copy(self): + return set(self) + + # As well as the same set operations as mentioned for d_keys above. + + class d_values: + + def __init__(self, d): + self.__d = d + + def __len__(self): + return len(self.__d) + + def __contains__(self, value): + # Slow! Do we even want to implement this? 
+ for v in self: + if v == value: + return True + return False + + def __iter__(self): + for key in self.__d: + yield self.__d[key] + + # Do we care about the following? + + def pop(self): + return self.__d.popitem()[1] + + def clear(self): + return self.__d.clear() + + def copy(self): + # XXX What should this return? + + # Should we bother implementing set-like operations on + # multisets? If so, how about mixed operations on sets and + # multisets? I'm not sure that these are worth the effort. + +I'm soliciting better names than d_keys, d_values and d_items; these +classes will be public so that their implementations may be reused by +the .keys(), .values() and .items() methods of other mappings. (Or +should they?) + + +Open Issues +=========== + +Should the d_keys, d_values and d_items classes be reusable? Should +they be subclassable? + + +References +========== + +.. [1] Java Collections Framework + http://java.sun.com/docs/books/tutorial/collections/index.html