diff --git a/pep-0201.txt b/pep-0201.txt index 8f578cbc5..d429311a4 100644 --- a/pep-0201.txt +++ b/pep-0201.txt @@ -25,9 +25,10 @@ Standard For-Loops Motivation for this feature has its roots in a concept described as `parallel for loops'. A standard for-loop in Python iterates over every element in the sequence until the sequence is - exhausted. The for-loop can also be explicitly exited with a - `break' statement, and for-loops can have else: clauses, but these - is has no bearing on this PEP. + exhausted. A `break' statement inside the loop suite causes an + explicit loop exit. For-loops also have else: clauses which get + executed when the loop exits normally (i.e. not by execution of a + break). For-loops can iterate over built-in types such as lists and tuples, but they can also iterate over instance types that conform @@ -35,13 +36,13 @@ Standard For-Loops instance should implement the __getitem__() method, expecting a monotonically increasing index starting at 0, and this method should raise an IndexError when the sequence is exhausted. This - protocol is current undocumented -- a defect in Python's + protocol is currently undocumented -- a defect in Python's documentation hopefully soon corrected. - For loops are described in the language reference manual here - http://www.python.org/doc/devel/ref/for.html + For-loops are described in the Python language reference + manual[1]. - An example for-loop + An example for-loop: >>> for i in (1, 2, 3): print i ... @@ -88,7 +89,7 @@ Parallel For-Loops - The use of the magic `None' first argument is non-obvious. - - Its has arbitrary, often unintended, and inflexible semantics + - It has arbitrary, often unintended, and inflexible semantics when the lists are not of the same length: the shorter sequences are padded with `None'. @@ -110,11 +111,11 @@ The Proposed Solution The proposed solution is to introduce a new built-in sequence generator function, available in the __builtin__ module. 
This - function is to be called `marry' and has the following signature: + function is to be called `zip' and has the following signature: - marry(seqa, [seqb, [...]], [pad=]) + zip(seqa, [seqb, [...]], [pad=]) - marry() takes one or more sequences and weaves their elements + zip() takes one or more sequences and weaves their elements together, just as map(None, ...) does with sequences of equal length. The optional keyword argument `pad', if supplied, is a value used to pad all shorter sequences to the length of the @@ -122,15 +123,15 @@ The Proposed Solution the shortest sequence is exhausted. It is not possible to pad short lists with different pad values, - nor will marry() ever raise an exception with lists of different - lengths. To accomplish both of these, the sequences must be - checked and processed before the call to marry(). + nor will zip() ever raise an exception with lists of different + lengths. To accomplish either behavior, the sequences must be + checked and processed before the call to zip(). Lazy Execution - For performance purposes, marry() does not construct the list of + For performance purposes, zip() does not construct the list of tuples immediately. Instead it instantiates an object that implements a __getitem__() method and conforms to the informal for-loop protocol. This method constructs the individual tuples @@ -148,25 +149,25 @@ Examples >>> c = (9, 10, 11) >>> d = (12, 13) - >>> marry(a, b) + >>> zip(a, b) [(1, 5), (2, 6), (3, 7), (4, 8)] - >>> marry(a, d) + >>> zip(a, d) [(1, 12), (2, 13)] - >>> marry(a, d, pad=0) + >>> zip(a, d, pad=0) [(1, 12), (2, 13), (3, 0), (4, 0)] - >>> marry(a, d, pid=0) + >>> zip(a, d, pid=0) Traceback (most recent call last): File "", line 1, in ? 
- File "/usr/tmp/python-iKAOxR", line 11, in marry + File "/usr/tmp/python-iKAOxR", line 11, in zip TypeError: unexpected keyword arguments - >>> marry(a, b, c, d) + >>> zip(a, b, c, d) [(1, 5, 9, 12), (2, 6, 10, 13)] - >>> marry(a, b, c, d, pad=None) + >>> zip(a, b, c, d, pad=None) [(1, 5, 9, 12), (2, 6, 10, 13), (3, 7, 11, None), (4, 8, None, None)] >>> map(None, a, b, c, d) [(1, 5, 9, 12), (2, 6, 10, 13), (3, 7, 11, None), (4, 8, None, None)] @@ -175,17 +176,19 @@ Examples Reference Implementation - Here is a reference implementation, in Python of the marry() + Here is a reference implementation, in Python, of the zip() built-in function and helper class. These would ultimately be replaced by equivalent C code. - class _Marriage: + class _Zipper: def __init__(self, args, kws): + # Defaults self.__padgiven = 0 if kws.has_key('pad'): self.__padgiven = 1 self.__pad = kws['pad'] del kws['pad'] + # Assert no unknown arguments are left if kws: raise TypeError('unexpected keyword arguments') self.__sequences = args @@ -206,6 +209,23 @@ Reference Implementation ret.append(self.__pad) return tuple(ret) + def __len__(self): + # If we're padding, then len is the length of the longest sequence, + # otherwise it's the length of the shortest sequence. + if not self.__padgiven: + shortest = -1 + for s in self.__sequences: + slen = len(s) + if shortest < 0 or slen < shortest: + shortest = slen + return shortest + longest = 0 + for s in self.__sequences: + slen = len(s) + if slen > longest: + longest = slen + return longest + def __str__(self): ret = [] i = 0 @@ -219,25 +239,130 @@ Reference Implementation __repr__ = __str__ - def marry(*args, **kws): - return _Marriage(args, kws) + def zip(*args, **kws): + return _Zipper(args, kws) + + + +Rejected Elaborations + + Some people have suggested that the user be able to specify the + type of the inner and outer containers for the zipped sequence. 
+ This would be specified by additional keyword arguments to zip(), + named `inner' and `outer'. + + This elaboration is rejected for several reasons. First, there + really is no outer container, even though there appears to be an + outer list container in the example above. This is simply an + artifact of the repr() of the zipped object. User code can do its + own looping over the zipped object via __getitem__(), and build + any type of outer container for the fully evaluated, concrete + sequence. For example, to build a zipped object with lists as an + outer container, use + + >>> list(zip(sequence_a, sequence_b, sequence_c)) + + for a tuple outer container, use + + >>> tuple(zip(sequence_a, sequence_b, sequence_c)) + + This type of construction will usually not be necessary though, + since it is expected that zipped objects will most often appear in + for-loops. + + Second, allowing the user to specify the inner container + introduces needless complexity and arbitrary decisions. You might + imagine that instead of the default tuple inner container, the + user could prefer a list, or a dictionary, or instances of some + sequence-like class. + + One problem is the API. Should the argument to `inner' be a type + or a template object? For flexibility, the argument should + probably be a type object (e.g. TupleType, ListType, DictType), or + a class. For classes, the implementation could just pass the zip + element to the constructor. But what about built-in types that + don't have constructors? They would have to be special-cased in + the implementation (e.g. what is the constructor for TupleType? + The tuple() built-in). + + Another problem that arises is for zips greater than length two. + Say you had three sequences and you wanted the inner type to be a + dictionary. What would the semantics of the following be? 
+ + >>> zip(sequence_a, sequence_b, sequence_c, inner=DictType) + + Would the key be (element_a, element_b) and the value be + element_c, or would the key be element_a and the value be + (element_b, element_c)? Or should an exception be thrown? + + This suggests that the specification of the inner container type + is needless complexity. It isn't likely that the inner container + will need to be specified very often, and it is easy to roll your + own should you need it. Tuples are chosen for the inner container + type due to their (slight) memory footprint and performance + advantages. Open Issues - What should "marry(a)" do? + - What should "zip(a)" do? Given - Given a = (1, 2, 3), should marry(a) return [(1,), (2,), (3,)] or - should it return [1, 2, 3]? The first is more consistent with the - description given above, while the latter is what map(None, a) - does, and may be more consistent with user expectation. + a = (1, 2, 3); zip(a) - The latter interpretation requires special casing, which is not - present in the reference implementation. It returns + three outcomes are possible. - >>> marry(a) - [(1,), (2,), (3,), (4,)] + 1) Returns [(1,), (2,), (3,)] + + Pros: no special casing in the implementation or in user + code, and is more consistent with the description of its + semantics. Cons: this isn't what map(None, a) would return, + and may be counter to user expectations. + + 2) Returns [1, 2, 3] + + Pros: consistency with map(None, a), and simpler code for + for-loops, e.g. + + for i in zip(a): + + instead of + + for (i,) in zip(a): + + Cons: too much complexity and special casing for what should + be a relatively rare usage pattern. + + 3) Raises TypeError + + Pros: None + + Cons: needless restriction + + Current scoring seems to generally favor outcome 1. + + - The name of the built-in `zip' may cause some initial confusion + with the zip compression algorithm. 
Other suggestions include + (but are not limited to!): marry, weave, parallel, lace, braid, + interlace, permute, furl, tuples, lists, stitch, collate, knit, + plait, and with. All have disadvantages, and there is no clear + unanimous choice, therefore the decision was made to go with + `zip' because the same functionality is available in other + languages (e.g. Haskell) under the name `zip'[2]. + + + +References + + [1] http://www.python.org/doc/devel/ref/for.html + [2] http://www.haskell.org/onlinereport/standard-prelude.html#$vzip + + TBD: URL to python-dev archives + + +Copyright + + This document has been placed in the public domain.