diff --git a/pep-0335.txt b/pep-0335.txt index 049cde984..c050cf50e 100644 --- a/pep-0335.txt +++ b/pep-0335.txt @@ -2,13 +2,13 @@ PEP: 335 Title: Overloadable Boolean Operators Version: $Revision$ Last-Modified: $Date$ -Author: Greg Ewing +Author: Gregory Ewing Status: Draft Type: Standards Track Content-Type: text/x-rst Created: 29-Aug-2004 -Python-Version: 2.4 -Post-History: 05-Sep-2004 +Python-Version: 3.3 +Post-History: 05-Sep-2004, 30-Sep-2011 Abstract @@ -45,7 +45,7 @@ meanings for Python operators, and in some of these, having boolean operators excluded from those able to be customised can be inconvenient. Examples include: -1. Numeric/Numarray, in which almost all the operators are defined on +1. NumPy, in which almost all the operators are defined on arrays so as to perform the appropriate operation between corresponding elements, and return an array of the results. For consistency, one would expect a boolean operation between two @@ -54,7 +54,7 @@ inconvenient. Examples include: There is a precedent for an extension of this kind: comparison operators were originally restricted to returning boolean results, - and rich comparisons were added so that comparisons of Numeric + and rich comparisons were added so that comparisons of NumPy arrays could return arrays of booleans. 2. A symbolic algebra system, in which a Python expression is @@ -87,7 +87,7 @@ boolean operators to be customised are: 2. There must not be any appreciable loss of speed in the default case. -3. If possible, the customisation mechanism should allow the object to +3. Ideally, the customisation mechanism should allow the object to provide either short-circuiting or non-short-circuiting semantics, at its discretion. @@ -131,14 +131,14 @@ used. To permit short-circuiting, processing of the 'and' and 'or' operators is split into two phases. Phase 1 occurs after evaluation of the first operand but before the second. If the first operand defines the -appropriate phase 1 method, it is called with the first operand as +relevant phase 1 method, it is called with the first operand as argument. If that method can determine the result without needing the second operand, it returns the result, and further processing is skipped. If the phase 1 method determines that the second operand is needed, it returns the special value NeedOtherOperand. This triggers the -evaluation of the second operand, and the calling of an appropriate +evaluation of the second operand, and the calling of a relevant phase 2 method. During phase 2, the __and2__/__rand2__ and __or2__/__ror2__ method pairs work as for other binary operators. @@ -149,7 +149,7 @@ As a special case, if the first operand defines a phase 2 method but no corresponding phase 1 method, the second operand is always evaluated and the phase 2 method called. This allows an object which does not want short-circuiting semantics to simply implement the -relevant phase 2 methods and ignore phase 1. +phase 2 methods and ignore phase 1. Bytecodes @@ -182,9 +182,9 @@ operands and pushing the result. Type Slots ---------- -A the C level, the new special methods are manifested as five new +At the C level, the new special methods are manifested as five new slots in the type object. In the patch, they are added to the -tp_as_number substructure, since this allowed making use of some +tp_as_number substructure, since this allows making use of some existing code for dealing with unary and binary operators. Their existence is signalled by a new type flag, Py_TPFLAGS_HAVE_BOOLEAN_OVERLOAD. @@ -208,7 +208,312 @@ new operations:: PyObject *PyObject_LogicalAnd1(PyObject *); PyObject *PyObject_LogicalOr1(PyObject *); PyObject *PyObject_LogicalAnd2(PyObject *, PyObject *); - PyObject *PyObject_LogicalOr2(PyObject *, PyObject *); + + + +Alternatives and Optimisations +============================== + +This section discusses some possible variations on the proposal, +and ways in which the bytecode sequences generated for boolean +expressions could be optimised. + +Reduced special method set +-------------------------- + +For completeness, the full version of this proposal includes a +mechanism for types to define their own customised short-circuiting +behaviour. However, the full mechanism is not needed to address the +main use cases put forward here, and it would be possible to +define a simplified version that only includes the phase 2 +methods. There would then only be 5 new special methods (__and2__, +__rand2__, __or2__, __ror2__, __not__) with 3 associated type slots +and 3 API functions. + +This simplified version could be expanded to the full version +later if desired. + +Additional bytecodes +-------------------- + +As defined here, the bytecode sequence for code that branches on +the result of a boolean expression would be slightly longer than +it currently is. For example, in Python 2.7, + +:: + + if a and b: + statement1 + else: + statement2 + +generates + +:: + + LOAD_GLOBAL a + POP_JUMP_IF_FALSE false_branch + LOAD_GLOBAL b + POP_JUMP_IF_FALSE false_branch + + JUMP_FORWARD end_branch + false_branch: + + end_branch: + +Under this proposal as described so far, it would become something like + +:: + + LOAD_GLOBAL a + LOGICAL_AND_1 test + LOAD_GLOBAL b + LOGICAL_AND_2 + test: + POP_JUMP_IF_FALSE false_branch + + JUMP_FORWARD end_branch + false_branch: + + end_branch: + +This involves executing one extra bytecode in the short-circuiting +case and two extra bytecodes in the non-short-circuiting case. + +However, by introducing extra bytecodes that combine the logical +operations with testing and branching on the result, it can be +reduced to the same number of bytecodes as the original: + +:: + + LOAD_GLOBAL a + AND1_JUMP true_branch, false_branch + LOAD_GLOBAL b + AND2_JUMP_IF_FALSE false_branch + true_branch: + + JUMP_FORWARD end_branch + false_branch: + + end_branch: + +Here, AND1_JUMP performs phase 1 processing as above, +and then examines the result. If there is a result, it is popped +from the stack, its truth value is tested and a branch taken to +one of two locations. + +Otherwise, the first operand is left on the stack and execution +continues to the next bytecode. The AND2_JUMP_IF_FALSE bytecode +performs phase 2 processing, pops the result and branches if +it tests false + +For the 'or' operator, there would be corresponding OR1_JUMP +and OR2_JUMP_IF_TRUE bytecodes. + +If the simplified version without phase 1 methods is used, then +early exiting can only occur if the first operand is false for +'and' and true for 'or'. Consequently, the two-target AND1_JUMP and +OR1_JUMP bytecodes can be replaced with AND1_JUMP_IF_FALSE and +OR1_JUMP_IF_TRUE, these being ordinary branch instructions with +only one target. + +Optimisation of 'not' +--------------------- + +Recent versions of Python implement a simple optimisation in +which branching on a negated boolean expression is implemented +by reversing the sense of the branch, saving a UNARY_NOT opcode. + +Taking a strict view, this optimisation should no longer be +performed, because the 'not' operator may be overridden to produce +quite different results from usual. However, in typical use cases, +it is not envisaged that expressions involving customised boolean +operations will be used for branching -- it is much more likely +that the result will be used in some other way. + +Therefore, it would probably do little harm to specify that the +compiler is allowed to use the laws of boolean algebra to +simplify any expression that appears directly in a boolean +context. If this is inconvenient, the result can always be assigned +to a temporary name first. + +This would allow the existing 'not' optimisation to remain, and +would permit future extensions of it such as using De Morgan's laws +to extend it deeper into the expression. + + +Usage Examples +============== + +Example 1: NumPy Arrays +----------------------- + +:: + + #----------------------------------------------------------------- + # + # This example creates a subclass of numpy array to which + # 'and', 'or' and 'not' can be applied, producing an array + # of booleans. + # + #----------------------------------------------------------------- + + from numpy import array, ndarray + + class BArray(ndarray): + + def __str__(self): + return "barray(%s)" % ndarray.__str__(self) + + def __and2__(self, other): + return (self & other) + + def __or2__(self, other): + return (self & other) + + def __not__(self): + return (self == 0) + + def barray(*args, **kwds): + return array(*args, **kwds).view(type = BArray) + + a0 = barray([0, 1, 2, 4]) + a1 = barray([1, 2, 3, 4]) + a2 = barray([5, 6, 3, 4]) + a3 = barray([5, 1, 2, 4]) + + print "a0:", a0 + print "a1:", a1 + print "a2:", a2 + print "a3:", a3 + print "not a0:", not a0 + print "a0 == a1 and a2 == a3:", a0 == a1 and a2 == a3 + print "a0 == a1 or a2 == a3:", a0 == a1 or a2 == a3 + +Example 1 Output +---------------- + +:: + + a0: barray([0 1 2 4]) + a1: barray([1 2 3 4]) + a2: barray([5 6 3 4]) + a3: barray([5 1 2 4]) + not a0: barray([ True False False False]) + a0 == a1 and a2 == a3: barray([False False False True]) + a0 == a1 or a2 == a3: barray([False False False True]) + + +Example 2: Database Queries +--------------------------- + +:: + + #----------------------------------------------------------------- + # + # This example demonstrates the creation of a DSL for database + # queries allowing 'and' and 'or' operators to be used to + # formulate the query. + # + #----------------------------------------------------------------- + + class SQLNode(object): + + def __and2__(self, other): + return SQLBinop("and", self, other) + + def __rand2__(self, other): + return SQLBinop("and", other, self) + + def __eq__(self, other): + return SQLBinop("=", self, other) + + + class Table(SQLNode): + + def __init__(self, name): + self.__tablename__ = name + + def __getattr__(self, name): + return SQLAttr(self, name) + + def __sql__(self): + return self.__tablename__ + + + class SQLBinop(SQLNode): + + def __init__(self, op, opnd1, opnd2): + self.op = op.upper() + self.opnd1 = opnd1 + self.opnd2 = opnd2 + + def __sql__(self): + return "(%s %s %s)" % (sql(self.opnd1), self.op, sql(self.opnd2)) + + + class SQLAttr(SQLNode): + + def __init__(self, table, name): + self.table = table + self.name = name + + def __sql__(self): + return "%s.%s" % (sql(self.table), self.name) + + + class SQLSelect(SQLNode): + + def __init__(self, targets): + self.targets = targets + self.where_clause = None + + def where(self, expr): + self.where_clause = expr + return self + + def __sql__(self): + result = "SELECT %s" % ", ".join([sql(target) for target in self.targets]) + if self.where_clause: + result = "%s WHERE %s" % (result, sql(self.where_clause)) + return result + + + def sql(expr): + if isinstance(expr, SQLNode): + return expr.__sql__() + elif isinstance(expr, str): + return "'%s'" % expr.replace("'", "''") + else: + return str(expr) + + + def select(*targets): + return SQLSelect(targets) + + +#-------------------------------------------------------------------------------- + + dishes = Table("dishes") + customers = Table("customers") + orders = Table("orders") + + query = select(customers.name, dishes.price, orders.amount).where( + customers.cust_id == orders.cust_id and orders.dish_id == dishes.dish_id + and dishes.name == "Spam, Eggs, Sausages and Spam") + + print repr(query) + print sql(query) + +Example 2 Output +---------------- + +:: + + <__main__.SQLSelect object at 0x1cc830> + SELECT customers.name, dishes.price, orders.amount WHERE + (((customers.cust_id = orders.cust_id) AND (orders.dish_id = + dishes.dish_id)) AND (dishes.name = 'Spam, Eggs, Sausages and Spam')) Copyright