From c69f92775bcd9e084ad06862092a6ff5401f936c Mon Sep 17 00:00:00 2001 From: Brett Cannon Date: Mon, 4 Apr 2011 16:37:07 -0700 Subject: [PATCH] Draft of PEP 399: Pure Python/C Accelerator Module Compatibiilty Requirements --- pep-0399.txt | 205 +++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 205 insertions(+) create mode 100644 pep-0399.txt diff --git a/pep-0399.txt b/pep-0399.txt new file mode 100644 index 000000000..425e7112f --- /dev/null +++ b/pep-0399.txt @@ -0,0 +1,205 @@ +PEP: 399 +Title: Pure Python/C Accelerator Module Compatibility Requirements +Version: $Revision: 88219 $ +Last-Modified: $Date: 2011-01-27 13:47:00 -0800 (Thu, 27 Jan 2011) $ +Author: Brett Cannon +Status: Draft +Type: Informational +Content-Type: text/x-rst +Created: 04-Apr-2011 +Python-Version: 3.3 +Post-History: + +Abstract +======== + +The Python standard library under CPython contains various instances +of modules implemented in both pure Python and C. This PEP requires +that in these instances both the Python and C code *must* be +semantically identical (except in cases where implementation details +of a VM prevent it entirely). It is also required that new C-based +modules lacking a pure Python equivalent implementation get special +permissions to be added to the standard library. + + +Rationale +========= + +Python has grown beyond the CPython virtual machine (VM). IronPython_, +Jython_, and PyPy_ are all currently viable alternatives to the +CPython VM. This VM ecosystem that has sprung up around the Python +programming language has led to Python being used in many different +areas where CPython cannot be used, e.g., Jython allowing Python to be +used in Java applications. + +A problem all of the VMs other than CPython face is handling modules +from the standard library that are implemented in C. Since they do not +typically support the entire `C API of Python`_ they are unable to use +the code used to create the module.
Oftentimes this leads these other +VMs to either re-implement the modules in pure Python or in the +programming language used to implement the VM (e.g., in C# for +IronPython). This duplication of effort between CPython, PyPy, Jython, +and IronPython is extremely unfortunate as implementing a module *at +least* in pure Python would help mitigate this duplicate effort. + +The purpose of this PEP is to minimize this duplicate effort by +mandating that all new modules added to Python's standard library +*must* have a pure Python implementation *unless* special dispensation +is given. This makes sure that a module in the stdlib is available to +all VMs and not just to CPython. + +Re-implementing parts (or all) of a module in C (in the case +of CPython) is still allowed for performance reasons, but any such +accelerated code must semantically match the pure Python equivalent to +prevent divergence. To accomplish this, the pure Python and C code must +be thoroughly tested with the *same* test suite to verify compliance. +This is to prevent users from accidentally relying +on semantics that are specific to the C code and are not reflected in +the pure Python implementation that other VMs rely upon, e.g., in +CPython 3.2.0, ``heapq.heappop()`` raises different exceptions +depending on whether the accelerated C code is used or not:: + + from test.support import import_fresh_module + + c_heapq = import_fresh_module('heapq', fresh=['_heapq']) + py_heapq = import_fresh_module('heapq', blocked=['_heapq']) + + + class Spam: + """Tester class which defines no other magic methods but + __len__().""" + def __len__(self): + return 0 + + + try: + c_heapq.heappop(Spam()) + except TypeError: + # "heap argument must be a list" + pass + + try: + py_heapq.heappop(Spam()) + except AttributeError: + # "'Spam' object has no attribute 'pop'" + pass + +This kind of divergence is a problem for users as they unwittingly +write code that is CPython-specific.
This is also an issue for other +VM teams as they have to deal with bug reports from users thinking +that they incorrectly implemented the module when in fact it was +caused by an untested case. + + +Details +======= + +Starting in Python 3.3, any modules added to the standard library must +have a pure Python implementation. This rule can only be ignored if +the Python development team grants a special exemption for the module. +Typically the exemption would be granted only when a module wraps a +specific C-based library (e.g., sqlite3_). In granting an exemption it +will be recognized that the module will most likely be considered +exclusive to CPython and not part of Python's standard library that +other VMs are expected to support. Usage of ``ctypes`` to provide an +API for a C library will continue to be frowned upon as ``ctypes`` +lacks compiler guarantees that C code typically relies upon to prevent +certain errors from occurring (e.g., API changes). + +Even though a pure Python implementation is mandated by this PEP, it +does not preclude the use of a companion acceleration module. If an +acceleration module is provided it is to be named the same as the +module it is accelerating with an underscore attached as a prefix, +e.g., ``_warnings`` for ``warnings``. The common pattern to access +the accelerated code from the pure Python implementation is to import +it with an ``import *``, e.g., ``from _warnings import *``. This is +typically done at the end of the module to allow it to overwrite +specific Python objects with their accelerated equivalents. This kind +of import can also be done before the end of the module when needed, +e.g., an accelerated base class is provided but is then subclassed by +Python code. This PEP does not mandate that pre-existing modules in +the stdlib that lack a pure Python equivalent gain such a module. 
But +if people do volunteer to provide and maintain a pure Python +equivalent (e.g., the PyPy team volunteering their pure Python +implementation of the ``csv`` module and maintaining it) then such +code will be accepted. + +Any accelerated code must be semantically identical to the pure Python +implementation. The only time any semantics are allowed to be +different is when technical details of the VM providing the +accelerated code prevent matching semantics from being possible, e.g., +a class being a ``type`` when implemented in C. The semantic +equivalence requirement also dictates that no public API be provided +in accelerated code that does not exist in the pure Python code. +Without this requirement people could accidentally come to rely on a +detail in the accelerated code which is not made available to other VMs +that use the pure Python implementation. To help verify that the +contract of semantic equivalence is being met, a module must be tested +both with and without its accelerated code as thoroughly as possible. + +As an example, to write tests which exercise both the pure Python and +C accelerated versions of a module, a basic idiom can be followed:: + + import collections.abc + from test.support import import_fresh_module, run_unittest + import unittest + + c_heapq = import_fresh_module('heapq', fresh=['_heapq']) + py_heapq = import_fresh_module('heapq', blocked=['_heapq']) + + + class ExampleTest(unittest.TestCase): + + def test_heappop_exc_for_non_MutableSequence(self): + # Raise TypeError when heap is not a + # collections.abc.MutableSequence.
+ class Spam: + """Test class lacking many ABC-required methods + (e.g., pop()).""" + def __len__(self): + return 0 + + heap = Spam() + self.assertFalse(isinstance(heap, + collections.abc.MutableSequence)) + with self.assertRaises(TypeError): + self.heapq.heappop(heap) + + + class AcceleratedExampleTest(ExampleTest): + + """Test using the accelerated code.""" + + heapq = c_heapq + + + class PyExampleTest(ExampleTest): + + """Test with just the pure Python code.""" + + heapq = py_heapq + + + def test_main(): + run_unittest(AcceleratedExampleTest, PyExampleTest) + + + if __name__ == '__main__': + test_main() + +Thoroughness of the test can be verified using coverage measurements +with branching coverage on the pure Python code to verify that all +possible scenarios are tested using (or not using) accelerator code. + + +Copyright +========= + +This document has been placed in the public domain. + + +.. _IronPython: http://ironpython.net/ +.. _Jython: http://www.jython.org/ +.. _PyPy: http://pypy.org/ +.. _C API of Python: http://docs.python.org/py3k/c-api/index.html +.. _sqlite3: http://docs.python.org/py3k/library/sqlite3.html