diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md new file mode 100644 index 000000000..3c4934ac8 --- /dev/null +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -0,0 +1,11 @@ + diff --git a/.gitignore b/.gitignore index 5c1765b27..dd23b584a 100644 --- a/.gitignore +++ b/.gitignore @@ -1,4 +1,5 @@ pep-0000.txt +pep-0000.rst pep-????.html __pycache__ *.pyc diff --git a/.travis.yml b/.travis.yml index 0f63831d4..b19f1f700 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,6 +1,6 @@ language: python python: - - 3.5 + - "3.7-dev" sudo: false cache: pip diff --git a/CODE_OF_CONDUCT.rst b/CODE_OF_CONDUCT.rst new file mode 100644 index 000000000..4bc6630fc --- /dev/null +++ b/CODE_OF_CONDUCT.rst @@ -0,0 +1,13 @@ +Code of Conduct +=============== + +Please note that all interactions on +`Python Software Foundation <https://www.python.org/psf-landing/>`__-supported +infrastructure are covered +by the `PSF Code of Conduct <https://www.python.org/psf/codeofconduct/>`__, +which includes all infrastructure used in the development of Python itself +(e.g. mailing lists, issue trackers, GitHub, etc.). + +In general this means everyone is expected to be open, considerate, and +respectful of others no matter what their position is within the project. diff --git a/CONTRIBUTING.rst b/CONTRIBUTING.rst new file mode 100644 index 000000000..8f01b5802 --- /dev/null +++ b/CONTRIBUTING.rst @@ -0,0 +1,53 @@ +Contributing Guidelines +======================= + +To learn more about the purpose of PEPs and how to go about writing a PEP, please +start reading at PEP 1 (`pep-0001.txt <./pep-0001.txt>`_ in this repo). Note that +PEP 0, the index PEP, is now automatically generated, and not committed to the repo. + +Before writing a new PEP +------------------------ + +Has this idea been proposed on `python-ideas <https://mail.python.org/mailman/listinfo/python-ideas>`_ +and received general acceptance as being an idea worth pursuing? (If not, +please start a discussion there before submitting a pull request.) + +More details can be found in `PEP 1 <https://www.python.org/dev/peps/pep-0001/>`_. + +Do you have an implementation of your idea? (This is important for when you +propose this PEP to `python-dev <https://mail.python.org/mailman/listinfo/python-dev>`_, +as code maintenance is a critical aspect of all PEP proposals prior to a +final decision; in special circumstances an implementation can be deferred.) + + +Commit messages +--------------- + +When committing to a PEP, please always include the PEP number in the subject +title. For example, ``PEP NNN: <summary of changes>``. + + +Sign the CLA +------------ + +Before you hit "Create pull request", please take a moment to ensure that this +project can legally accept your contribution by verifying you have signed the +PSF Contributor Agreement: + + https://www.python.org/psf/contrib/contrib-form/ + +If you haven't signed the CLA before, please follow the steps outlined in the +CPython devguide to do so: + + https://devguide.python.org/pullrequest/#licensing + +Thanks again for your contribution, and we look forward to reviewing it! + + +Code of Conduct +--------------- + +All interactions for this project are covered by the +`PSF Code of Conduct <https://www.python.org/psf/codeofconduct/>`_. Everyone is +expected to be open, considerate, and respectful of others no matter their +position within the project. 
diff --git a/Makefile b/Makefile index 87067b9a9..30d7da0ed 100644 --- a/Makefile +++ b/Makefile @@ -15,13 +15,13 @@ PYTHON=python3 .rst.html: @$(PYTHON) $(PEP2HTML) $< -TARGETS= $(patsubst %.rst,%.html,$(wildcard pep-????.rst)) $(patsubst %.txt,%.html,$(wildcard pep-????.txt)) pep-0000.html +TARGETS= $(patsubst %.rst,%.html,$(wildcard pep-????.rst)) $(patsubst %.txt,%.html,$(wildcard pep-????.txt)) pep-0000.html -all: pep-0000.txt $(TARGETS) +all: pep-0000.rst $(TARGETS) $(TARGETS): pep2html.py -pep-0000.txt: $(wildcard pep-????.txt) $(wildcard pep-????.rst) $(wildcard pep0/*.py) +pep-0000.rst: $(wildcard pep-????.txt) $(wildcard pep-????.rst) $(wildcard pep0/*.py) genpepindex.py $(PYTHON) genpepindex.py . rss: @@ -31,6 +31,7 @@ install: echo "Installing is not necessary anymore. It will be done in post-commit." clean: + -rm pep-0000.rst -rm pep-0000.txt -rm *.html diff --git a/README.rst b/README.rst index 8675d20fe..0f72cc094 100644 --- a/README.rst +++ b/README.rst @@ -11,6 +11,12 @@ PEPs and how to go about writing a PEP, please start reading at PEP 1 now automatically generated, and not committed to the repo. +Contributing to PEPs +==================== + +See the `Contributing Guidelines <./CONTRIBUTING.rst>`_. + + reStructuredText for PEPs ========================= @@ -26,12 +32,41 @@ package, which is available from `PyPI `_. If you have pip, ``pip install docutils`` should install it. -Generating HTML -=============== +Generating the PEP Index +======================== + +PEP 0 is automatically generated based on the metadata headers in other +PEPs. The script handling this is ``genpepindex.py``, with supporting +libraries in the ``pep0`` directory. + + +Checking PEP formatting and rendering +===================================== Do not commit changes with bad formatting. To check the formatting of a PEP, use the Makefile. In particular, to generate HTML for PEP 999, -your source code should be in ``pep-0999.txt`` and the HTML will be +your source code should be in ``pep-0999.rst`` and the HTML will be generated to ``pep-0999.html`` by the command ``make pep-0999.html``. -The default Make target generates HTML for all PEPs. If you don't have -Make, use the ``pep2html.py`` script. +The default Make target generates HTML for all PEPs. + +If you don't have Make, use the ``pep2html.py`` script directly. + + +Generating HTML for python.org +============================== + +python.org includes its own helper modules to render PEPs as HTML, with +suitable links back to the source pages in the version control repository. 
+ +These can be found at https://github.com/python/pythondotorg/tree/master/peps + +When making changes to the PEP management process that may impact python.org's +rendering pipeline: + +* Clone the python.org repository from https://github.com/python/pythondotorg/ +* Get set up for local python.org development as per + https://pythondotorg.readthedocs.io/install.html#manual-setup +* Adjust ``PEP_REPO_PATH`` in ``pydotorg/settings/local.py`` to refer to your + local clone of the PEP repository +* Run ``./manage.py generate_pep_pages`` as described in + https://pythondotorg.readthedocs.io/pep_generation.html diff --git a/genpepindex.py b/genpepindex.py index d21ac1d46..2ab6698a0 100755 --- a/genpepindex.py +++ b/genpepindex.py @@ -36,7 +36,7 @@ def main(argv): peps = [] if os.path.isdir(path): for file_path in os.listdir(path): - if file_path == 'pep-0000.txt': + if file_path.startswith('pep-0000.'): continue abs_file_path = os.path.join(path, file_path) if not os.path.isfile(abs_file_path): @@ -61,7 +61,7 @@ def main(argv): else: raise ValueError("argument must be a directory or file path") - with codecs.open('pep-0000.txt', 'w', encoding='UTF-8') as pep0_file: + with codecs.open('pep-0000.rst', 'w', encoding='UTF-8') as pep0_file: write_pep0(peps, pep0_file) if __name__ == "__main__": diff --git a/pep-0001-1.png b/pep-0001-1.png deleted file mode 100644 index 51eb2b258..000000000 Binary files a/pep-0001-1.png and /dev/null differ diff --git a/pep-0001-process_flow.png b/pep-0001-process_flow.png new file mode 100644 index 000000000..0fc8176d2 Binary files /dev/null and b/pep-0001-process_flow.png differ diff --git a/pep-0001.txt b/pep-0001.txt index f680371a0..0c9e00128 100644 --- a/pep-0001.txt +++ b/pep-0001.txt @@ -245,7 +245,22 @@ Once a PEP has been accepted, the reference implementation must be completed. When the reference implementation is complete and incorporated into the main source code repository, the status will be changed to "Final". -A PEP can also be assigned status "Deferred". The PEP author or an +To allow gathering of additional design and interface feedback before committing +to long term stability for a language feature or standard library API, a PEP +may also be marked as "Provisional". This is short for "Provisionally Accepted", +and indicates that the proposal has been accepted for inclusion in the reference +implementation, but additional user feedback is needed before the full design +can be considered "Final". Unlike regular accepted PEPs, provisionally accepted +PEPs may still be Rejected or Withdrawn *even after the related changes have +been included in a Python release*. + +Wherever possible, it is considered preferable to reduce the scope of a proposal +to avoid the need to rely on the "Provisional" status (e.g. by deferring some +features to later PEPs), as this status can lead to version compatibility +challenges in the wider Python ecosystem. PEP 411 provides additional details +on potential use cases for the Provisional status. + +A PEP can also be assigned the status "Deferred". The PEP author or an editor can assign the PEP this status when no progress is being made on the PEP. Once a PEP is deferred, a PEP editor can re-assign it to draft status. @@ -267,7 +282,17 @@ an API can replace version 1. The possible paths of the status of PEPs are as follows: -.. image:: pep-0001-1.png +.. 
image:: pep-0001-process_flow.png + :alt: PEP process flow diagram + +While not shown in the diagram, "Accepted" PEPs may technically move to +"Rejected" or "Withdrawn" even after acceptance. This will only occur if +the implementation process reveals fundamental flaws in the design that were +not noticed prior to acceptance of the PEP. Unlike Provisional PEPs, these +transitions are only permitted if the accepted proposal has *not* been included +in a Python release - released changes must instead go through the regular +deprecation process (which may require a new PEP providing the rationale for +the deprecation). Some Informational and Process PEPs may also have a status of "Active" if they are never meant to be completed. E.g. PEP 1 (this PEP). @@ -281,6 +306,11 @@ reached the Final state. Once a PEP has been completed, the Language and Standard Library References become the formal documentation of the expected behavior. +If changes based on implementation experience and user feedback are made to +Standards Track PEPs while in the Accepted or Provisional state, those changes +should be noted in the PEP, such that the PEP accurately describes the state of +the implementation at the point where it is marked Final. + Informational and Process PEPs may be updated over time to reflect changes to development practices and other details. The precise process followed in these cases will depend on the nature and purpose of the PEP being updated. @@ -345,6 +375,15 @@ Each PEP should have the following parts: appropriate for either the Python language reference or the standard library reference. +9. How to Teach This -- For a PEP that adds new functionality or changes + language behavior, it is helpful to include a section on how to + teach users, new and experienced, how to apply the PEP to their + work. + + This section may include key points and recommended documentation + changes that would help users adopt a new feature or migrate their + code to use a language change. + PEP Formats and Templates ========================= @@ -354,10 +393,8 @@ ReStructuredText_ allows for rich markup that is still quite easy to read, but also results in good-looking and functional HTML. PEP 12 contains instructions and a template [4]_ for reStructuredText PEPs. -A Python script automatically converts PEPs to HTML for viewing on -the web [5]_. The conversion of reStructuredText PEPs is handled by -the Docutils_ module; the same script also renders a legacy plain-text -format of PEP internally, to support pre-reST documents. +The PEP text files are automatically converted to HTML [5]_ for easier +`online reading <https://www.python.org/dev/peps/>`__. PEP Header Preamble =================== @@ -372,7 +409,7 @@ optional and are described below. All other headers are required. :: Author: <list of authors' real names and optionally, email addrs> * BDFL-Delegate: <PEP czar's real name> * Discussions-To: <email address> - Status: <Draft | Active | Accepted | Deferred | Rejected | + Status: <Draft | Active | Accepted | Provisional | Deferred | Rejected | Withdrawn | Final | Superseded> Type: <Standards Track | Informational | Process> * Content-Type: <text/x-rst | text/plain> @@ -441,8 +478,8 @@ Standards Track PEPs will typically have a Python-Version header which indicates the version of Python that the feature will be released with. Standards Track PEPs without a Python-Version header indicate interoperability standards that will initially be supported through -external libraries and tools, and then supplemented by a later PEP to -add support to the standard library. Informational and Process PEPs do +external libraries and tools, and then potentially supplemented by a later PEP +to add support to the standard library. Informational and Process PEPs do not need a Python-Version header. PEPs may have a Requires header, indicating the PEP numbers that this @@ -458,11 +495,15 @@ obsolete. 
Auxiliary Files =============== -PEPs may include auxiliary files such as diagrams. Such files must be +PEPs may include auxiliary files such as diagrams. Such files should be named ``pep-XXXX-Y.ext``, where "XXXX" is the PEP number, "Y" is a serial number (starting at 1), and "ext" is replaced by the actual file extension (e.g. "png"). +Alternatively, all support files may be placed in a subdirectory called +``pep-XXXX``, where "XXXX" is the PEP number. When using a subdirectory, there +are no constraints on the names used in files. + Reporting PEP Bugs, or Submitting PEP Updates ============================================= @@ -472,15 +513,15 @@ factors, such as the maturity of the PEP, the preferences of the PEP author, and the nature of your comments. For the early draft stages of the PEP, it's probably best to send your comments and changes directly to the PEP author. For more mature, or finished PEPs you may -want to submit corrections to the Python `issue tracker`_ so that your -changes don't get lost. If the PEP author is a Python developer, assign the -bug/patch to them, otherwise assign it to a PEP editor. +want to submit corrections as a `GitHub issue`_ or `GitHub pull request`_ so that +your changes don't get lost. When in doubt about where to send your changes, please check first with the PEP author and/or a PEP editor. PEP authors with git push privileges for the PEP repository can update the -PEPs themselves by using "git push" to submit their changes. +PEPs themselves by using "git push" or the GitHub PR interface to submit their +changes. Transferring PEP Ownership @@ -600,11 +641,9 @@ References and Footnotes .. [4] PEP 12, Sample reStructuredText PEP Template, Goodger, Warsaw (http://www.python.org/dev/peps/pep-0012) -.. [5] The script referred to here is pep2pyramid.py, the successor to - pep2html.py, both of which live in the same directory in the hg - repo as the PEPs themselves. Try ``pep2html.py --help`` for - details. The URL for viewing PEPs on the web is - http://www.python.org/dev/peps/. +.. [5] More details on the PEP rendering and publication process can be found + in the PEPs repo README at + https://github.com/python/peps/blob/master/README.rst .. _issue tracker: http://bugs.python.org/ @@ -619,6 +658,8 @@ References and Footnotes .. _`GitHub pull request`: https://github.com/python/peps/pulls +.. [`GitHub issue`]: wait .. _`GitHub issue`: https://github.com/python/peps/issues + Copyright ========= diff --git a/pep-0001/process_flow.svg b/pep-0001/process_flow.svg new file mode 100644 index 000000000..a21ae5af8 --- /dev/null +++ b/pep-0001/process_flow.svg @@ -0,0 +1,580 @@ +[580 lines of SVG markup stripped during extraction; the recoverable content is the PEP process flow diagram, with nodes labelled Provisional, Draft, Withdrawn, Rejected, Final, Accepted, Deferred, Active, and Replaced] diff --git a/pep-0008.txt b/pep-0008.txt index bd0df46fb..21cc99be1 100644 --- a/pep-0008.txt +++ b/pep-0008.txt @@ -68,7 +68,7 @@ Some other good reasons to ignore a particular guideline: Python that don't support the feature recommended by the style guide. -Code lay-out +Code Lay-out ============ Indentation @@ -179,7 +179,6 @@ starts the multiline construct, as in:: 'd', 'e', 'f', ) - Tabs or Spaces? --------------- @@ -198,7 +197,6 @@ the ``-t`` option, it issues warnings about code that illegally mixes tabs and spaces. When using ``-tt`` these warnings become errors. 
These options are highly recommended! - Maximum Line Length ------------------- @@ -249,8 +247,7 @@ Another such case is with ``assert`` statements. Make sure to indent the continued line appropriately. - -Should a line break before or after a binary operator? +Should a Line Break Before or After a Binary Operator? ------------------------------------------------------ For decades the recommended style was to break after binary operators. @@ -287,7 +284,6 @@ In Python code, it is permissible to break before or after a binary operator, as long as the convention is consistent locally. For new code Knuth's style is suggested. - Blank Lines ----------- @@ -309,7 +305,6 @@ you may use them to separate pages of related sections of your file. Note, some editors and web-based code viewers may not recognize control-L as a form feed and will show another glyph in its place. - Source File Encoding -------------------- @@ -333,17 +328,16 @@ technical terms are used which aren't English). In addition, string literals and comments must also be in ASCII. The only exceptions are (a) test cases testing the non-ASCII features, and (b) names of authors. Authors whose names are not based on the -Latin alphabet (latin-1, ISO/IEC 8859-1 character set) MUST provide +Latin alphabet (latin-1, ISO/IEC 8859-1 character set) MUST provide a transliteration of their names in this character set. Open source projects with a global audience are encouraged to adopt a similar policy. - Imports ------- -- Imports should usually be on separate lines, e.g.:: +- Imports should usually be on separate lines:: Yes: import os import sys @@ -359,9 +353,9 @@ Imports Imports should be grouped in the following order: - 1. standard library imports - 2. related third party imports - 3. local application/library specific imports + 1. Standard library imports. + 2. Related third party imports. + 3. Local application/library specific imports. You should put a blank line between each group of imports. @@ -370,16 +364,16 @@ Imports messages) if the import system is incorrectly configured (such as when a directory inside a package ends up on ``sys.path``):: - import mypkg.sibling - from mypkg import sibling - from mypkg.sibling import example + import mypkg.sibling + from mypkg import sibling + from mypkg.sibling import example However, explicit relative imports are an acceptable alternative to absolute imports, especially when dealing with complex package layouts where using absolute imports would be unnecessarily verbose:: - from . import sibling - from .sibling import example + from . import sibling + from .sibling import example Standard library code should avoid complex package layouts and always use absolute imports. @@ -393,7 +387,7 @@ Imports from myclass import MyClass from foo.bar.yourclass import YourClass - If this spelling causes local name clashes, then spell them :: + If this spelling causes local name clashes, then spell them explicitly:: import myclass import foo.bar.yourclass @@ -412,8 +406,7 @@ Imports When republishing names this way, the guidelines below regarding public and internal interfaces still apply. - -Module level dunder names +Module Level Dunder Names ------------------------- Module level "dunders" (i.e. names with two leading and two trailing @@ -421,9 +414,7 @@ underscores) such as ``__all__``, ``__author__``, ``__version__``, etc. should be placed after the module docstring but before any import statements *except* ``from __future__`` imports. 
Python mandates that future-imports must appear in the module before any other code except -docstrings. - -For example:: +docstrings:: """This is the example module. @@ -524,7 +515,6 @@ Avoid extraneous whitespace in the following situations: y = 2 long_variable = 3 - Other Recommendations --------------------- @@ -642,7 +632,8 @@ Other Recommendations if foo == 'blah': one(); two(); three() -When to use trailing commas + +When to Use Trailing Commas =========================== Trailing commas are usually optional, except they are mandatory when @@ -690,7 +681,7 @@ Comments that contradict the code are worse than no comments. Always make a priority of keeping the comments up-to-date when the code changes! -Comments should be complete sentences. The first word should be +Comments should be complete sentences. The first word should be capitalized, unless it is an identifier that begins with a lower case letter (never alter the case of identifiers!). @@ -748,7 +739,7 @@ Conventions for writing good documentation strings - PEP 257 describes good docstring conventions. Note that most importantly, the ``"""`` that ends a multiline docstring should be - on a line by itself, e.g.:: + on a line by itself:: """Return a foobang @@ -882,18 +873,18 @@ Note that there is a separate convention for builtin names: most builtin names are single words (or two words run together), with the CapWords convention used only for exception names and builtin constants. -Type variable names +Type Variable Names ~~~~~~~~~~~~~~~~~~~ Names of type variables introduced in PEP 484 should normally use CapWords preferring short names: ``T``, ``AnyStr``, ``Num``. It is recommended to add suffixes ``_co`` or ``_contra`` to the variables used to declare covariant -or contravariant behavior correspondingly. Examples:: +or contravariant behavior correspondingly:: - from typing import TypeVar + from typing import TypeVar - VT_co = TypeVar('VT_co', covariant=True) - KT_contra = TypeVar('KT_contra', contravariant=True) + VT_co = TypeVar('VT_co', covariant=True) + KT_contra = TypeVar('KT_contra', contravariant=True) Exception Names ~~~~~~~~~~~~~~~ @@ -914,7 +905,7 @@ older convention of prefixing such globals with an underscore (which you might want to do to indicate these globals are "module non-public"). -Function and variable names +Function and Variable Names ~~~~~~~~~~~~~~~~~~~~~~~~~~~ Function names should be lowercase, with words separated by @@ -926,7 +917,7 @@ mixedCase is allowed only in contexts where that's already the prevailing style (e.g. threading.py), to retain backwards compatibility. -Function and method arguments +Function and Method Arguments ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Always use ``self`` for the first argument to instance methods. @@ -966,7 +957,7 @@ Constants are usually defined on a module level and written in all capital letters with underscores separating words. Examples include ``MAX_OVERFLOW`` and ``TOTAL``. -Designing for inheritance +Designing for Inheritance ~~~~~~~~~~~~~~~~~~~~~~~~~ Always decide whether a class's methods and instance variables @@ -975,7 +966,7 @@ doubt, choose non-public; it's easier to make it public later than to make a public attribute non-public. Public attributes are those that you expect unrelated clients of your -class to use, with your commitment to avoid backward incompatible +class to use, with your commitment to avoid backwards incompatible changes. 
Non-public attributes are those that are not intended to be used by third parties; you make no guarantees that non-public attributes won't change or even be removed. @@ -1041,8 +1032,7 @@ With this in mind, here are the Pythonic guidelines: need to avoid accidental name clashes with potential use by advanced callers. - -Public and internal interfaces +Public and Internal Interfaces ------------------------------ Any backwards compatibility guarantees apply only to public interfaces. @@ -1180,9 +1170,7 @@ Programming Recommendations continuation characters thanks to the containing parentheses. - When catching exceptions, mention specific exceptions whenever - possible instead of using a bare ``except:`` clause. - - For example, use:: + possible instead of using a bare ``except:`` clause:: try: import platform_specific_module @@ -1250,17 +1238,16 @@ Programming Recommendations - Context managers should be invoked through separate functions or methods whenever they do something other than acquire and release resources. - For example: Yes:: - with conn.begin_transaction(): - do_stuff_in_transaction(conn) + with conn.begin_transaction(): + do_stuff_in_transaction(conn) No:: - with conn: - do_stuff_in_transaction(conn) + with conn: + do_stuff_in_transaction(conn) The latter example doesn't provide any information to indicate that the ``__enter__`` and ``__exit__`` methods are doing something other @@ -1301,14 +1288,13 @@ Programming Recommendations - Use string methods instead of the string module. String methods are always much faster and share the same API with - unicode strings. Override this rule if backward compatibility with + unicode strings. Override this rule if backwards compatibility with Pythons older than 2.0 is required. - Use ``''.startswith()`` and ``''.endswith()`` instead of string slicing to check for prefixes or suffixes. - startswith() and endswith() are cleaner and less error prone. For - example:: + startswith() and endswith() are cleaner and less error prone:: Yes: if foo.startswith('bar'): No: if foo[:3] == 'bar': @@ -1328,7 +1314,7 @@ Programming Recommendations Note that in Python 3, ``unicode`` and ``basestring`` no longer exist (there is only ``str``) and a bytes object is no longer a kind of - string (it is a sequence of integers instead) + string (it is a sequence of integers instead). - For sequences, (strings, lists, tuples), use the fact that empty sequences are false. :: @@ -1336,8 +1322,8 @@ Programming Recommendations Yes: if not seq: if seq: - No: if len(seq): - if not len(seq): + No: if len(seq): + if not len(seq): - Don't write string literals that rely on significant trailing whitespace. Such trailing whitespace is visually indistinguishable @@ -1375,7 +1361,7 @@ annotations are changing. - For code that wants to make a different use of function annotations it is recommended to put a comment of the form:: - # type: ignore + # type: ignore near the top of the file; this tells type checker to ignore all annotations. (More fine-grained ways of disabling complaints from @@ -1397,7 +1383,7 @@ annotations are changing. can be added in the form of comments. See the relevant section of PEP 484 [6]_. -Variable annotations +Variable Annotations -------------------- PEP 526 introduced variable annotations. 
The style recommendations for them are @@ -1413,19 +1399,19 @@ similar to those on function annotations described above: - Yes:: - code: int + code: int - class Point: - coords: Tuple[int, int] - label: str = '' + class Point: + coords: Tuple[int, int] + label: str = '' - No:: - code:int # No space after colon - code : int # Space before colon + code:int # No space after colon + code : int # Space before colon - class Test: - result: int=0 # No spaces around equality sign + class Test: + result: int=0 # No spaces around equality sign - Although the PEP 526 is accepted for Python 3.6, the variable annotation syntax is the preferred syntax for stub files on all versions of Python @@ -1460,7 +1446,6 @@ References https://www.python.org/dev/peps/pep-0484/#suggested-syntax-for-python-2-7-and-straddling-code - Copyright ========= diff --git a/pep-0101.txt b/pep-0101.txt index 72e35a13d..660a1af5b 100644 --- a/pep-0101.txt +++ b/pep-0101.txt @@ -8,6 +8,7 @@ Type: Informational Content-Type: text/x-rst Created: 22-Aug-2001 Post-History: +Replaces: 102 Abstract @@ -158,7 +159,7 @@ to perform some manual editing steps. - Check the stable buildbots. - Go to http://buildbot.python.org/all/waterfall + Go to http://buildbot.python.org/all/#/grid Look at the buildbots for the release you're making. Ignore any that are offline (or inform the community so diff --git a/pep-0263.txt b/pep-0263.txt index 26bd634f6..7aff27ffd 100644 --- a/pep-0263.txt +++ b/pep-0263.txt @@ -72,7 +72,7 @@ or:: More precisely, the first or second line must match the following regular expression:: - ^[ \t\v]*#.*?coding[:=][ \t]*([-_.a-zA-Z0-9]+) + ^[ \t\f]*#.*?coding[:=][ \t]*([-_.a-zA-Z0-9]+) The first group of this expression is then interpreted as encoding name. If the encoding diff --git a/pep-0292.txt b/pep-0292.txt index 6930372fc..f4001d112 100644 --- a/pep-0292.txt +++ b/pep-0292.txt @@ -9,6 +9,7 @@ Content-Type: text/x-rst Created: 18-Jun-2002 Python-Version: 2.4 Post-History: 18-Jun-2002, 23-Mar-2004, 22-Aug-2004 +Replaces: 215 Abstract diff --git a/pep-0304.txt b/pep-0304.txt index a66268f2e..676592efd 100644 --- a/pep-0304.txt +++ b/pep-0304.txt @@ -9,6 +9,21 @@ Content-Type: text/x-rst Created: 22-Jan-2003 Post-History: 27-Jan-2003, 31-Jan-2003, 17-Jun-2005 +Historical Note +=============== + +While this original PEP was withdrawn, a variant of this feature +was eventually implemented for Python 3.8 in https://bugs.python.org/issue33499 + +Several of the issues and concerns originally raised in this PEP were resolved +by other changes in the intervening years: + +- the introduction of isolated mode to handle potential security concerns +- the switch to ``importlib``, a fully import-hook based import system implementation +- PEP 3147's change in the bytecode cache layout to use ``__pycache__`` + subdirectories, including the ``source_to_cache(path)`` and + ``cache_to_source(path)`` APIs that allow the interpreter to automatically + handle the redirection to a separate cache directory Abstract ======== diff --git a/pep-0345.txt b/pep-0345.txt index f3c546784..561d49faf 100644 --- a/pep-0345.txt +++ b/pep-0345.txt @@ -4,7 +4,7 @@ Version: $Revision$ Last-Modified: $Date$ Author: Richard Jones Discussions-To: Distutils SIG -Status: Accepted +Status: Final Type: Standards Track Content-Type: text/x-rst Created: 28-Apr-2005 diff --git a/pep-0373.txt b/pep-0373.txt index 0ec40db78..e5726d810 100644 --- a/pep-0373.txt +++ b/pep-0373.txt @@ -50,13 +50,12 @@ Maintenance releases ==================== Being the last of the 2.x 
series, 2.7 will have an extended period of -maintenance. The current plan is to support it for at least 10 years -from the initial 2.7 release. This means there will be bugfix releases -until 2020. +maintenance. Specifically, 2.7 will receive bugfix support until +January 1, 2020. All 2.7 development work will cease in 2020. Planned future release dates: -- 2.7.15 2018 +- 2.7.16 late 2018 - early 2019 Dates of previous maintenance releases: @@ -84,6 +83,8 @@ Dates of previous maintenance releases: - 2.7.13 2016-12-17 - 2.7.14rc1 2017-08-26 - 2.7.14 2017-09-16 +- 2.7.15rc1 2018-04-14 +- 2.7.15 2018-05-01 2.7.0 Release Schedule ====================== diff --git a/pep-0376.txt b/pep-0376.txt index 5a16a0dfe..5beb70180 100644 --- a/pep-0376.txt +++ b/pep-0376.txt @@ -3,7 +3,7 @@ Title: Database of Installed Python Distributions Version: $Revision$ Last-Modified: $Date$ Author: Tarek Ziadé -Status: Accepted +Status: Final Type: Standards Track Content-Type: text/x-rst Created: 22-Feb-2009 @@ -152,7 +152,7 @@ This `.dist-info` directory can contain these files: - `RECORD`: records the list of installed files - `INSTALLER`: records the name of the tool used to install the project - `REQUESTED`: the presence of this file indicates that the project - installation was explicitly requested (i.e., not installed as a dependency). + installation was explicitly requested (i.e., not installed as a dependency). The METADATA, RECORD and INSTALLER files are mandatory, while REQUESTED may be missing. diff --git a/pep-0378.txt b/pep-0378.txt index f9464ba3c..0e50044e1 100644 --- a/pep-0378.txt +++ b/pep-0378.txt @@ -144,7 +144,7 @@ specifiers like:: Java offers a `Decimal.Format Class`_ that uses picture patterns (one for positive numbers and an optional one for negatives) such as: ``"#,##0.00;(#,##0.00)"``. It allows arbitrary groupings including -hundreds and ten-thousands and uneven groupings. The special patten +hundreds and ten-thousands and uneven groupings. The special pattern characters are non-localized (using a DOT for a decimal separator and a COMMA for a grouping separator). The user can supply an alternate set of symbols using the formatter's *DecimalFormatSymbols* object. diff --git a/pep-0387.txt b/pep-0387.txt index 0dbec0be0..95c588f06 100644 --- a/pep-0387.txt +++ b/pep-0387.txt @@ -52,7 +52,7 @@ be removed at any time in any way. These include: - Function, class, module, attribute, method, and C-API names and types that are prefixed by "_" (except special names). The contents of these - can also are not subject to the policy. + are also not subject to the policy. - Inheritance patterns of internal classes. diff --git a/pep-0394.txt b/pep-0394.txt index 428ef4f21..8da9fb58f 100644 --- a/pep-0394.txt +++ b/pep-0394.txt @@ -4,12 +4,13 @@ Version: $Revision$ Last-Modified: $Date$ Author: Kerrick Staley , Nick Coghlan , - Barry Warsaw + Barry Warsaw , + Petr Viktorin Status: Active Type: Informational Content-Type: text/x-rst Created: 02-Mar-2011 -Post-History: 04-Mar-2011, 20-Jul-2011, 16-Feb-2012, 30-Sep-2014 +Post-History: 04-Mar-2011, 20-Jul-2011, 16-Feb-2012, 30-Sep-2014, 28-Apr-2018 Resolution: https://mail.python.org/pipermail/python-dev/2012-February/116594.html @@ -22,8 +23,9 @@ Python interpreter (i.e. the version invoked by the ``python`` command). * ``python2`` will refer to some version of Python 2.x. * ``python3`` will refer to some version of Python 3.x. -* for the time being, all distributions *should* ensure that ``python`` - refers to the same target as ``python2``. 
+* for the time being, all distributions *should* ensure that ``python``, + if installed, refers to the same target as ``python2``, unless the user + deliberately overrides this or a virtual environment is active. * however, end users should be aware that ``python`` refers to ``python3`` on at least Arch Linux (that change is what prompted the creation of this PEP), so ``python`` should be used in the shebang line only for scripts @@ -43,8 +45,7 @@ Recommendation * When invoked, ``python2`` should run some version of the Python 2 interpreter, and ``python3`` should run some version of the Python 3 interpreter. -* The more general ``python`` command should be installed whenever - any version of Python 2 is installed and should invoke the same version of +* If the ``python`` command is installed, it should invoke the same version of Python as the ``python2`` command (however, note that some distributions have already chosen to have ``python`` implement the ``python3`` command; see the `Rationale`_ and `Migration Notes`_ below). @@ -62,14 +63,30 @@ Recommendation context. * One exception to this is scripts that are deliberately written to be source compatible with both Python 2.x and 3.x. Such scripts may continue to use - ``python`` on their shebang line without affecting their portability. + ``python`` on their shebang line. +* When packaging software that is source compatible with both versions, + distributions may change such ``python`` shebangs to ``python3``. + This ensures software is used with the latest version of + Python available, and it can remove a dependency on Python 2. * When reinvoking the interpreter from a Python script, querying ``sys.executable`` to avoid hardcoded assumptions regarding the interpreter location remains the preferred approach. +* In controlled environments aimed at expert users, where being explicit + is valued over user experience (for example, in test environments and + package build systems), distributions may choose to not provide the + ``python`` command even if ``python2`` is available. + (All software in such a controlled environment must use ``python3`` or + ``python2`` rather than ``python``, which means scripts that deliberately + use ``python`` need to be modified for such environments.) +* When a virtual environment (created by the PEP 405 ``venv`` package or a + similar tool) is active, the ``python`` command should refer to the + virtual environment's interpreter. In other words, activating a virtual + environment counts as deliberate user action to change the default + ``python`` interpreter. These recommendations are the outcome of the relevant python-dev discussions -in March and July 2011 ([1]_, [2]_), February 2012 ([4]_) and -September 2014 ([6]_). +in March and July 2011 ([1]_, [2]_), February 2012 ([4]_), +September 2014 ([6]_), and discussion on GitHub in April 2018 ([7]_). Rationale @@ -91,11 +108,6 @@ on the part of distribution maintainers. Future Changes to this Recommendation ===================================== -It is anticipated that there will eventually come a time where the third -party ecosystem surrounding Python 3 is sufficiently mature for this -recommendation to be updated to suggest that the ``python`` symlink -refer to ``python3`` rather than ``python2``. - This recommendation will be periodically reviewed over the next few years, and updated when the core development team judges it appropriate. 
As a point of reference, regular maintenance releases for the Python 2.7 series @@ -150,15 +162,13 @@ making such a change. * When the ``pythonX.X`` binaries are provided by a distribution, the ``python2`` and ``python3`` commands should refer to one of those files rather than being provided as a separate binary file. -* It is suggested that even distribution-specific packages follow the - ``python2``/``python3`` convention, even in code that is not intended to +* It is strongly encouraged that distribution-specific packages use ``python2`` + or ``python3`` rather than ``python``, even in code that is not intended to operate on other distributions. This will reduce problems if the distribution later decides to change the version of the Python interpreter that the ``python`` command invokes, or if a sysadmin installs a custom ``python`` command with a different major version than the distribution - default. Distributions can test whether they are fully following this - convention by changing the ``python`` interpreter on a test box and checking - to see if anything breaks. + default. * If the above point is adhered to and sysadmins are permitted to change the ``python`` command, then the ``python`` command should always be implemented as a link to the interpreter binary (or a link to a link) and not vice @@ -267,6 +277,10 @@ References .. [6] PEP 394 - Clarification of what "python" command should invoke (https://mail.python.org/pipermail/python-dev/2014-September/136374.html) +.. [7] PEP 394: Allow the `python` command to not be installed, and other + minor edits + (https://github.com/python/peps/pull/630) + Copyright =========== This document has been placed in the public domain. diff --git a/pep-0411.txt b/pep-0411.txt index d0d263275..01b9f751e 100644 --- a/pep-0411.txt +++ b/pep-0411.txt @@ -4,7 +4,7 @@ Version: $Revision$ Last-Modified: $Date$ Author: Nick Coghlan , Eli Bendersky -Status: Accepted +Status: Active Type: Informational Content-Type: text/x-rst Created: 2012-02-10 diff --git a/pep-0425.txt b/pep-0425.txt index c8e1bc416..7d94cc7f7 100644 --- a/pep-0425.txt +++ b/pep-0425.txt @@ -4,7 +4,7 @@ Version: $Revision$ Last-Modified: 07-Aug-2012 Author: Daniel Holth BDFL-Delegate: Nick Coghlan -Status: Accepted +Status: Final Type: Standards Track Content-Type: text/x-rst Created: 27-Jul-2012 diff --git a/pep-0426.txt b/pep-0426.txt index 737b48f04..576f01334 100644 --- a/pep-0426.txt +++ b/pep-0426.txt @@ -7,7 +7,7 @@ Author: Nick Coghlan , Donald Stufft BDFL-Delegate: Donald Stufft Discussions-To: Distutils SIG -Status: Deferred +Status: Withdrawn Type: Informational Content-Type: text/x-rst Requires: 440, 508, 518 @@ -18,6 +18,20 @@ Post-History: 14 Nov 2012, 5 Feb 2013, 7 Feb 2013, 9 Feb 2013, Replaces: 345 +PEP Withdrawal +============== + +The ground-up metadata redesign proposed in this PEP has been withdrawn in +favour of the more modest proposal in PEP 566, which retains the basic +Key:Value format of previous metadata versions, but also defines a standardised +mechanism for translating that format to nested JSON-compatible data structures. + +Some of the ideas in this PEP (or the related PEP 459) may still be considered +as part of later proposals, but they will be handled in a more incremental +fashion, rather than as a single large proposed change with no feasible +migration plan. + + Abstract ======== @@ -25,16 +39,13 @@ This PEP describes a mechanism for publishing and exchanging metadata related to Python distributions. 
It includes specifics of the field names, and their semantics and usage. -This document specifies version 3.0 of the metadata format. +This document specifies the never released version 2.0 of the metadata format. Version 1.0 is specified in PEP 241. Version 1.1 is specified in PEP 314. Version 1.2 is specified in PEP 345. -Version 2.0 is specified in earlier drafts of this PEP and was never formally -approved for use. - -Version 3.0 of the metadata format migrates from directly defining a +Version 2.0 of the metadata format proposed migrating from directly defining a custom key-value file format to instead defining a JSON-compatible in-memory representation that may be used to define metadata representation in other contexts (such as API and archive format definitions). @@ -44,8 +55,8 @@ fields to be added for particular purposes without requiring updates to the core metadata format. -Note on PEP Deferral -==================== +Note on PEP History +=================== This PEP was initially deferred for an extended period, from December 2013 through to March 2017, as distutils-sig worked through a number of other @@ -74,7 +85,7 @@ of publishing and distributing software to be moved out to PEP 459, a separate proposal for a number of standard metadata extensions that provide additional optional information about a release. -As of September 2017, it has been deferred again, on the grounds that +As of September 2017, it was deferred again, on the grounds that it doesn't actually help solve any particularly pressing problems: - JSON representation would be better handled through defining a @@ -87,6 +98,9 @@ it doesn't actually help solve any particularly pressing problems: .. _specifications: https://packaging.python.org/specifications/ .. _minor spec version update: https://mail.python.org/pipermail/distutils-sig/2017-September/031465.html +Finally, the PEP was withdrawn in February 2018 in favour of PEP 566 (which +pursues that more incremental strategy). + Purpose ======= @@ -391,7 +405,7 @@ binary archive from a source archive. These locations are to be confirmed, since they depend on the definition of sdist 2.0 and the revised installation database standard. There will also be a wheel 1.1 format update after this PEP is approved that - mandates provision of 3.0+ metadata. + mandates provision of 2.0+ metadata. Note that these metadata files MAY be processed even if the version of the containing location is too low to indicate that they are valid. Specifically, @@ -414,7 +428,7 @@ used directly as a data input format. Generating the metadata as part of the publication process also helps to deal with version specific fields (including the source URL and the version field itself). -For backwards compatibility with older installation tools, metadata 3.0 +For backwards compatibility with older installation tools, metadata 2.0 files MAY be distributed alongside legacy metadata. Index servers MAY allow distributions to be uploaded and installation tools @@ -443,8 +457,8 @@ with RFC 3986. The current version of the schema file covers the previous draft of the PEP, and has not yet been updated for the split into the essential dependency resolution metadata and multiple standard extensions, and nor - has it been updated for the various other differences between the 3.0 - draft and the earlier 2.0 drafts. + has it been updated for the various other differences between the current + draft and the earlier drafts. Core metadata @@ -467,7 +481,7 @@ installation to occur. 
Metadata version ---------------- -Version of the file format; ``"3.0"`` is the only legal value. +Version of the file format; ``"2.0"`` is the only legal value. Automated tools consuming metadata SHOULD warn if ``metadata_version`` is greater than the highest version they support, and MUST fail if @@ -481,7 +495,7 @@ all of the needed fields. Example:: - "metadata_version": "3.0" + "metadata_version": "2.0" Generator @@ -1046,7 +1060,7 @@ Appendix A: Conversion notes for legacy metadata ================================================ The reference implementations for converting from legacy metadata to -metadata 3.0 are: +metadata 2.0 are: * the `wheel project `__, which adds the ``bdist_wheel`` command to ``setuptools`` @@ -1114,7 +1128,7 @@ format. Appendix C: Summary of differences from \PEP 345 ================================================= -* Metadata-Version is now 3.0, with semantics specified for handling +* Metadata-Version is now 2.0, with semantics specified for handling version changes * The increasingly complex ad hoc "Key: Value" format has been replaced by @@ -1175,7 +1189,7 @@ provision of multiple versions of the metadata in parallel. Existing tools won't abide by this guideline until they're updated to support the new metadata standard, so the new semantics will first take -effect for a hypothetical 2.x -> 3.0 transition. For the 1.x -> 3.0 +effect for a hypothetical 2.x -> 3.0 transition. For the 1.x -> 2.x transition, we will use the approach where tools continue to produce the existing supplementary files (such as ``entry_points.txt``) in addition to any equivalents specified using the new features of the standard @@ -1283,7 +1297,7 @@ packages. The ability to declare an extension as required is included primarily to allow the definition of the metadata hooks extension to be deferred until -some time after the initial adoption of the metadata 3.0 specification. If +some time after the initial adoption of the metadata 2.0 specification. If a release needs a ``postinstall`` hook to run in order to complete the installation successfully, then earlier versions of tools should fall back to installing from source rather than installing from a wheel file and @@ -1299,10 +1313,10 @@ order to better prioritise our efforts in migrating to the new metadata standard. These all reflect information that may be nice to have in the new metadata, but which can be readily added through metadata extensions or in metadata 2.1 without breaking any use cases already supported by metadata -3.0. +2.0. Once the ``pypi``, ``setuptools``, ``pip``, ``wheel`` and ``distlib`` -projects support creation and consumption of metadata 3.0, then we may +projects support creation and consumption of metadata 2.0, then we may revisit the creation of metadata 2.1 with some or all of these additional features. @@ -1484,7 +1498,7 @@ the idea won't be reconsidered until metadata 2.1 at the earliest). References ========== -This document specifies version 3.0 of the metadata format. +This document specifies version 2.0 of the metadata format. Version 1.0 is specified in PEP 241. Version 1.1 is specified in PEP 314. Version 1.2 is specified in PEP 345. 
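To make the ``metadata_version`` rule in the pep-0426 hunks above concrete, here is a minimal consumer-side sketch in Python (illustrative only, not part of the diff; the ``name`` and ``version`` values are made up, and only fields named in the PEP are used)::

    import json

    def check_metadata_version(metadata, highest_supported=(2, 0)):
        # The rule described above: warn when the metadata is newer than the
        # highest supported version, fail when its *major* version is newer.
        major, minor = (int(p) for p in metadata["metadata_version"].split("."))
        if major > highest_supported[0]:
            raise ValueError("unsupported metadata version: %d.%d" % (major, minor))
        if (major, minor) > highest_supported:
            print("warning: metadata %d.%d is newer than supported" % (major, minor))

    metadata = json.loads('{"metadata_version": "2.0", "name": "example", "version": "0.1"}')
    check_metadata_version(metadata)  # passes silently for "2.0"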
diff --git a/pep-0427.txt b/pep-0427.txt index 47c230398..56599db03 100644 --- a/pep-0427.txt +++ b/pep-0427.txt @@ -5,7 +5,7 @@ Last-Modified: $Date$ Author: Daniel Holth BDFL-Delegate: Nick Coghlan Discussions-To: -Status: Accepted +Status: Final Type: Standards Track Content-Type: text/x-rst Created: 20-Sep-2012 diff --git a/pep-0429.txt b/pep-0429.txt index 31acea7e3..9058ed4df 100644 --- a/pep-0429.txt +++ b/pep-0429.txt @@ -61,13 +61,16 @@ The releases so far: - 3.4.6 final: January 17, 2017 - 3.4.7 candidate 1: July 25, 2017 - 3.4.7 final: August 9, 2017 +- 3.4.8 candidate 1: January 23, 2018 +- 3.4.8 final: February 4, 2018 -.. There are no currently planned releases of Python 3.4. +.. There are no specific plans for the next release of Python 3.4. Planned future releases: -- 3.4.8 candidate 1: January 21, 2018 -- 3.4.8 final: February 4, 2018 +- 3.4.9 candidate 1: July 18, 2018 +- 3.4.9 final: August 1, 2018 + Features for 3.4 diff --git a/pep-0435.txt b/pep-0435.txt index a15fda364..1fe5341df 100644 --- a/pep-0435.txt +++ b/pep-0435.txt @@ -11,6 +11,7 @@ Content-Type: text/x-rst Created: 2013-02-23 Python-Version: 3.4 Post-History: 2013-02-23, 2013-05-02 +Replaces: 354 Resolution: https://mail.python.org/pipermail/python-dev/2013-May/126112.html diff --git a/pep-0440.txt b/pep-0440.txt index a2add4c5e..099816f3d 100644 --- a/pep-0440.txt +++ b/pep-0440.txt @@ -6,7 +6,7 @@ Author: Nick Coghlan , Donald Stufft BDFL-Delegate: Nick Coghlan Discussions-To: Distutils SIG -Status: Accepted +Status: Active Type: Informational Content-Type: text/x-rst Created: 18 Mar 2013 diff --git a/pep-0446.txt b/pep-0446.txt index a5d3de328..23ad82de2 100644 --- a/pep-0446.txt +++ b/pep-0446.txt @@ -8,6 +8,7 @@ Type: Standards Track Content-Type: text/x-rst Created: 5-August-2013 Python-Version: 3.4 +Replaces: 433 Abstract diff --git a/pep-0449.txt b/pep-0449.txt index 2bf122974..569ebc31d 100644 --- a/pep-0449.txt +++ b/pep-0449.txt @@ -5,7 +5,7 @@ Last-Modified: $Date$ Author: Donald Stufft BDFL-Delegate: Richard Jones Discussions-To: distutils-sig@python.org -Status: Accepted +Status: Final Type: Process Content-Type: text/x-rst Created: 04-Aug-2013 diff --git a/pep-0459.txt b/pep-0459.txt index 196753a46..186233c6f 100644 --- a/pep-0459.txt +++ b/pep-0459.txt @@ -5,7 +5,7 @@ Last-Modified: $Date$ Author: Nick Coghlan BDFL-Delegate: Nick Coghlan Discussions-To: Distutils SIG -Status: Deferred +Status: Withdrawn Type: Standards Track Content-Type: text/x-rst Requires: 426 @@ -13,6 +13,17 @@ Created: 11 Nov 2013 Post-History: 21 Dec 2013 +PEP Withdrawal +============== + +This PEP depends on PEP 426, which has itself been withdrawn. See the +PEP Withdrawal section in that PEP for details. + +In the meantime, metadata extensions will continue to be handled as they +have been for past examples like ``entry_points.txt``: as additional files +installed into metadata directories alongside the main `METADATA` file. + + Abstract ======== @@ -22,22 +33,6 @@ Like all metadata extensions, each standard extension format is independently versioned. Changing any of the formats requires an update to this PEP, but does not require an update to the core packaging metadata. -PEP Deferral -============ - -This PEP depends on PEP 426, which has itself been deferred. See the -PEP Deferral section in that PEP for details. - -.. note:: - - These extensions may eventually be separated out into their own PEPs, - but we're already suffering from PEP overload in the packaging - metadata space. 
- - This PEP was initially created by slicing out large sections of earlier - drafts of PEP 426 and making them extensions, so some of the specifics - may still be rough in the new context. - Standard Extension Namespace ============================ diff --git a/pep-0464.txt b/pep-0464.txt index 45f7af058..dd1bf5f20 100644 --- a/pep-0464.txt +++ b/pep-0464.txt @@ -5,7 +5,7 @@ Last-Modified: $Date$ Author: Donald Stufft BDFL-Delegate: Richard Jones Discussions-To: distutils-sig@python.org -Status: Accepted +Status: Final Type: Process Content-Type: text/x-rst Created: 02-Mar-2014 diff --git a/pep-0467.txt b/pep-0467.txt index 77a9c1067..08eb5ec40 100644 --- a/pep-0467.txt +++ b/pep-0467.txt @@ -2,13 +2,13 @@ PEP: 467 Title: Minor API improvements for binary sequences Version: $Revision$ Last-Modified: $Date$ -Author: Nick Coghlan +Author: Nick Coghlan , Ethan Furman Status: Draft Type: Standards Track Content-Type: text/x-rst Created: 2014-03-30 -Python-Version: 3.5 -Post-History: 2014-03-30 2014-08-15 2014-08-16 +Python-Version: 3.8 +Post-History: 2014-03-30 2014-08-15 2014-08-16 2016-06-07 2016-09-01 Abstract @@ -20,22 +20,25 @@ that is now referred to as ``bytearray``. Other aspects of operating in the binary domain in Python have also evolved over the course of the Python 3 series. -This PEP proposes four small adjustments to the APIs of the ``bytes``, -``bytearray`` and ``memoryview`` types to make it easier to operate entirely -in the binary domain: +This PEP proposes five small adjustments to the APIs of the ``bytes`` and +``bytearray`` types to make it easier to operate entirely in the binary domain: * Deprecate passing single integer values to ``bytes`` and ``bytearray`` -* Add ``bytes.zeros`` and ``bytearray.zeros`` alternative constructors -* Add ``bytes.byte`` and ``bytearray.byte`` alternative constructors -* Add ``bytes.iterbytes``, ``bytearray.iterbytes`` and - ``memoryview.iterbytes`` alternative iterators +* Add ``bytes.fromsize`` and ``bytearray.fromsize`` alternative constructors +* Add ``bytes.fromord`` and ``bytearray.fromord`` alternative constructors +* Add ``bytes.getbyte`` and ``bytearray.getbyte`` byte retrieval methods +* Add ``bytes.iterbytes`` and ``bytearray.iterbytes`` alternative iterators + +And one built-in:: + +* bchr Proposals ========= -Deprecation of current "zero-initialised sequence" behaviour ------------------------------------------------------------- +Deprecation of current "zero-initialised sequence" behaviour without removal +---------------------------------------------------------------------------- Currently, the ``bytes`` and ``bytearray`` constructors accept an integer argument and interpret it as meaning to create a zero-initialised sequence @@ -46,62 +49,75 @@ of the given size:: >>> bytearray(3) bytearray(b'\x00\x00\x00') -This PEP proposes to deprecate that behaviour in Python 3.5, and remove it -entirely in Python 3.6. +This PEP proposes to deprecate that behaviour in Python 3.6, but to leave +it in place for at least as long as Python 2.7 is supported, possibly +indefinitely. No other changes are proposed to the existing constructors. 
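The ambiguity motivating this deprecation is easy to demonstrate in any current Python interpreter: an integer argument means "zero-initialised sequence of that length", while a single-item list of the same integer means "one byte with that value"::

    >>> bytes(3)      # length, zero-filled
    b'\x00\x00\x00'
    >>> bytes([3])    # one byte with value 3
    b'\x03'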
-Addition of explicit "zero-initialised sequence" constructors -------------------------------------------------------------- +Addition of explicit "count and byte initialised sequence" constructors +----------------------------------------------------------------------- To replace the deprecated behaviour, this PEP proposes the addition of an -explicit ``zeros`` alternative constructor as a class method on both -``bytes`` and ``bytearray``:: +explicit ``fromsize`` alternative constructor as a class method on both +``bytes`` and ``bytearray`` whose first argument is the count, and whose +second argument is the fill byte to use (defaults to ``\x00``):: - >>> bytes.zeros(3) + >>> bytes.fromsize(3) b'\x00\x00\x00' - >>> bytearray.zeros(3) + >>> bytearray.fromsize(3) bytearray(b'\x00\x00\x00') + >>> bytes.fromsize(5, b'\x0a') + b'\x0a\x0a\x0a\x0a\x0a' + >>> bytearray.fromsize(5, b'\x0a') + bytearray(b'\x0a\x0a\x0a\x0a\x0a') -It will behave just as the current constructors behave when passed a single -integer. - -The specific choice of ``zeros`` as the alternative constructor name is taken -from the corresponding initialisation function in NumPy (although, as these -are 1-dimensional sequence types rather than N-dimensional matrices, the -constructors take a length as input rather than a shape tuple) +``fromsize`` will behave just as the current constructors behave when passed a single +integer, while allowing for non-zero fill values when needed. -Addition of explicit "single byte" constructors ------------------------------------------------ +Addition of "bchr" function and explicit "single byte" constructors +------------------------------------------------------------------- -As binary counterparts to the text ``chr`` function, this PEP proposes the -addition of an explicit ``byte`` alternative constructor as a class method -on both ``bytes`` and ``bytearray``:: +As binary counterparts to the text ``chr`` function, this PEP proposes +the addition of a ``bchr`` function and an explicit ``fromord`` alternative +constructor as a class method on both ``bytes`` and ``bytearray``:: - >>> bytes.byte(3) - b'\x03' - >>> bytearray.byte(3) - bytearray(b'\x03') + >>> bchr(ord("A")) + b'A' + >>> bchr(ord(b"A")) + b'A' + >>> bytes.fromord(65) + b'A' + >>> bytearray.fromord(65) + bytearray(b'A') These methods will only accept integers in the range 0 to 255 (inclusive):: - >>> bytes.byte(512) + >>> bytes.fromord(512) Traceback (most recent call last): File "", line 1, in - ValueError: bytes must be in range(0, 256) + ValueError: integer must be in range(0, 256) - >>> bytes.byte(1.0) + >>> bytes.fromord(1.0) Traceback (most recent call last): File "", line 1, in TypeError: 'float' object cannot be interpreted as an integer -The documentation of the ``ord`` builtin will be updated to explicitly note -that ``bytes.byte`` is the inverse operation for binary data, while ``chr`` -is the inverse operation for text data. 
+While this does create some duplication, there are valid reasons for it: -Behaviourally, ``bytes.byte(x)`` will be equivalent to the current +* the ``bchr`` builtin is to recreate the ord/chr/unichr trio from Python + 2 under a different naming scheme +* the class method is mainly for the ``bytearray.fromord`` case, with + ``bytes.fromord`` added for consistency + +The documentation of the ``ord`` builtin will be updated to explicitly note +that ``bchr`` is the primary inverse operation for binary data, while ``chr`` +is the inverse operation for text data, and that ``bytes.fromord`` and +``bytearray.fromord`` also exist. + +Behaviourally, ``bytes.fromord(x)`` will be equivalent to the current ``bytes([x])`` (and similarly for ``bytearray``). The new spelling is expected to be easier to discover and easier to read (especially when used in conjunction with indexing operations on binary sequence types). @@ -110,35 +126,37 @@ As a separate method, the new spelling will also work better with higher order functions like ``map``. +Addition of "getbyte" method to retrieve a single byte +------------------------------------------------------ + +This PEP proposes that ``bytes`` and ``bytearray`` gain the method ``getbyte`` +which will always return ``bytes``:: + + >>> b'abc'.getbyte(0) + b'a' + +If an index is asked for that doesn't exist, ``IndexError`` is raised:: + + >>> b'abc'.getbyte(9) + Traceback (most recent call last): + File "<stdin>", line 1, in <module> + IndexError: index out of range + + Addition of optimised iterator methods that produce ``bytes`` objects --------------------------------------------------------------------- -This PEP proposes that ``bytes``, ``bytearray`` and ``memoryview`` gain an -optimised ``iterbytes`` method that produces length 1 ``bytes`` objects -rather than integers:: +This PEP proposes that ``bytes`` and ``bytearray`` gain an optimised +``iterbytes`` method that produces length 1 ``bytes`` objects rather than +integers:: for x in data.iterbytes(): # x is a length 1 ``bytes`` object, rather than an integer -The method can be used with arbitrary buffer exporting objects by wrapping -them in a ``memoryview`` instance first:: +For example:: - for x in memoryview(data).iterbytes(): - # x is a length 1 ``bytes`` object, rather than an integer - -For ``memoryview``, the semantics of ``iterbytes()`` are defined such that:: - - memview.tobytes() == b''.join(memview.iterbytes()) - -This allows the raw bytes of the memory view to be iterated over without -needing to make a copy, regardless of the defined shape and format. - -The main advantage this method offers over the ``map(bytes.byte, data)`` -approach is that it is guaranteed *not* to fail midstream with a -``ValueError`` or ``TypeError``. By contrast, when using the ``map`` based -approach, the type and value of the individual items in the iterable are -only checked as they are retrieved and passed through the ``bytes.byte`` -constructor. + >>> tuple(b"ABC".iterbytes()) + (b'A', b'B', b'C') Design discussion ================= @@ -163,10 +181,18 @@ This PEP isn't revisiting that original design decision, just changing the spelling as users sometimes find the current behaviour of the binary sequence constructors surprising. In particular, there's a reasonable case to be made that ``bytes(x)`` (where ``x`` is an integer) should behave like the -``bytes.byte(x)`` proposal in this PEP. Providing both behaviours as separate +``bytes.fromord(x)`` proposal in this PEP. Providing both behaviours as separate class methods avoids that ambiguity. 
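For experimenting with these proposals today, rough pure-Python approximations of the proposed additions can be built from the existing constructors (a sketch only; the actual proposal adds builtin and method implementations with the error messages shown above)::

    def bchr(i):
        # Proposed builtin: binary counterpart to chr().
        return bytes([i])  # rejects values outside range(0, 256)

    def fromsize(count, fill=b'\x00'):
        # Approximates bytes.fromsize(count, fill).
        return fill * count

    def getbyte(data, index):
        # Approximates data.getbyte(index); raises IndexError when out of range.
        return bytes([data[index]])

    def iterbytes(data):
        # Approximates data.iterbytes().
        return (bytes([b]) for b in data)

    assert fromsize(3) == b'\x00\x00\x00'
    assert bchr(65) == b'A'
    assert getbyte(b'abc', 0) == b'a'
    assert tuple(iterbytes(b'ABC')) == (b'A', b'B', b'C')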
+Open Questions
+==============
+
+Do we add ``iterbytes`` to ``memoryview``, or modify
+``memoryview.cast()`` to accept ``'s'`` as a single-byte interpretation? Or
+do we ignore ``memoryview`` for now and add it later?
+
 
 References
 ==========
@@ -180,19 +206,11 @@ References
    (http://bugs.python.org/issue21644)
 .. [5] August 2014 discussion thread on python-dev
    (https://mail.python.org/pipermail/python-ideas/2014-March/027295.html)
+.. [6] June 2016 discussion thread on python-dev
+   (https://mail.python.org/pipermail/python-dev/2016-June/144875.html)
 
 Copyright
 =========
 
 This document has been placed in the public domain.
-
-
-..
-   Local Variables:
-   mode: indented-text
-   indent-tabs-mode: nil
-   sentence-end-double-space: t
-   fill-column: 70
-   coding: utf-8
-   End:
diff --git a/pep-0470.txt b/pep-0470.txt
index 2ab5d62ba..87b904465 100644
--- a/pep-0470.txt
+++ b/pep-0470.txt
@@ -5,7 +5,7 @@ Last-Modified: $Date$
 Author: Donald Stufft
 BDFL-Delegate: Paul Moore
 Discussions-To: distutils-sig@python.org
-Status: Accepted
+Status: Final
 Type: Process
 Content-Type: text/x-rst
 Created: 12-May-2014
diff --git a/pep-0478.txt b/pep-0478.txt
index b94770927..d69292dab 100644
--- a/pep-0478.txt
+++ b/pep-0478.txt
@@ -57,13 +57,15 @@ The releases so far:
 - 3.5.3 final: January 17, 2017
 - 3.5.4 candidate 1: July 25, 2017
 - 3.5.4 final: August 8, 2017
+- 3.5.5 candidate 1: January 23, 2018
+- 3.5.5 final: February 4, 2018
 
-.. There are no currently planned releases for Python 3.5.
+.. There are no specific plans for the next release of Python 3.5.
 
 Planned future releases:
 
-- 3.5.5 candidate 1: January 21, 2018
-- 3.5.5 final: February 4, 2018
+- 3.5.6 candidate 1: July 18, 2018
+- 3.5.6 final: August 1, 2018
diff --git a/pep-0484.txt b/pep-0484.txt
index 43684d436..b5a055049 100644
--- a/pep-0484.txt
+++ b/pep-0484.txt
@@ -5,7 +5,7 @@ Last-Modified: $Date$
 Author: Guido van Rossum , Jukka Lehtosalo , Łukasz Langa
 BDFL-Delegate: Mark Shannon
 Discussions-To: Python-Dev
-Status: Accepted
+Status: Provisional
 Type: Standards Track
 Content-Type: text/x-rst
 Created: 29-Sep-2014
@@ -342,7 +342,7 @@ Additionally, ``Any`` is a valid value for every type variable.
 Consider the following::
 
   def count_truthy(elements: List[Any]) -> int:
-      return sum(1 for elem in elements if element)
+      return sum(1 for elem in elements if elem)
 
 This is equivalent to omitting the generic notation and just saying
 ``elements: List``.
@@ -355,6 +355,7 @@ You can include a ``Generic`` base class to define a user-defined class
 as generic.  Example::
 
   from typing import TypeVar, Generic
+  from logging import Logger
 
   T = TypeVar('T')
 
@@ -373,7 +374,7 @@ as generic.  Example::
           return self.value
 
       def log(self, message: str) -> None:
-          self.logger.info('{}: {}'.format(self.name message))
+          self.logger.info('{}: {}'.format(self.name, message))
 
 ``Generic[T]`` as a base class defines that the class ``LoggedVar``
 takes a single type parameter ``T``. This also makes ``T`` valid as
@@ -582,9 +583,9 @@ argument(s) is substituted.  Otherwise, ``Any`` is assumed.  Example::
 
   T = TypeVar('T')
 
  class Node(Generic[T]):
+      x = None  # type: T # Instance attribute (see below)
       def __init__(self, label: T = None) -> None:
           ...
-      x = None # Type: T
 
  x = Node('')  # Inferred type is Node[str]
  y = Node(0)   # Inferred type is Node[int]
@@ -983,15 +984,17 @@ for example, the above is equivalent to::
 
   def handle_employee(e: Optional[Employee]) -> None: ...
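The shorthand nature of ``Optional`` here is directly observable with the
``typing`` module, since ``Optional[X]`` is defined as ``Union[X, None]``; a
small illustration::

    from typing import Optional, Union

    # the two spellings name the same type and compare equal
    assert Optional[str] == Union[str, None]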
-An optional type is also automatically assumed when the default value is -``None``, for example:: +A past version of this PEP allowed type checkers to assume an optional +type when the default value is ``None``, as in this code:: def handle_employee(e: Employee = None): ... -This is equivalent to:: +This would have been treated as equivalent to:: def handle_employee(e: Optional[Employee] = None) -> None: ... +This is no longer the recommended behavior. Type checkers should move +towards requiring the optional type to be made explicit. Support for singleton types in unions ------------------------------------- @@ -1367,11 +1370,12 @@ Positional-only arguments Some functions are designed to take their arguments only positionally, and expect their callers never to use the argument's name to provide that argument by keyword. All arguments with names beginning with -``__`` are assumed to be positional-only:: +``__`` are assumed to be positional-only, except if their names also +end with ``__``:: - def quux(__x: int) -> None: ... + def quux(__x: int, __y__: int = 0) -> None: ... - quux(3) # This call is fine. + quux(3, __y__=1) # This call is fine. quux(__x=3) # This call is an error. @@ -1409,7 +1413,7 @@ for example:: c = None # type: Coroutine[List[str], str, int] ... x = c.send('hi') # type: List[str] - async def bar(): -> None: + async def bar() -> None: x = await c # type: int The module also provides generic ABCs ``Awaitable``, @@ -1460,14 +1464,11 @@ No first-class syntax support for explicitly marking variables as being of a specific type is added by this PEP. To help with type inference in complex cases, a comment of the following format may be used:: - x = [] # type: List[Employee] + x = [] # type: List[Employee] x, y, z = [], [], [] # type: List[int], List[int], List[str] x, y, z = [], [], [] # type: (List[int], List[int], List[str]) a, b, *c = range(5) # type: float, float, List[float] - x = [ - 1, - 2, - ] # type: List[int] + x = [1, 2] # type: List[int] Type comments should be put on the last line of the statement that contains the variable definition. They can also be placed on @@ -1857,6 +1858,14 @@ Stub file package authors might use the following snippet in ``setup.py``:: ], ... +(*UPDATE:* As of June 2018 the recommended way to distribute type +hints for third-party packages has changed -- in addition to typeshed +(see the next section) there is now a standard for distributing type +hints, PEP 561. It supports separately installable packages containing +stubs, stub files included in the same distribution as the executable +code of a package, and inline type hints, the latter two options +enabled by including a file named ``py.typed`` in the package.) + The Typeshed Repo ----------------- diff --git a/pep-0485.txt b/pep-0485.txt index 1d800f4e9..720828b0d 100644 --- a/pep-0485.txt +++ b/pep-0485.txt @@ -463,7 +463,7 @@ Expected Uses The primary expected use case is various forms of testing -- "are the results computed near what I expect as a result?" This sort of test may or may not be part of a formal unit testing suite. Such testing -could be used one-off at the command line, in an iPython notebook, +could be used one-off at the command line, in an IPython notebook, part of doctests, or simple asserts in an ``if __name__ == "__main__"`` block. diff --git a/pep-0491.txt b/pep-0491.txt index 075203473..559ba8d4f 100644 --- a/pep-0491.txt +++ b/pep-0491.txt @@ -268,7 +268,7 @@ categories based on GNU autotools. 
This expanded scheme should help installers to implement system policy, but
 installers may root each category at any location.
 
-A UNIX install scheme might map the categories to their installation patnhs
+A UNIX install scheme might map the categories to their installation paths
 like this::
 
     {
diff --git a/pep-0494.txt b/pep-0494.txt
index 389e00a3c..bf7647af2 100644
--- a/pep-0494.txt
+++ b/pep-0494.txt
@@ -34,9 +34,9 @@ Release Manager and Crew
 3.6 Lifespan
 ============
 
-3.6 will receive bugfix updates approximately every 3-6 months for
-approximately 18 months.  After the release of 3.7.0 final, a final
-3.6 bugfix update will be released.  After that, it is expected that
+3.6 will receive bugfix updates approximately every 3 months for
+approximately 24 months.  After the release of 3.7.0 final, two more
+3.6 bugfix updates will be released.  After that, it is expected that
 security updates (source only) will be released until 5 years after
 the release of 3.6 final, so until approximately December 2021.
 
@@ -93,32 +93,39 @@ Actual:
 
 - 3.6.4 final: 2017-12-19
 
-Expected:
-
 3.6.5 schedule
 --------------
 
-- 3.6.5 candidate: 2018-03-12 (tenative)
+- 3.6.5 candidate: 2018-03-13
 
-- 3.6.5 final: 2018-03-26 (tentative)
+- 3.6.5 final: 2018-03-28
 
 3.6.6 schedule
 --------------
 
-- 3.6.6 candidate: 2018-06-04 (tenative)
+- 3.6.6 candidate: 2018-06-12
 
-- 3.6.6 final: 2018-06-15 (tentative)
+- 3.6.6 final: 2018-06-27
+
+Expected:
 
 3.6.7 schedule
 --------------
 
+- 3.6.7 candidate: 2018-09-10 (tentative)
+
+- 3.6.7 final: 2018-09-24 (tentative)
+
+3.6.8 schedule
+--------------
+
 Final maintenance mode release, final binary releases.
 
-- 3.6.6 candidate: 2018-09-10 (tenative)
+- 3.6.8 candidate: 2018-12-03 (tentative)
 
-- 3.6.6 final: 2018-09-24 (tentative)
+- 3.6.8 final: 2018-12-16 (tentative)
 
-3.6.8 and beyond schedule
+3.6.9 and beyond schedule
 -------------------------
 
 Security fixes only, as needed, until 2021-12
diff --git a/pep-0505.rst b/pep-0505.rst
new file mode 100644
index 000000000..3c0fd188c
--- /dev/null
+++ b/pep-0505.rst
@@ -0,0 +1,748 @@
+PEP: 505
+Title: None-aware operators
+Version: $Revision$
+Last-Modified: $Date$
+Author: Mark E. Haase , Steve Dower
+Status: Draft
+Type: Standards Track
+Content-Type: text/x-rst
+Created: 18-Sep-2015
+Python-Version: 3.8
+
+Abstract
+========
+
+Several modern programming languages have so-called "``null``-coalescing" or
+"``null``-aware" operators, including C# [1]_, Dart [2]_, Perl, Swift, and PHP
+(starting in version 7). These operators provide syntactic sugar for common
+patterns involving null references.
+
+* The "``null``-coalescing" operator is a binary operator that returns its left
+  operand if it is not ``null``. Otherwise it returns its right operand.
+* The "``null``-aware member access" operator accesses an instance member only
+  if that instance is non-``null``. Otherwise it returns ``null``. (This is also
+  called a "safe navigation" operator.)
+* The "``null``-aware index access" operator accesses an element of a collection
+  only if that collection is non-``null``. Otherwise it returns ``null``. (This
+  is another type of "safe navigation" operator.)
+
+This PEP proposes three ``None``-aware operators for Python, based on the
+definitions and other languages' implementations of those above. Specifically:
+
+* The "``None`` coalescing" binary operator ``??`` returns the left hand side
+  if it evaluates to a value that is not ``None``, or else it evaluates and
+  returns the right hand side.
A coalescing ``??=`` augmented assignment
+  operator is included.
+* The "``None``-aware attribute access" operator ``?.`` evaluates the complete
+  expression if the left hand side evaluates to a value that is not ``None``.
+* The "``None``-aware indexing" operator ``?[]`` evaluates the complete
+  expression if the left hand side evaluates to a value that is not ``None``.
+
+Syntax and Semantics
+====================
+
+Specialness of ``None``
+-----------------------
+
+The ``None`` object denotes the lack of a value. For the purposes of these
+operators, the lack of a value indicates that the remainder of the expression
+also lacks a value and should not be evaluated.
+
+A rejected proposal was to treat any value that evaluates to false in a
+Boolean context as not having a value. However, the purpose of these operators
+is to propagate the "lack of value" state, rather than the "false" state.
+
+Some argue that this makes ``None`` special. We contend that ``None`` is
+already special, and that using it as both the test and the result of these
+operators does not change the existing semantics in any way.
+
+See the `Rejected Ideas`_ section for discussion on the rejected approaches.
+
+Grammar changes
+---------------
+
+The following rules of the Python grammar are updated to read::
+
+    augassign: ('+=' | '-=' | '*=' | '@=' | '/=' | '%=' | '&=' | '|=' | '^=' |
+                '<<=' | '>>=' | '**=' | '//=' | '??=')
+
+    power: coalesce ['**' factor]
+    coalesce: atom_expr ['??' factor]
+    atom_expr: ['await'] atom trailer*
+    trailer: ('(' [arglist] ')' |
+              '[' subscriptlist ']' |
+              '?[' subscriptlist ']' |
+              '.' NAME |
+              '?.' NAME)
+
+Inserting the ``coalesce`` rule in this location ensures that expressions
+resulting in ``None`` are naturally coalesced before they are used in
+operations that would typically raise ``TypeError``. Like ``and`` and ``or``,
+the right-hand expression is not evaluated until the left-hand side is
+determined to be ``None``. For example::
+
+    a, b = None, None
+    def c(): return None
+    def ex(): raise Exception()
+
+    (a ?? 2 ** b ?? 3) == a ?? (2 ** (b ?? 3))
+    (a * b ?? c // d) == a * (b ?? c) // d
+    (a ?? True and b ?? False) == (a ?? True) and (b ?? False)
+    (c() ?? c() ?? True) == True
+    (True ?? ex()) == True
+    (c ?? ex)() == c()
+
+Augmented coalescing assignment only rebinds the name if its current value is
+``None``. If the target name already has a value, the right-hand side is not
+evaluated. For example::
+
+    a = None
+    b = ''
+    c = 0
+
+    a ??= 'value'
+    b ??= undefined_name
+    c ??= shutil.rmtree('/')    # don't try this at home, kids
+
+    assert a == 'value'
+    assert b == ''
+    assert c == 0 and any(os.scandir('/'))
+
+Adding new trailers for the other ``None``-aware operators ensures that they
+may be used in all valid locations for the existing equivalent operators,
+including as part of an assignment target (more details below). As the existing
+evaluation rules are not directly embedded in the grammar, we specify the
+required changes here.
+
+Assume that the ``atom`` is always successfully evaluated. Each ``trailer`` is
+then evaluated from left to right, applying its own parameter (either its
+arguments, subscripts or attribute name) to produce the value for the next
+``trailer``. Finally, if present, ``await`` is applied.
+
+For example, ``await a.b(c).d[e]`` is currently parsed as
+``['await', 'a', '.b', '(c)', '.d', '[e]']`` and evaluated::
+
+    _v = a
+    _v = _v.b
+    _v = _v(c)
+    _v = _v.d
+    _v = _v[e]
+    await _v
+
+When a ``None``-aware operator is present, the left-to-right evaluation may be
+short-circuited. For example, ``await a?.b(c).d?[e]`` is evaluated::
+
+    _v = a
+    if _v is not None:
+        _v = _v.b
+        _v = _v(c)
+        _v = _v.d
+        if _v is not None:
+            _v = _v[e]
+    await _v
+
+.. note::
+    ``await`` will almost certainly fail in this context, as it would in
+    the case where code attempts ``await None``. We are not proposing to add a
+    ``None``-aware ``await`` keyword here, and merely include it in this
+    example for completeness of the specification, since the ``atom_expr``
+    grammar rule includes the keyword. If it were in its own rule, we would have
+    never mentioned it.
+
+Parenthesised expressions are handled by the ``atom`` rule (not shown above),
+which will implicitly terminate the short-circuiting behaviour of the above
+transformation. For example, ``(a?.b ?? c).d?.e`` is evaluated as::
+
+    # a?.b
+    _v = a
+    if _v is not None:
+        _v = _v.b
+
+    # ... ?? c
+    if _v is None:
+        _v = c
+
+    # (...).d?.e
+    _v = _v.d
+    if _v is not None:
+        _v = _v.e
+
+When used as an assignment target, the ``None``-aware operations may only be
+used in a "load" context. That is, ``a?.b = 1`` and ``a?[b] = 1`` will raise
+``SyntaxError``. Use earlier in the expression (``a?.b.c = 1``) is permitted,
+though unlikely to be useful unless combined with a coalescing operation::
+
+    (a?.b ?? d).c = 1
+
+
+Examples
+========
+
+This section presents some examples of common ``None`` patterns and shows what
+conversion to use ``None``-aware operators may look like.
+
+Standard Library
+----------------
+
+Using the ``find-pep505.py`` script [3]_, an analysis of the Python 3.7 standard
+library discovered up to 678 code snippets that could be replaced with use of
+one of the ``None``-aware operators::
+
+    $ find /usr/lib/python3.7 -name '*.py' | xargs python3.7 find-pep505.py
+
+    Total None-coalescing `if` blocks: 449
+    Total [possible] None-coalescing `or`: 120
+    Total None-coalescing ternaries: 27
+    Total Safe navigation `and`: 13
+    Total Safe navigation `if` blocks: 61
+    Total Safe navigation ternaries: 8
+
+Some of these are shown below as examples before and after converting to use the
+new operators.
+
+From ``bisect.py``::
+
+    def insort_right(a, x, lo=0, hi=None):
+        # ...
+        if hi is None:
+            hi = len(a)
+        # ...
+
+After updating to use the ``??=`` augmented assignment statement::
+
+    def insort_right(a, x, lo=0, hi=None):
+        # ...
+        hi ??= len(a)
+        # ...
+
+From ``calendar.py``::
+
+    encoding = options.encoding
+    if encoding is None:
+        encoding = sys.getdefaultencoding()
+    optdict = dict(encoding=encoding, css=options.css)
+
+After updating to use the ``??`` operator::
+
+    optdict = dict(encoding=encoding ?? sys.getdefaultencoding(),
+                   css=options.css)
+
+From ``dis.py``::
+
+    def _get_const_info(const_index, const_list):
+        argval = const_index
+        if const_list is not None:
+            argval = const_list[const_index]
+        return argval, repr(argval)
+
+After updating to use the ``?[]`` and ``??`` operators::
+
+    def _get_const_info(const_index, const_list):
+        argval = const_list?[const_index] ??
const_index + return argval, repr(argval) + +From ``inspect.py``:: + + for base in object.__bases__: + for name in getattr(base, "__abstractmethods__", ()): + value = getattr(object, name, None) + if getattr(value, "__isabstractmethod__", False): + return True + +After updating to use the ``?.`` operator (and deliberately not converting to +use ``any()``):: + + for base in object.__bases__: + for name in base?.__abstractmethods__ ?? (): + if object?.name?.__isabstractmethod__: + return True + +From ``os.py``:: + + if entry.is_dir(): + dirs.append(name) + if entries is not None: + entries.append(entry) + else: + nondirs.append(name) + +After updating to use the ``?.`` operator:: + + if entry.is_dir(): + dirs.append(name) + entries?.append(entry) + else: + nondirs.append(name) + + +jsonify +------- + +This example is from a Python web crawler that uses the Flask framework as its +front-end. This function retrieves information about a web site from a SQL +database and formats it as JSON to send to an HTTP client:: + + class SiteView(FlaskView): + @route('/site/', methods=['GET']) + def get_site(self, id_): + site = db.query('site_table').find(id_) + + return jsonify( + first_seen=site.first_seen.isoformat() if site.first_seen is not None else None, + id=site.id, + is_active=site.is_active, + last_seen=site.last_seen.isoformat() if site.last_seen is not None else None, + url=site.url.rstrip('/') + ) + +Both ``first_seen`` and ``last_seen`` are allowed to be ``null`` in the +database, and they are also allowed to be ``null`` in the JSON response. JSON +does not have a native way to represent a ``datetime``, so the server's contract +states that any non-``null`` date is represented as an ISO-8601 string. + +Without knowing the exact semantics of the ``first_seen`` and ``last_seen`` +attributes, it is impossible to know whether the attribute can be safely or +performantly accessed multiple times. + +One way to fix this code is to replace each conditional expression with an +explicit value assignment and a full ``if``/``else`` block:: + + class SiteView(FlaskView): + @route('/site/', methods=['GET']) + def get_site(self, id_): + site = db.query('site_table').find(id_) + + first_seen_dt = site.first_seen + if first_seen_dt is None: + first_seen = None + else: + first_seen = first_seen_dt.isoformat() + + last_seen_dt = site.last_seen + if last_seen_dt is None: + last_seen = None + else: + last_seen = last_seen_dt.isoformat() + + return jsonify( + first_seen=first_seen, + id=site.id, + is_active=site.is_active, + last_seen=last_seen, + url=site.url.rstrip('/') + ) + +This adds ten lines of code and four new code paths to the function, +dramatically increasing the apparent complexity. 
Rewriting using the
+``None``-aware attribute operator results in shorter code with clearer
+intent::
+
+    class SiteView(FlaskView):
+        @route('/site/', methods=['GET'])
+        def get_site(self, id_):
+            site = db.query('site_table').find(id_)
+
+            return jsonify(
+                first_seen=site.first_seen?.isoformat(),
+                id=site.id,
+                is_active=site.is_active,
+                last_seen=site.last_seen?.isoformat(),
+                url=site.url.rstrip('/')
+            )
+
+Grab
+----
+
+The next example is from a Python scraping library called `Grab
+`_::
+
+    class BaseUploadObject(object):
+        def find_content_type(self, filename):
+            ctype, encoding = mimetypes.guess_type(filename)
+            if ctype is None:
+                return 'application/octet-stream'
+            else:
+                return ctype
+
+    class UploadContent(BaseUploadObject):
+        def __init__(self, content, filename=None, content_type=None):
+            self.content = content
+            if filename is None:
+                self.filename = self.get_random_filename()
+            else:
+                self.filename = filename
+            if content_type is None:
+                self.content_type = self.find_content_type(self.filename)
+            else:
+                self.content_type = content_type
+
+    class UploadFile(BaseUploadObject):
+        def __init__(self, path, filename=None, content_type=None):
+            self.path = path
+            if filename is None:
+                self.filename = os.path.split(path)[1]
+            else:
+                self.filename = filename
+            if content_type is None:
+                self.content_type = self.find_content_type(self.filename)
+            else:
+                self.content_type = content_type
+
+This code contains several good examples of needing to provide default
+values. Rewriting to use conditional expressions reduces the overall lines of
+code, but does not necessarily improve readability::
+
+    class BaseUploadObject(object):
+        def find_content_type(self, filename):
+            ctype, encoding = mimetypes.guess_type(filename)
+            return 'application/octet-stream' if ctype is None else ctype
+
+    class UploadContent(BaseUploadObject):
+        def __init__(self, content, filename=None, content_type=None):
+            self.content = content
+            self.filename = (self.get_random_filename() if filename
+                is None else filename)
+            self.content_type = (self.find_content_type(self.filename)
+                if content_type is None else content_type)
+
+    class UploadFile(BaseUploadObject):
+        def __init__(self, path, filename=None, content_type=None):
+            self.path = path
+            self.filename = (os.path.split(path)[1] if filename is
+                None else filename)
+            self.content_type = (self.find_content_type(self.filename)
+                if content_type is None else content_type)
+
+The first ternary expression is tidy, but it reverses the intuitive order of
+the operands: it should return ``ctype`` if it has a value and use the string
+literal as fallback. The other ternary expressions are unintuitive and so
+long that they must be wrapped. The overall readability is worsened, not
+improved.
+
+Rewriting using the ``None`` coalescing operator::
+
+    class BaseUploadObject(object):
+        def find_content_type(self, filename):
+            ctype, encoding = mimetypes.guess_type(filename)
+            return ctype ?? 'application/octet-stream'
+
+    class UploadContent(BaseUploadObject):
+        def __init__(self, content, filename=None, content_type=None):
+            self.content = content
+            self.filename = filename ?? self.get_random_filename()
+            self.content_type = content_type ?? self.find_content_type(self.filename)
+
+    class UploadFile(BaseUploadObject):
+        def __init__(self, path, filename=None, content_type=None):
+            self.path = path
+            self.filename = filename ?? os.path.split(path)[1]
+            self.content_type = content_type ??
self.find_content_type(self.filename)
+
+This syntax has an intuitive ordering of the operands. In ``find_content_type``,
+for example, the preferred value ``ctype`` appears before the fallback value.
+The terseness of the syntax also makes for fewer lines of code and less code to
+visually parse, and reading from left-to-right and top-to-bottom more accurately
+follows the execution flow.
+
+
+Rejected Ideas
+==============
+
+The first three ideas in this section are oft-proposed alternatives to treating
+``None`` as special. For further background on why these are rejected, see their
+treatment in `PEP 531 `_ and
+`PEP 532 `_ and the associated
+discussions.
+
+No-Value Protocol
+-----------------
+
+The operators could be generalised to user-defined types by defining a protocol
+to indicate when a value represents "no value". Such a protocol may be a dunder
+method ``__has_value__(self)`` that returns ``True`` if the value should be
+treated as having a value, and ``False`` if the value should be treated as no
+value.
+
+With this generalization, ``object`` would implement a dunder method equivalent
+to this::
+
+    def __has_value__(self):
+        return True
+
+``NoneType`` would implement a dunder method equivalent to this::
+
+    def __has_value__(self):
+        return False
+
+In the specification section, all uses of ``x is None`` would be replaced with
+``not x.__has_value__()``.
+
+This generalization would allow for domain-specific "no-value" objects to be
+coalesced just like ``None``. For example, the ``pyasn1`` package has a type
+called ``Null`` that represents an ASN.1 ``null``::
+
+    >>> from pyasn1.type import univ
+    >>> univ.Null() ?? univ.Integer(123)
+    Integer(123)
+
+Similarly, values such as ``math.nan`` and ``NotImplemented`` could be treated
+as representing no value.
+
+However, the "no-value" nature of these values is domain-specific, which means
+they *should* be treated as a value by the language. For example,
+``math.nan.imag`` is well defined (it's ``0.0``), and so short-circuiting
+``math.nan?.imag`` to return ``math.nan`` would be incorrect.
+
+As ``None`` is already defined by the language as being the value that
+represents "no value", and the current specification would not preclude
+switching to a protocol in the future (though changes to built-in objects would
+not be compatible), this idea is rejected for now.
+
+Boolean-aware operators
+-----------------------
+
+This suggestion is fundamentally the same as adding a no-value protocol, and so
+the discussion above also applies.
+
+Similar behavior to the ``??`` operator can be achieved with an ``or``
+expression, however ``or`` checks whether its left operand is false-y and not
+specifically ``None``. This approach is attractive, as it requires fewer changes
+to the language, but ultimately does not solve the underlying problem correctly.
+
+Assuming the check is for truthiness rather than ``None``, there is no longer a
+need for the ``??`` operator. However, applying this check to the ``?.`` and
+``?[]`` operators prevents perfectly valid operations from applying.
+
+Consider the following example, where ``get_log_list()`` may return either a
+list containing current log messages (potentially empty), or ``None`` if logging
+is not enabled::
+
+    lst = get_log_list()
+    lst?.append('A log message')
+
+If ``?.`` is checking for true values rather than specifically ``None`` and the
+log has not been initialized with any items, no item will ever be appended.
This +violates the obvious intent of the code, which is to append an item. The +``append`` method is available on an empty list, as are all other list methods, +and there is no reason to assume that these members should not be used because +the list is presently empty. + +Further, there is no sensible result to use in place of the expression. A +normal ``lst.append`` returns ``None``, but under this idea ``lst?.append`` may +result in either ``[]`` or ``None``, depending on the value of ``lst``. As with +the examples in the previous section, this makes no sense. + +As checking for truthiness rather than ``None`` results in apparently valid +expressions no longer executing as intended, this idea is rejected. + +Exception-aware operators +------------------------- + +Arguably, the reason to short-circuit an expression when ``None`` is encountered +is to avoid the ``AttributeError`` or ``TypeError`` that would be raised under +normal circumstances. As an alternative to testing for ``None``, the ``?.`` and +``?[]`` operators could instead handle ``AttributeError`` and ``TypeError`` +raised by the operation and skip the remainder of the expression. + +This produces a transformation for ``a?.b.c?.d.e`` similar to this:: + + _v = a + try: + _v = _v.b + except AttributeError: + pass + else: + _v = _v.c + try: + _v = _v.d + except AttributeError: + pass + else: + _v = _v.e + +One open question is which value should be returned as the expression when an +exception is handled. The above example simply leaves the partial result, but +this is not helpful for replacing with a default value. An alternative would be +to force the result to ``None``, which then raises the question as to why +``None`` is special enough to be the result but not special enough to be the +test. + +Secondly, this approach masks errors within code executed implicitly as part of +the expression. For ``?.``, any ``AttributeError`` within a property or +``__getattr__`` implementation would be hidden, and similarly for ``?[]`` and +``__getitem__`` implementations. + +Similarly, simple typing errors such as ``{}?.ietms()`` could go unnoticed. + +Existing conventions for handling these kinds of errors in the form of the +``getattr`` builtin and the ``.get(key, default)`` method pattern established by +``dict`` show that it is already possible to explicitly use this behaviour. + +As this approach would hide errors in code, it is rejected. + +``None``-aware Function Call +---------------------------- + +The ``None``-aware syntax applies to attribute and index access, so it seems +natural to ask if it should also apply to function invocation syntax. It might +be written as ``foo?()``, where ``foo`` is only called if it is not None. + +This has been deferred on the basis of the proposed operators being intended +to aid traversal of partially populated hierarchical data structures, *not* +for traversal of arbitrary class hierarchies. This is reflected in the fact +that none of the other mainstream languages that already offer this syntax +have found it worthwhile to support a similar syntax for optional function +invocations. + +A workaround similar to that used by C# would be to write +``maybe_none?.__call__(arguments)``. If the callable is ``None``, the +expression will not be evaluated. (The C# equivalent uses ``?.Invoke()`` on its +callable type.) 
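In current Python, the guard that ``maybe_none?.__call__(arguments)`` would
provide has to be written out explicitly. A minimal sketch of the underlying
pattern (``callback`` and ``args`` are placeholder names)::

    result = None
    if callback is not None:
        # only invoke the callable when one was actually supplied
        result = callback(*args)

This is the boilerplate the proposed operators aim to reduce elsewhere, even
though the function call form itself has been deferred.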
+
+``?`` Unary Postfix Operator
+----------------------------
+
+To generalize the ``None``-aware behavior and limit the number of new operators
+introduced, a unary, postfix operator spelled ``?`` was suggested. The idea is
+that ``?`` might return a special object that would override dunder
+methods that return ``self``. For example, ``foo?`` would evaluate to ``foo`` if
+it is not ``None``, otherwise it would evaluate to an instance of
+``NoneQuestion``::
+
+    class NoneQuestion():
+        def __call__(self, *args, **kwargs):
+            return self
+
+        def __getattr__(self, name):
+            return self
+
+        def __getitem__(self, key):
+            return self
+
+
+With this new operator and new type, an expression like ``foo?.bar[baz]``
+evaluates to ``NoneQuestion`` if ``foo`` is None. This is a nifty
+generalization, but it's difficult to use in practice since most existing code
+won't know what ``NoneQuestion`` is.
+
+Going back to one of the motivating examples above, consider the following::
+
+    >>> import json
+    >>> created = None
+    >>> json.dumps({'created': created?.isoformat()})
+
+The JSON serializer does not know how to serialize ``NoneQuestion``, nor will
+any other API. This proposal actually requires *lots of specialized logic*
+throughout the standard library and any third party library.
+
+At the same time, the ``?`` operator may also be **too general**, in the sense
+that it can be combined with any other operator. What should the following
+expressions mean?::
+
+    >>> x? + 1
+    >>> x? -= 1
+    >>> x? == 1
+    >>> ~x?
+
+This degree of generalization is not useful. The operators actually proposed
+herein are intentionally limited to a few operators that are expected to make it
+easier to write common code patterns.
+
+Built-in ``maybe``
+------------------
+
+Haskell has a concept called `Maybe `_ that
+encapsulates the idea of an optional value without relying on any special
+keyword (e.g. ``null``) or any special instance (e.g. ``None``). In Haskell, the
+purpose of ``Maybe`` is to avoid separate handling of "something" and "nothing".
+
+A Python package called `pymaybe `_ provides a
+rough approximation. The documentation shows the following example::
+
+    >>> maybe('VALUE').lower()
+    'value'
+
+    >>> maybe(None).invalid().method().or_else('unknown')
+    'unknown'
+
+The function ``maybe()`` returns either a ``Something`` instance or a
+``Nothing`` instance. Similar to the unary postfix operator described in the
+previous section, ``Nothing`` overrides dunder methods in order to allow
+chaining on a missing value.
+
+Note that ``or_else()`` is eventually required to retrieve the underlying value
+from ``pymaybe``'s wrappers. Furthermore, ``pymaybe`` does not short circuit any
+evaluation. Although ``pymaybe`` has some strengths and may be useful in its own
+right, it also demonstrates why a pure Python implementation of coalescing is
+not nearly as powerful as support built into the language.
+
+The idea of adding a builtin ``maybe`` type to enable this scenario is rejected.
+
+Just use a conditional expression
+---------------------------------
+
+Another common way to initialize default values is to use the ternary operator.
+Here is an excerpt from the popular `Requests package +`_:: + + data = [] if data is None else data + files = [] if files is None else files + headers = {} if headers is None else headers + params = {} if params is None else params + hooks = {} if hooks is None else hooks + +This particular formulation has the undesirable effect of putting the operands +in an unintuitive order: the brain thinks, "use ``data`` if possible and use +``[]`` as a fallback," but the code puts the fallback *before* the preferred +value. + +The author of this package could have written it like this instead:: + + data = data if data is not None else [] + files = files if files is not None else [] + headers = headers if headers is not None else {} + params = params if params is not None else {} + hooks = hooks if hooks is not None else {} + +This ordering of the operands is more intuitive, but it requires 4 extra +characters (for "not "). It also highlights the repetition of identifiers: +``data if data``, ``files if files``, etc. + +When written using the ``None`` coalescing operator, the sample reads:: + + data = data ?? [] + files = files ?? [] + headers = headers ?? {} + params = params ?? {} + hooks = hooks ?? {} + + +References +========== + +.. [1] C# Reference: Operators + (https://msdn.microsoft.com/en-us/library/6a71f45d.aspx) + +.. [2] A Tour of the Dart Language: Operators + (https://www.dartlang.org/docs/dart-up-and-running/ch02.html#operators) + +.. [3] Associated scripts + (https://github.com/python/peps/tree/master/pep-0505/) + +Copyright +========= + +This document has been placed in the public domain. + + + +.. + Local Variables: + mode: indented-text + indent-tabs-mode: nil + sentence-end-double-space: t + fill-column: 70 + coding: utf-8 + End: diff --git a/pep-0505.txt b/pep-0505.txt deleted file mode 100644 index fffbd3266..000000000 --- a/pep-0505.txt +++ /dev/null @@ -1,1107 +0,0 @@ -PEP: 505 -Title: None-aware operators -Version: $Revision$ -Last-Modified: $Date$ -Author: Mark E. Haase -Status: Deferred -Type: Standards Track -Content-Type: text/x-rst -Created: 18-Sep-2015 -Python-Version: 3.8 - -PEP Deferral -============ - -Further consideration of this PEP has been deferred until Python 3.8 at the -earliest. - - -Abstract -======== - -Several modern programming languages have so-called "``null``-coalescing" or -"``null``- aware" operators, including C# [1]_, Dart [2]_, Perl, Swift, and PHP -(starting in version 7). These operators provide syntactic sugar for common -patterns involving null references. - -* The "``null``-coalescing" operator is a binary operator that returns its left - operand if it is not ``null``. Otherwise it returns its right operand. -* The "``null``-aware member access" operator accesses an instance member only - if that instance is non-``null``. Otherwise it returns ``null``. (This is also - called a "safe navigation" operator.) -* The "``null``-aware index access" operator accesses an element of a collection - only if that collection is non-``null``. Otherwise it returns ``null``. (This - is another type of "safe navigation" operator.) - -The purpose of this PEP is to explore the possibility of implementing similar -operators in Python. It provides some background material and then offers -several competing alternatives for implementation. - -The initial reaction to this idea is majority negative. 
Even if ultimately -rejected, this PEP still serves a purpose: to fully document the reasons why -Python should not add this behavior, so that it can be pointed to in the future -when the question inevitably arises again. (This is the null alternative, so to -speak!) - -This proposal advances multiple alternatives, and it should be considered -severable. It may be accepted in whole or in part. For example, the safe -navigation operators might be rejected even if the ``null``-coalescing operator -is approved, or vice-versa. - -Of course, Python does not have ``null``; it has ``None``, which is conceptually -distinct. Although this PEP is inspired by "``null``-aware" operators in other -languages, it uses the term "``None``-aware" operators to describe some -hypothetical Python implementations. - - -Background -========== - -Specialness of ``None`` ------------------------ - -The Python language does not currently define any special behavior for ``None``. -This PEP suggests making ``None`` a special case. This loss of generality is a -noticeable drawback of the proposal. A generalization of ``None``-aware -operators is set forth later in this document in order to avoid this -specialization. - - -Utility of ``None`` -------------------- - -One common criticism of adding special syntax for ``None`` is that ``None`` -shouldn't be used in the first place: it's a code smell. A related criticism is -that ``None``-aware operators are used to silence errors (such as the novice -misunderstanding of an implicit ``return None``) akin to `PHP's @ operator -`_. Therefore, -the utility of ``None`` must be debated before discussing whether to add new -behavior around it. - -Python does not have any concept of ``null``. Every Python identifier must -refer to an instance, so there cannot be any ``null`` references. Python does -have a special instance called ``None`` that can be used to represent missing -values, but ``None`` is conceptually distinct from ``null``. - -The most frequent use of ``None`` in Python is to provide a default value for -optional arguments when some other default object is unwieldy. For example: -``def get(url, proxy=None):``. In this case, ``proxy`` is an optional -argument. If ``proxy`` is ``None``, then the request should be sent directly to -the server; otherwise, the request should be routed through the specified proxy -server. This use of ``None`` is preferred here to some other sentinel value or -the Null Object Pattern. [3]_ - -Examples of this form abound. Consider ``types.py`` in the standard library:: - - def prepare_class(name, bases=(), kwds=None): - if kwds is None: - kwds = {} - else: - kwds = dict(kwds) - ... - -Another frequent use of ``None`` is interfacing with external systems. Many of -those other systems have a concept of ``null``. Therefore, Python code must have -a way of representing ``null``, and typically it is represented by ``None``. For -example, databases can have ``null`` values, and most Python database drivers -will convert ``null`` to ``None`` when retrieving data from a database, and will -convert from ``None`` back to ``null`` when sending data to a database. - -This convention of interchanging ``null`` and ``None`` is widespread in Python. -It is canonized in the Python DBAPI (PEP-249). [4]_ The ``json`` module in the -standard library and the third party PyYAML package both use ``None`` to -represent their respective languages' ``null``. - -The C language ``null`` often bleeds into Python, too, particularly for thin -wrappers around C libraries. 
For example, in ``pyopenssl``, the ``X509`` class -has a ``get_notBefore()`` `method `_ that returns -either a timestamp or ``None``. This function is a thin wrapper around an -OpenSSL function with the return type ``ASN1_TIME *``. Because this C pointer -may be ``null``, the Python wrapper must be able to represent ``null``, and -``None`` is the chosen representation. - -The representation of ``null`` is particularly noticeable when Python code is -marshalling data between two systems. For example, consider a Python server that -fetches data from a database and converts it to JSON for consumption by another -process. In this case, it's often desirable that ``null`` in the database can be -easily translated to ``null`` in JSON. If ``None`` is not used for this purpose, -then each package will have to define its own representation of ``null``, and -converting between these representations adds unnecessary complexity to the -Python glue code. - -Therefore, the preference for avoiding ``None`` is nothing more than a -preference. ``None`` has legitimate uses, particularly in specific types of -software. Any hypothetical ``None``-aware operators should be construed as -syntactic sugar for simplifying common patterns involving ``None``, and *should -not be construed* as error handling behavior. - - -Behavior In Other Languages ---------------------------- - -Given that ``null``-aware operators exist in other modern languages, it may be -helpful to quickly understand how they work in those languages:: - - /* Null-coalescing. */ - - String s1 = null; - String s2 = "hello"; - String s3 = s1 ?? s2; - Console.WriteLine("s3 is: " + s3); - // s3 is: hello - - /* Null-aware member access, a.k.a. safe navigation. */ - - Console.WriteLine("s1.Length is: " + s1?.Length); - Console.WriteLine("s2.Length is: " + s2?.Length); - // s1.Length is: - // s2.Length is: 5 - - /* Null-aware index access, a.k.a. safe navigation. */ - - Dictionary d1 = null; - Dictionary d2 = new Dictionary - { - { "foo", "bar" }, - { "baz", "bat" } - }; - - Console.WriteLine("d1[\"foo\"] is: " + d1?["foo"]); - Console.WriteLine("d2[\"foo\"] is: " + d2?["foo"]); - // d1["foo"] is: - // d2["foo"] is: bar - - /* Short Circuiting */ - - Console.WriteLine("s1 trim/upper is: " + s1?.Trim().Length); - Console.WriteLine("s2 trim/upper is: " + s2?.Trim().Length); - // s1 trimmed length is: - // s2 trimmed length is: 5 - - String s4 = s1 ?? s2 ?? DoError(); - Console.WriteLine("s4 is: " + s4) - // s4 is: hello - -A `working example `_ can be viewed online. - -Of utmost importance, notice the short circuiting behavior. The short circuiting -of ``??`` is similar to short circuiting of other boolean operators such as -``||`` or ``&&`` and should not be surprising. Helpfully, `?.` is *also* short -circuiting: ``s1?.Trim()`` evaluates to null, but ``s1?.Trim().Length`` does not -attempt to dereference the ``null`` pointer. - - -Rationale -========= - -Existing Alternatives ---------------------- - -Python does not have any specific ``None``-aware operators, but it does have -operators that can be used for a similar purpose. This section describes why -these alternatives may be undesirable for some common ``None`` patterns. - - -``or`` Operator -~~~~~~~~~~~~~~~ - -Similar behavior can be achieved with the ``or`` operator, but ``or`` checks -whether its left operand is false-y, not specifically ``None``. This can lead -to surprising behavior. 
Consider the scenario of computing the price of some -products a customer has in his/her shopping cart:: - - >>> price = 100 - >>> default_quantity = 1 - # If user didn't specify a quantity, then assume the default. - >>> requested_quantity = None - >>> (requested_quantity or default_quantity) * price - 100 - # The user added 5 items to the cart. - >>> requested_quantity = 5 - >>> (requested_quantity or default_quantity) * price - 500 - # User removed 5 items from cart. - >>> requested_quantity = 0 - >>> (requested_quantity or default_quantity) * price # oops! - 100 - -An experienced Python developer should know how ``or`` works and be capable of -avoiding bugs like this. However, getting in the habit of using ``or`` for this -purpose still might cause an experienced developer to occasionally make this -mistake, especially when refactoring existing code and not carefully paying -attention to the possible values of the left-hand operand. - -For inexperienced developers, the problem is worse. The top Google hit for -"python null coalesce" is a `StackOverflow page -`_, and the top answer says to use ``or``. -The top answer goes on to explain the caveats of using ``or`` like this, but how -many beginning developers go on to read all those caveats? The accepted answer -on `a more recent question `_ says to use -``or`` without any caveats at all. These two questions have a combined 26,000 -views! - -The common usage of ``or`` for the purpose of providing default values is -undeniable, and yet it is also booby-trapped for unsuspecting newcomers. This -suggests that a safe operator for providing default values would have positive -utility. While some critics claim that ``None``-aware operators will be abused -for error handling, they are no more prone to abuse than ``or`` is. - - -Ternary Operator -~~~~~~~~~~~~~~~~ - -Another common way to initialize default values is to use the ternary operator. -Here is an excerpt from the popular `Requests package -`_:: - - data = [] if data is None else data - files = [] if files is None else files - headers = {} if headers is None else headers - params = {} if params is None else params - hooks = {} if hooks is None else hooks - -This particular formulation has the undesirable effect of putting the operands -in an unintuitive order: the brain thinks, "use ``data`` if possible and use -``[]`` as a fallback," but the code puts the fallback *before* the preferred -value. - -The author of this package could have written it like this instead:: - - data = data if data is not None else [] - files = files if files is not None else [] - headers = headers if headers is not None else {} - params = params if params is not None else {} - hooks = hooks if hooks is not None else {} - -This ordering of the operands is more intuitive, but it requires 4 extra -characters (for "not "). It also highlights the repetition of identifiers: -``data if data``, ``files if files``, etc. This example benefits from short -identifiers, but what if the tested expression is longer and/or has side -effects? This is addressed in the next section. - - -Motivating Examples -------------------- - -The purpose of this PEP is to simplify some common patterns involving ``None``. -This section presents some examples of common ``None`` patterns and explains -the drawbacks. - -This first example is from a Python web crawler that uses the popular Flask -framework as a front-end. 
This function retrieves information about a web site -from a SQL database and formats it as JSON to send to an HTTP client:: - - class SiteView(FlaskView): - @route('/site/', methods=['GET']) - def get_site(self, id_): - site = db.query('site_table').find(id_) - - return jsonify( - first_seen=site.first_seen.isoformat() if site.first_seen is not None else None, - id=site.id, - is_active=site.is_active, - last_seen=site.last_seen.isoformat() if site.last_seen is not None else None, - url=site.url.rstrip('/') - ) - -Both ``first_seen`` and ``last_seen`` are allowed to be ``null`` in the -database, and they are also allowed to be ``null`` in the JSON response. JSON -does not have a native way to represent a ``datetime``, so the server's contract -states that any non-``null`` date is represented as an ISO-8601 string. - -Note that this code is invalid by PEP-8 standards: several lines are over the -line length limit. In fact, *including it in this document* violates the PEP -formatting standard! But it's not unreasonably indented, nor are any of the -identifiers excessively long. The excessive line length is due to the -repetition of identifiers on both sides of the ternary ``if`` and the verbosity -of the ternary itself (10 characters out of a 78 character line length). - -One way to fix this code is to replace each ternary with a full ``if/else`` -block:: - - class SiteView(FlaskView): - @route('/site/', methods=['GET']) - def get_site(self, id_): - site = db.query('site_table').find(id_) - - if site.first_seen is None: - first_seen = None - else: - first_seen = site.first_seen.isoformat() - - if site.last_seen is None: - last_seen = None - else: - last_seen = site.last_seen.isoformat() - - return jsonify( - first_seen=first_seen, - id=site.id, - is_active=site.is_active, - last_seen=last_seen, - url=site.url.rstrip('/') - ) - -This version definitely isn't *bad*. It is easy to read and understand. On the -other hand, adding 8 lines of code to express this common behavior feels a bit -heavy, especially for a deliberately simplified example. If a larger, more -complicated data model was being used, then it would get tedious to continually -write in this long form. The readability would start to suffer as the number of -lines in the function grows, and a refactoring would be forced. - -Another alternative is to rename some of the identifiers:: - - class SiteView(FlaskView): - @route('/site/', methods=['GET']) - def get_site(self, id_): - site = db.query('site_table').find(id_) - - fs = site.first_seen - ls = site.last_seen - - return jsonify( - first_seen=fs.isodate() if fs is not None else None, - id=site.id, - is_active=site.is_active, - last_seen=ls.isodate() if ls is not None else None,, - url=site.url.rstrip('/') - ) - -This adds fewer lines of code than the previous example, but it comes at the -expense of introducing extraneous identifiers that amount to nothing more than -aliases. These new identifiers are short enough to fit a ternary expression onto -one line, but the identifiers are also less intuitive, e.g. ``fs`` versus -``first_seen``. 
- -As a quick preview, consider an alternative rewrite using a new operator:: - - class SiteView(FlaskView): - @route('/site/', methods=['GET']) - def get_site(self, id_): - site = db.query('site_table').find(id_) - - return jsonify( - first_seen=site.first_seen?.isoformat(), - id=site.id, - is_active=site.is_active, - last_seen=site.last_seen?.isoformat(), - url=site.url.rstrip('/') - ) - -The ``?.`` operator behaves as a "safe navigation" operator, allowing a more -concise syntax where the expression ``site.first_seen`` is not duplicated. - -The next example is from a trending project on GitHub called `Grab -`_, which is a Python scraping library:: - - class BaseUploadObject(object): - def find_content_type(self, filename): - ctype, encoding = mimetypes.guess_type(filename) - if ctype is None: - return 'application/octet-stream' - else: - return ctype - - class UploadContent(BaseUploadObject): - def __init__(self, content, filename=None, content_type=None): - self.content = content - if filename is None: - self.filename = self.get_random_filename() - else: - self.filename = filename - if content_type is None: - self.content_type = self.find_content_type(self.filename) - else: - self.content_type = content_type - - class UploadFile(BaseUploadObject): - def __init__(self, path, filename=None, content_type=None): - self.path = path - if filename is None: - self.filename = os.path.split(path)[1] - else: - self.filename = filename - if content_type is None: - self.content_type = self.find_content_type(self.filename) - else: - self.content_type = content_type - -.. note:: - - I don't know the author of the Grab project. I used it as an example - because it is a trending repo on GitHub and it has good examples of common - ``None`` patterns. - -This example contains several good examples of needing to provide default -values. It is a bit verbose as it is, and it is certainly not improved by the -ternary operator:: - - class BaseUploadObject(object): - def find_content_type(self, filename): - ctype, encoding = mimetypes.guess_type(filename) - return 'application/octet-stream' if ctype is None else ctype - - class UploadContent(BaseUploadObject): - def __init__(self, content, filename=None, content_type=None): - self.content = content - self.filename = self.get_random_filename() if filename \ - is None else filename - self.content_type = self.find_content_type(self.filename) \ - if content_type is None else content_type - - class UploadFile(BaseUploadObject): - def __init__(self, path, filename=None, content_type=None): - self.path = path - self.filename = os.path.split(path)[1] if filename is \ - None else filename - self.content_type = self.find_content_type(self.filename) \ - if content_type is None else content_type - -The first ternary expression is tidy, but it reverses the intuitive order of -the operands: it should return ``ctype`` if it has a value and use the string -literal as fallback. The other ternary expressions are unintuitive and so -long that they must be wrapped. The overall readability is worsened, not -improved. - -This code *might* be improved, though, if there was a syntactic shortcut for -this common need to supply a default value:: - - class BaseUploadObject(object): - def find_ctype(self, filename): - ctype, encoding = mimetypes.guess_type(filename) - return ctype ?? 'application/octet-stream' - - class UploadContent(BaseUploadObject): - def __init__(self, content, filename=None, content_type=None): - self.content = content - self.filename = filename ?? 
self.get_random_filename() - self.content_type = content_type ?? self.find_ctype(self.filename) - - class UploadFile(BaseUploadObject): - def __init__(self, path, filename=None, content_type=None): - self.path = path - self.filename = filename ?? os.path.split(path)[1] - self.content_type = content_type ?? self.find_ctype(self.filename) - -This syntax has an intuitive ordering of the operands, e.g. ``ctype`` -- the -preferred value -- comes before the fallback value. The terseness of the syntax -also makes for fewer lines of code and less code to visually parse. - -.. note:: - - I cheated on the last example: I renamed ``find_content_type`` to - ``find_ctype`` in order to fit two of the lines under 80 characters. If you - find this underhanded, you can go back and apply the same renaming to the - previous 2 examples. You'll find that it doesn't change the - conclusions. - - -Usage Of ``None`` In The Standard Library ------------------------------------------ - -The previous sections show some code patterns that are claimed to be "common", -but how common are they? The attached script `find-pep505.py -`_ is meant -to answer this question. It uses the ``ast`` module to search for variations of -the following patterns in any ``*.py`` file:: - - >>> # None-coalescing if block - ... - >>> if a is None: - ... a = b - - >>> # [Possible] None-coalescing "or" operator - ... - >>> a or 'foo' - >>> a or [] - >>> a or {} - - >>> # None-coalescing ternary - ... - >>> a if a is not None else b - >>> b if a is None else a - - >>> # Safe navigation "and" operator - ... - >>> a and a.foo - >>> a and a['foo'] - >>> a and a.foo() - - >>> # Safe navigation if block - ... - >>> if a is not None: - ... a.foo() - - >>> # Safe navigation ternary - ... - >>> a.foo if a is not None else b - >>> b if a is None else a.foo - -This script takes one or more names of Python source files to analyze:: - - $ python3 find-pep505.py test.py - $ find /usr/lib/python3.4 -name '*.py' | xargs python3 find-pep505.py - -The script prints out any matches it finds. Sample:: - - None-coalescing if block: /usr/lib/python3.4/inspect.py:594 - if _filename is None: - _filename = getsourcefile(object) or getfile(object) - - [Possible] None-coalescing `or`: /usr/lib/python3.4/lib2to3/refactor.py:191 - self.explicit = explicit or [] - - None-coalescing ternary: /usr/lib/python3.4/decimal.py:3909 - self.clamp = clamp if clamp is not None else dc.clamp - - Safe navigation `and`: /usr/lib/python3.4/weakref.py:512 - obj = info and info.weakref() - - Safe navigation `if` block: /usr/lib/python3.4/http/cookiejar.py:1895 - if k is not None: - lc = k.lower() - else: - lc = None - - Safe navigation ternary: /usr/lib/python3.4/sre_parse.py:856 - literals = [None if s is None else s.encode('latin-1') for s in literals] - -.. note:: - - Coalescing with ``or`` is marked as a "possible" match, because it's not - trivial to infer whether ``or`` is meant to coalesce False-y values - (correct) or if it meant to coalesce ``None`` (incorrect). On the other - hand, we assume that ``and`` is always incorrect for safe navigation. - -The script has been tested against `test.py -`_ and the Python -3.4 standard library, but it should work on any arbitrary Python 3 source code. -The complete output from running it against the standard library is attached to -this proposal as `find-pep505.out -`_. 
-
-The script counts how many matches it finds and prints the totals at the
-end::
-
-    Total None-coalescing `if` blocks: 426
-    Total [possible] None-coalescing `or`: 119
-    Total None-coalescing ternaries: 21
-    Total Safe navigation `and`: 9
-    Total Safe navigation `if` blocks: 55
-    Total Safe navigation ternaries: 7
-
-This is a total of 637 possible matches for these common code patterns in the
-standard library. Allowing for some false positives and false negatives, it is
-fair to say that these code patterns are definitely common in the standard
-library.
-
-
-Rejected Ideas
---------------
-
-Several related ideas were discussed on python-ideas, and some of these were
-roundly rejected by the BDFL, the community, or both. For posterity's sake,
-some of those ideas are recorded here.
-
-``None``-aware Function Call
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-
-The ``None``-aware syntax applies to attribute and index access, so it seems
-natural to ask if it should also apply to function invocation syntax. It might
-be written as ``foo?()``, where ``foo`` is only called if it is not ``None``.
-
-This has been rejected on the basis of the proposed operators being intended
-to aid traversal of partially populated hierarchical data structures, *not*
-for traversal of arbitrary class hierarchies. This is reflected in the fact
-that none of the other mainstream languages that already offer this syntax
-have found it worthwhile to support a similar syntax for optional function
-invocations.
-
-``?`` Unary Postfix Operator
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-
-To generalize the ``None``-aware behavior and limit the number of new operators
-introduced, a unary, postfix operator spelled ``?`` was suggested. The idea is
-that ``?`` would return a special object whose dunder methods all return
-``self``. For example, ``foo?`` would evaluate to ``foo`` if it is not
-``None``, otherwise it would evaluate to an instance of ``NoneQuestion``::
-
-    class NoneQuestion():
-        def __call__(self, *args, **kwargs):
-            return self
-
-        def __getattr__(self, name):
-            return self
-
-        def __getitem__(self, key):
-            return self
-
-
-With this new operator and new type, an expression like ``foo?.bar[baz]``
-evaluates to an instance of ``NoneQuestion`` if ``foo`` is ``None``. This is a
-nifty generalization, but it's difficult to use in practice since most
-existing code won't know what ``NoneQuestion`` is.
-
-Going back to one of the motivating examples above, consider the following::
-
-    >>> import json
-    >>> created = None
-    >>> json.dumps({'created': created?.isoformat()})
-
-The JSON serializer does not know how to serialize ``NoneQuestion``, nor will
-any other API. This proposal would actually require *lots of specialized
-logic* throughout the standard library and any third party library.
-
-At the same time, the ``?`` operator may also be **too general**, in the sense
-that it can be combined with any other operator. What should the following
-expressions mean?::
-
-    >>> x? + 1
-    >>> x? -= 1
-    >>> x? == 1
-    >>> ~x?
-
-This degree of generalization is not useful. The operators actually proposed
-herein are intentionally limited to a few that are expected to make it easier
-to write common code patterns.
-
-Haskell-style ``Maybe``
-~~~~~~~~~~~~~~~~~~~~~~~
-
-Haskell has a concept called `Maybe `_ that
-encapsulates the idea of an optional value without relying on any special
-keyword (e.g. ``null``) or any special instance (e.g. ``None``).
-In Haskell, the purpose of ``Maybe`` is to avoid separate handling of
-"something" and "nothing". The concept is so heavily intertwined with
-Haskell's lazy evaluation that it doesn't translate cleanly into Python.
-
-There is a Python package called `pymaybe
-`_ that provides a rough
-approximation. The documentation shows the following example that appears
-relevant to the discussion at hand::
-
-    >>> maybe('VALUE').lower()
-    'value'
-
-    >>> maybe(None).invalid().method().or_else('unknown')
-    'unknown'
-
-The function ``maybe()`` returns either a ``Something`` instance or a
-``Nothing`` instance. Similar to the unary postfix operator described in the
-previous section, ``Nothing`` overrides dunder methods in order to allow
-chaining on a missing value.
-
-Note that ``or_else()`` is eventually required to retrieve the underlying value
-from ``pymaybe``'s wrappers. Furthermore, ``pymaybe`` does not short circuit any
-evaluation. Although ``pymaybe`` has some strengths and may be useful in its own
-right, it also demonstrates why a pure Python implementation of coalescing is
-not nearly as powerful as support built into the language.
-
-
-Specification
-=============
-
-This PEP suggests three new operators be added to Python:
-
-1. ``None``-coalescing operator
-2. ``None``-aware attribute access
-3. ``None``-aware index access/slicing
-
-We will continue to assume the same spellings as in the previous sections in
-order to focus on behavior before diving into the much more contentious issue
-of how to spell these operators.
-
-A generalization of these operators is also proposed below under the heading
-"Generalized Coalescing".
-
-
-Operator Spelling
------------------
-
-Despite significant support for the proposed operators, the majority of
-discussion on python-ideas fixated on the spelling. Many alternative spellings
-were proposed, both punctuation and keywords, but each alternative drew some
-criticism. Spelling the operator as a keyword is problematic because adding
-new keywords to the language is not backwards compatible.
-
-It is not impossible to add a new keyword, however, and we can look at several
-other PEPs for inspiration. For example, `PEP-492
-<https://www.python.org/dev/peps/pep-0492/>`_ introduced the new keywords
-``async`` and ``await`` into Python 3.5. These new keywords are fully backwards
-compatible, because that PEP also introduces a new lexical context such that
-``async`` and ``await`` are only treated as keywords when used inside of an
-``async def`` function. In other locations, ``async`` and ``await`` may be used
-as identifiers.
-
-It is also possible to craft a new operator out of existing keywords, as was
-the case with `PEP-308 <https://www.python.org/dev/peps/pep-0308/>`_, which
-created a ternary operator by cobbling together the ``if`` and ``else``
-keywords into a new operator.
-
-In addition to the lexical acrobatics required to create a new keyword, keyword
-operators are also undesirable for creating an assignment shortcut syntax. In
-Dart, for example, ``x ??= y`` is an assignment shortcut that approximately
-means ``x = x ?? y`` except that ``x`` is only evaluated once. If Python's
-coalesce operator were a keyword, e.g. ``foo``, then the assignment shortcut
-would be very ugly: ``x foo= y``.
-
-Spelling new logical operators with punctuation is unlikely, for several
-reasons. First, Python eschews punctuation for logical operators. For example,
-it uses ``not`` instead of ``!``, ``or`` instead of ``||``, and ``… if … else …``
-instead of ``… ? … : …``.
-
-Second, nearly every single punctuation character on a standard keyboard already
-has special meaning in Python. The only exceptions are ``$``, ``!``, ``?``, and
-backtick (as of Python 3). This leaves few options for a new, single-character
-operator.
-
-Third, other projects in the Python universe assign special meaning to
-punctuation. For example, `IPython
-`_ assigns
-special meaning to ``%``, ``%%``, ``?``, ``??``, ``$``, and ``$$``, among
-others. Out of deference to those projects and the large communities using
-them, introducing conflicting syntax into Python is undesirable.
-
-The spellings ``??`` and ``?.`` will be familiar to programmers who have seen
-them in other popular programming languages. Any alternative punctuation will
-be just as ugly but without the benefit of familiarity from other languages.
-Therefore, this proposal spells the new operators using the same punctuation
-that already exists in other languages.
-
-
-``None``-Coalescing Operator
-----------------------------
-
-The ``None``-coalescing operator is a short-circuiting, binary operator that
-behaves in the following way.
-
-1. Evaluate the left operand first.
-2. If the left operand is not ``None``, then return it immediately.
-3. Else, evaluate the right operand and return the result.
-
-Consider the following examples. We will continue to use the spelling ``??``
-here, but keep in mind that alternative spellings will be discussed below::
-
-    >>> 1 ?? 2
-    1
-    >>> None ?? 2
-    2
-
-Importantly, note that the right operand is not evaluated unless the left
-operand is ``None``::
-
-    >>> def err(): raise Exception('foo')
-    >>> 1 ?? err()
-    1
-    >>> None ?? err()
-    Traceback (most recent call last):
-      File "<stdin>", line 1, in <module>
-      File "<stdin>", line 1, in err
-    Exception: foo
-
-The operator is left associative. Combined with its short circuiting behavior,
-this makes the operator easy to chain::
-
-    >>> timeout = None
-    >>> local_timeout = 60
-    >>> global_timeout = 300
-    >>> timeout ?? local_timeout ?? global_timeout
-    60
-
-    >>> local_timeout = None
-    >>> timeout ?? local_timeout ?? global_timeout
-    300
-
-The operator has higher precedence than the comparison operators ``==``, ``>``,
-``is``, etc., but lower precedence than any bitwise or arithmetic operators.
-This precedence is chosen to make "default value" expressions intuitive to
-read and write::
-
-    >>> not None ?? True
-    >>> not (None ?? True)                 # Equivalent
-
-    >>> 1 == None ?? 1
-    >>> 1 == (None ?? 1)                   # Equivalent
-
-    >>> 'foo' in None ?? ['foo', 'bar']
-    >>> 'foo' in (None ?? ['foo', 'bar'])  # Equivalent
-
-    >>> 1 + None ?? 2
-    >>> 1 + (None ?? 2)                    # Equivalent
-
-Recall the example above of calculating the cost of items in a shopping cart,
-and the easy-to-miss bug. This type of bug is not possible with the
-``None``-coalescing operator, because there is no implicit type coercion to
-``bool``::
-
-    >>> requested_quantity = 0
-    >>> default_quantity = 1
-    >>> price = 100
-    >>> requested_quantity ?? default_quantity * price
-    0
-
-The ``None``-coalescing operator also has a corresponding assignment shortcut.
-The following assignments are semantically similar, except that ``foo`` is only
-looked up once when using the assignment shortcut::
-
-    >>> foo ??= []
-    >>> foo = foo ?? []
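For comparison, the closest short-circuiting emulation available in today's
Python must defer each operand behind a callable. The ``coalesce`` helper
below is hypothetical, shown only to illustrate the semantics::

    def coalesce(*thunks):
        # Call each thunk in turn, returning the first non-None result.
        # Later thunks are never called, mirroring the short-circuiting
        # behavior of the proposed ?? operator.
        result = None
        for thunk in thunks:
            result = thunk()
            if result is not None:
                break
        return result

    timeout, local_timeout, global_timeout = None, None, 300
    assert coalesce(lambda: timeout,
                    lambda: local_timeout,
                    lambda: global_timeout) == 300

The lambdas are exactly the noise that the proposed operator eliminates.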
-The ``None``-coalescing operator improves readability, especially when
-handling default function arguments. Consider again the example from the
-Requests library, rewritten to use ``None``-coalescing::
-
-    def __init__(self, data=None, files=None, headers=None, params=None, hooks=None):
-        self.data = data ?? []
-        self.files = files ?? []
-        self.headers = headers ?? {}
-        self.params = params ?? {}
-        self.hooks = hooks ?? {}
-
-The operator makes the intent easier to follow (by putting operands in an
-intuitive order) and is more concise than the ternary operator, while still
-preserving the short circuit semantics of the code that it replaces.
-
-
-``None``-Aware Attribute Access Operator
-----------------------------------------
-
-The ``None``-aware attribute access operator (also called "safe navigation")
-checks its left operand. If the left operand is ``None``, then the operator
-evaluates to ``None``. If the left operand is not ``None``, then the operator
-accesses the attribute named by the right operand::
-
-    >>> from datetime import date
-    >>> d = date.today()
-    >>> d.year
-    2015
-
-    >>> d = None
-    >>> d.year
-    Traceback (most recent call last):
-      File "<stdin>", line 1, in <module>
-    AttributeError: 'NoneType' object has no attribute 'year'
-
-    >>> d?.year
-    None
-
-The operator has the same precedence and associativity as the plain attribute
-access operator ``.``, but this operator is also short-circuiting in a unique
-way: if the left operand is ``None``, then any series of attribute access,
-index access, slicing, or function call operators immediately to the right of
-it *are not evaluated*::
-
-    >>> name = ' The Black Knight '
-    >>> name.strip()[4:].upper()
-    'BLACK KNIGHT'
-
-    >>> name = None
-    >>> name?.strip()[4:].upper()
-    None
-
-If this operator did not short circuit in this way, then the second example
-would partially evaluate ``name?.strip()`` to ``None()`` and then fail with
-``TypeError: 'NoneType' object is not callable``.
-
-To put it another way, the following expressions are semantically similar,
-except that ``name`` is only looked up once on the first line::
-
-    >>> name?.strip()[4:].upper()
-    >>> name.strip()[4:].upper() if name is not None else None
-
-.. note::
-
-    C# implements its safe navigation operators with the same short-circuiting
-    semantics, but Dart does not. In Dart, the second example (suitably
-    translated) would fail. The C# semantics are obviously superior, given the
-    original goal of writing common cases more concisely. The Dart semantics
-    are nearly useless.
-
-This operator short circuits one or more attribute access, index access,
-slicing, or function call operators that are adjacent to its right, but it
-does not short circuit any other operators (logical, bitwise, arithmetic,
-etc.), nor does it escape parentheses::
-
-    >>> d = date.today()
-    >>> d?.year.numerator + 1
-    2016
-
-    >>> d = None
-    >>> d?.year.numerator + 1
-    Traceback (most recent call last):
-      File "<stdin>", line 1, in <module>
-    TypeError: unsupported operand type(s) for +: 'NoneType' and 'int'
-
-    >>> (d?.year).numerator + 1
-    Traceback (most recent call last):
-      File "<stdin>", line 1, in <module>
-    AttributeError: 'NoneType' object has no attribute 'numerator'
-
-Note that the error in the second example is not on the attribute access
-``numerator``. In fact, that attribute access is never performed. The error
-occurs when adding ``None + 1``, because the ``None``-aware attribute access
-does not short circuit ``+``.
-
-The third example fails because the operator does not escape parentheses. In
-that example, the attribute access ``numerator`` is evaluated and fails
-because ``None`` does not have that attribute.
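To spell the second example out in current Python: only the chain of accesses
adjacent to the ``None``-aware operator is skipped; surrounding operators
still apply. A rough desugaring, for illustration only::

    from datetime import date

    def year_plus_one(d):
        # d?.year.numerator + 1 desugars approximately to:
        tmp = None if d is None else d.year.numerator
        return tmp + 1  # the addition still happens, so None + 1 raises

    year_plus_one(date.today())  # fine
    year_plus_one(None)          # TypeError, as in the example above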
-Finally, observe that short circuiting adjacent operators is not at all the
-same thing as propagating ``None`` throughout an expression::
-
-    >>> user?.first_name.upper()
-
-If ``user`` is not ``None``, then ``user.first_name`` is evaluated. If
-``user.first_name`` evaluates to ``None``, then ``user.first_name.upper()`` is
-an error! In English, this expression says, "``user`` is optional but if it
-has a value, then it must have a ``first_name``, too."
-
-If ``first_name`` is supposed to be an optional attribute, then the expression
-must make that explicit::
-
-    >>> user?.first_name?.upper()
-
-The operator is not intended as an error silencing mechanism, and it would be
-undesirable if its presence infected nearby operators.
-
-
-``None``-Aware Index Access/Slicing Operator
---------------------------------------------
-
-The ``None``-aware index access/slicing operator (also called "safe
-navigation") is nearly identical to the ``None``-aware attribute access
-operator. It combines the familiar square bracket syntax ``[]`` with new
-punctuation or a new keyword, the spelling of which is discussed later::
-
-    >>> person = {'name': 'Mark', 'age': 32}
-    >>> person['name']
-    'Mark'
-
-    >>> person = None
-    >>> person['name']
-    Traceback (most recent call last):
-      File "<stdin>", line 1, in <module>
-    TypeError: 'NoneType' object is not subscriptable
-
-    >>> person?.['name']
-    None
-
-The ``None``-aware slicing operator behaves similarly::
-
-    >>> name = 'The Black Knight'
-    >>> name[4:]
-    'Black Knight'
-
-    >>> name = None
-    >>> name[4:]
-    Traceback (most recent call last):
-      File "<stdin>", line 1, in <module>
-    TypeError: 'NoneType' object is not subscriptable
-
-    >>> name?.[4:]
-    None
-
-These operators have the same precedence as the plain index access and slicing
-operators. They also have the same short-circuiting behavior as the
-``None``-aware attribute access.
-
-
-Generalized Coalescing
-----------------------
-
-Making ``None`` a special case is too specialized and magical. The behavior
-can be generalized by making the ``None``-aware operators invoke a dunder
-method, e.g. ``__coalesce__(self)``, that returns ``True`` if an object should
-be coalesced and ``False`` otherwise.
-
-With this generalization, ``object`` would implement a dunder method
-equivalent to this::
-
-    def __coalesce__(self):
-        return False
-
-``NoneType`` would implement a dunder method equivalent to this::
-
-    def __coalesce__(self):
-        return True
-
-If this generalization is accepted, then the operators will need to be renamed
-such that the term ``None`` is not used, e.g. "Coalescing Operator", "Coalesced
-Member Access Operator", etc.
-
-The coalesce operator would invoke this dunder method. The following two
-expressions are semantically similar, except that ``foo`` is only looked up
-once when using the coalesce operator::
-
-    >>> foo ?? bar
-    >>> bar if foo.__coalesce__() else foo
-
-The coalesced attribute and index access operators would invoke the same
-dunder method::
-
-    >>> user?.first_name.upper()
-    >>> None if user.__coalesce__() else user.first_name.upper()
-
-This generalization allows for domain-specific ``null`` objects to be
-coalesced just like ``None``. For example, the ``pyasn1`` package has a type
-called ``Null`` that represents an ASN.1 ``null``::
-
-    >>> from pyasn1.type import univ
-    >>> univ.Null() ?? 
univ.Integer(123) - Integer(123) - -In addition to making the proposed operators less specialized, this -generalization also makes it easier to work with the Null Object Pattern, [3]_ -for those developers who prefer to avoid using ``None``. - - -Implementation --------------- - -The author of this PEP is not competent with grammars or lexers, and given the -contentiousness of this proposal, the implementation details for CPython will be -deferred until we have a clearer idea that one or more of the proposed -enhancements will be approved. - -...TBD... - - -References -========== - -.. [1] C# Reference: Operators - (https://msdn.microsoft.com/en-us/library/6a71f45d.aspx) - -.. [2] A Tour of the Dart Language: Operators - (https://www.dartlang.org/docs/dart-up-and-running/ch02.html#operators) - -.. [3] Wikipedia: Null Object Pattern - (https://en.wikipedia.org/wiki/Null_Object_pattern) - -.. [4] PEP-249: - (https://www.python.org/dev/peps/pep-0249/) - -.. [5] PEP-308 - (https://www.python.org/dev/peps/pep-0308/) - - -Copyright -========= - -This document has been placed in the public domain. - - - -.. - Local Variables: - mode: indented-text - indent-tabs-mode: nil - sentence-end-double-space: t - fill-column: 70 - coding: utf-8 - End: diff --git a/pep-0506.txt b/pep-0506.txt index a04df819d..ab4f8d5f2 100644 --- a/pep-0506.txt +++ b/pep-0506.txt @@ -3,7 +3,7 @@ Title: Adding A Secrets Module To The Standard Library Version: $Revision$ Last-Modified: $Date$ Author: Steven D'Aprano -Status: Accepted +Status: Final Type: Standards Track Content-Type: text/x-rst Created: 19-Sep-2015 diff --git a/pep-0514.txt b/pep-0514.txt index 2ecba9759..014411f8a 100644 --- a/pep-0514.txt +++ b/pep-0514.txt @@ -359,7 +359,7 @@ of ``PythonCore`` is omitted but shown in a later example:: (winreg.HKEY_LOCAL_MACHINE, r'Software\Python', winreg.KEY_WOW64_32KEY), ]: with winreg.OpenKeyEx(hive, key, access=winreg.KEY_READ | flags) as root_key: - for comany in enum_keys(root_key): + for company in enum_keys(root_key): if company == 'PyLauncher': continue @@ -478,4 +478,4 @@ References Copyright ========= -This document has been placed in the public domain. \ No newline at end of file +This document has been placed in the public domain. diff --git a/pep-0517.txt b/pep-0517.txt index 7192cc105..9241d8814 100644 --- a/pep-0517.txt +++ b/pep-0517.txt @@ -6,7 +6,7 @@ Author: Nathaniel J. Smith , Thomas Kluyver BDFL-Delegate: Nick Coghlan Discussions-To: -Status: Accepted +Status: Provisional Type: Standards Track Content-Type: text/x-rst Created: 30-Sep-2015 diff --git a/pep-0518.txt b/pep-0518.txt index 7e405054e..49f3e7ceb 100644 --- a/pep-0518.txt +++ b/pep-0518.txt @@ -7,8 +7,8 @@ Author: Brett Cannon , Donald Stufft BDFL-Delegate: Nick Coghlan Discussions-To: distutils-sig -Status: Accepted -Type: Informational +Status: Provisional +Type: Standards Track Content-Type: text/x-rst Created: 10-May-2016 Post-History: 10-May-2016, @@ -129,24 +129,63 @@ of requirements for the build system to simply begin execution. Specification ============= +File Format +----------- + The build system dependencies will be stored in a file named -``pyproject.toml`` that is written in the TOML format [#toml]_. This -format was chosen as it is human-usable (unlike JSON [#json]_), it is -flexible enough (unlike configparser [#configparser]_), stems from a -standard (also unlike configparser [#configparser]_), and it is not -overly complex (unlike YAML [#yaml]_). 
The TOML format is already in
-use by the Rust community as part of their
+``pyproject.toml`` that is written in the TOML format [#toml]_.
+
+This format was chosen as it is human-usable (unlike JSON [#json]_),
+it is flexible enough (unlike configparser [#configparser]_), stems
+from a standard (also unlike configparser [#configparser]_), and it
+is not overly complex (unlike YAML [#yaml]_). The TOML format is
+already in use by the Rust community as part of their
Cargo package manager [#cargo]_, and in private email its developers
stated they have been quite happy with their choice of TOML. A more
thorough discussion as to why various alternatives were not chosen
can be read in the `Other file formats`_ section.

-There will be a ``[build-system]`` table in the
-configuration file to store build-related data. Initially only one key
-of the table will be valid and mandatory: ``requires``. That key will
-have a value of a list of strings representing the PEP 508
-dependencies required to execute the build system (currently that
-means what dependencies are required to execute a ``setup.py`` file).
+Tables not specified in this PEP are reserved for future use by other
+PEPs.
+
+build-system table
+------------------
+
+The ``[build-system]`` table is used to store build-related data.
+Initially only one key of the table will be valid and mandatory:
+``requires``. This key must have a value of a list of strings
+representing PEP 508 dependencies required to execute the build
+system (currently that means what dependencies are required to
+execute a ``setup.py`` file).
+
+For the vast majority of Python projects that rely upon setuptools,
+the ``pyproject.toml`` file will be::
+
+    [build-system]
+    # Minimum requirements for the build system to execute.
+    requires = ["setuptools", "wheel"] # PEP 508 specifications.
+
+Because the use of setuptools and wheel is so expansive in the
+community at the moment, build tools are expected to use the example
+configuration file above as their default semantics when a
+``pyproject.toml`` file is not present.
+
+tool table
+----------
+
+The ``[tool]`` table is where tools can have users specify
+configuration data as long as they use a sub-table within ``[tool]``,
+e.g. the `flit `_ tool would store
+its configuration in ``[tool.flit]``.
+
+We need some mechanism to allocate names within the ``tool.*``
+namespace, to make sure that different projects don't attempt to use
+the same sub-table and collide. Our rule is that a project can use
+the subtable ``tool.$NAME`` if, and only if, they own the entry for
+``$NAME`` in the Cheeseshop/PyPI.
+
+JSON Schema
+-----------

To provide a type-specific representation of the resulting data from
the TOML file for illustrative purposes only, the following JSON
Schema [#jsonschema]_ would match the data format::

    }
    }

-For the vast majority of Python projects that rely upon setuptools,
-the ``pyproject.toml`` file will be::
-
-    [build-system]
-    # Minimum requirements for the build system to execute.
-    requires = ["setuptools", "wheel"] # PEP 508 specifications.
-
-Because the use of setuptools and wheel are so expansive in the
-community at the moment, build tools are expected to use the example
-configuration file above as their default semantics when a
-``pyproject.toml`` file is not present.
-
-All other top-level keys and tables are reserved for future use by
-other PEPs except for the ``[tool]`` table.
Within that table, tools -can have users specify configuration data as long as they use a -sub-table within ``[tool]``, e.g. the -`flit `_ tool would store its -configuration in ``[tool.flit]``. - -We need some mechanism to allocate names within the ``tool.*`` -namespace, to make sure that different projects don't attempt to use -the same sub-table and collide. Our rule is that a project can use -the subtable ``tool.$NAME`` if, and only if, they own the entry for -``$NAME`` in the Cheeseshop/PyPI. - Rejected Ideas ============== @@ -255,6 +269,44 @@ vendored easily by projects. This outright excluded certain formats like XML which are not friendly towards human beings and were never seriously discussed. +Overview of file formats considered +''''''''''''''''''''''''''''''''''' + +The key reasons for rejecting the other alternatives considered are +summarised in the following sections, while the full review (including +positive arguments in favour of TOML) can be found at [#file_formats]_. + +TOML was ultimately selected as it provided all the features we +were interested in, while avoiding the downsides introduced by +the alternatives. + +======================= ==== ==== ==== ======= +Feature TOML YAML JSON CFG/INI +======================= ==== ==== ==== ======= +Well-defined yes yes yes +Real data types yes yes yes +Reliable Unicode yes yes yes +Reliable comments yes yes +Easy for humans to edit yes ?? ?? +Easy for tools to edit yes ?? yes ?? +In standard library yes yes +Easy for pip to vendor yes n/a n/a +======================= ==== ==== ==== ======= + +("??" in the table indicates items where most folks would be +inclined to answer "yes", but there turn out to be a lot of +quirks and edge cases that arise in practice due to either +the lack of a clear specification, or else the underlying +file format specification being surprisingly complicated) + +The ``pytoml`` TOML parser is ~300 lines of pure Python code, +so being outside the standard library didn't count heavily +against it. + +Python literals were also discussed as a potential format, but +weren't considered in the file format review (since they're not +a common pre-existing file format). + JSON '''' @@ -375,6 +427,17 @@ An example Python literal file for the proposed data would be:: } +Sticking with ``setup.cfg`` +--------------------------- + +There are two issues with ``setup.cfg`` used by setuptools as a general +format. One is that they are ``.ini`` files which have issues as mentioned +in the configparser_ discussion above. The other is that the schema for +that file has never been rigorously defined and thus it's unknown which +format would be safe to use going forward without potentially confusing +setuptools installations. + + Other file names ---------------- @@ -473,6 +536,9 @@ References .. [#jsonschema] JSON Schema (http://json-schema.org/) + +.. [#file_formats] Nathaniel J. Smith's file format review + (https://gist.github.com/njsmith/78f68204c5d969f8c8bc645ef77d4a8f) Copyright diff --git a/pep-0526.txt b/pep-0526.txt index 2d207af8c..a10e10ea2 100644 --- a/pep-0526.txt +++ b/pep-0526.txt @@ -365,7 +365,8 @@ and local variables should have a single space after corresponding colon. There should be no space before the colon. If an assignment has right hand side, then the equality sign should have exactly one space on both sides. 
Examples: -* Yes:: + +- Yes:: code: int @@ -373,7 +374,7 @@ Examples: coords: Tuple[int, int] label: str = '' -* No:: +- No:: code:int # No space after colon code : int # Space before colon diff --git a/pep-0537.txt b/pep-0537.txt index 229c64664..b18caf380 100644 --- a/pep-0537.txt +++ b/pep-0537.txt @@ -34,7 +34,7 @@ Release Manager and Crew 3.7 Lifespan ============ -3.7 will receive bugfix updates approximately every 3-6 months for +3.7 will receive bugfix updates approximately every 1-3 months for approximately 18 months. After the release of 3.8.0 final, a final 3.7 bugfix update will be released. After that, it is expected that security updates (source only) will be released until 5 years after @@ -56,15 +56,21 @@ Actual: - 3.7.0 alpha 4: 2018-01-09 - 3.7.0 beta 1: 2018-01-31 (No new features beyond this point.) +- 3.7.0 beta 2: 2018-02-27 +- 3.7.0 beta 3: 2018-03-29 +- 3.7.0 beta 4: 2018-05-02 +- 3.7.0 beta 5: 2018-05-30 +- 3.7.0 candidate 1: 2018-06-12 +- 3.7.0 final: 2018-06-27 Expected: -- 3.7.0 beta 2: 2018-02-26 -- 3.7.0 beta 3: 2018-03-26 -- 3.7.0 beta 4: 2018-04-30 -- 3.7.0 candidate 1: 2018-05-21 -- 3.7.0 candidate 2: 2018-06-04 (if necessary) -- 3.7.0 final: 2018-06-15 +Maintenance releases +-------------------- + +Expected: + +- 3.7.1: 2018-07-xx Features for 3.7 diff --git a/pep-0538.txt b/pep-0538.txt index ebe967db7..db1ec7d71 100644 --- a/pep-0538.txt +++ b/pep-0538.txt @@ -97,6 +97,9 @@ with a runtime ``PYTHONCOERCECLOCALE=warn`` environment variable setting that allows developers and system integrators to opt-in to receiving locale coercion and compatibility warnings, without emitting them by default. +The output examples in the PEP itself have also been updated to remove +the warnings and make them easier to read. + Background ========== @@ -352,10 +355,12 @@ proposed solution: PEP process or Python release announcements. However, to minimize the chance of introducing new problems for end users, we'll do this *without* using the warnings system, so even running with ``-Werror`` won't turn it into a runtime - exception. + exception. (Note: these warnings ended up being silenced by default. See the + Implementation Note above for more details) * for Python 3.7, any changed defaults will offer some form of explicit "off" switch at build time, runtime, or both + Minimizing the negative impact on systems currently correctly configured to use GB-18030 or another partially ASCII compatible universal encoding leads to the following design principle: @@ -459,6 +464,9 @@ successfully configured:: Python detected LC_CTYPE=C: LC_CTYPE coerced to C.UTF-8 (set another locale or PYTHONCOERCECLOCALE=0 to disable this locale coercion behaviour). +(Note: this warning ended up being silenced by default. See the +Implementation Note above for more details) + As long as the current platform provides at least one of the candidate UTF-8 based environments, this locale coercion will mean that the standard Python binary *and* locale-aware extensions should once again "just work" @@ -508,6 +516,9 @@ configured locale is still the default ``C`` locale and C.utf8, or UTF-8 (if available) as alternative Unicode-compatible locales is recommended. +(Note: this warning ended up being silenced by default. See the +Implementation Note above for more details) + In this case, no actual change will be made to the locale settings. 
Instead, the warning informs both system and application integrators that @@ -535,6 +546,10 @@ The locale warning behaviour would be controlled by the flag ``--with[out]-c-locale-warning``, which would set the ``PY_WARN_ON_C_LOCALE`` preprocessor definition. +(Note: this compile time warning option ended up being replaced by a runtime +``PYTHONCOERCECLOCALE=warn`` option. See the Implementation Note above for +more details) + On platforms which don't use the ``autotools`` based build system (i.e. Windows) these preprocessor variables would always be undefined. @@ -925,8 +940,6 @@ cover, as it avoids causing any problems in cases like the following:: $ LANG=C LC_MONETARY=ja_JP.utf8 ./python -c \ "from locale import setlocale, LC_ALL, currency; setlocale(LC_ALL, ''); print(currency(1e6))" - Python detected LC_CTYPE=C: LC_CTYPE & LANG coerced to C.UTF-8 (set another - locale or PYTHONCOERCECLOCALE=0 to disable this locale coercion behavior). ¥1000000 @@ -966,9 +979,6 @@ from a PEP 538 enabled CPython build, where each line after the first is executed by doing "up-arrow, left-arrow x4, delete, enter":: $ LANG=C ./python - Python detected LC_CTYPE=C: LC_CTYPE & LANG coerced to C.UTF-8 (set - another locale or PYTHONCOERCECLOCALE=0 to disable this locale - coercion behavior). Python 3.7.0a0 (heads/pep538-coerce-c-locale:188e780, May 7 2017, 00:21:13) [GCC 6.3.1 20161221 (Red Hat 6.3.1-1)] on linux Type "help", "copyright", "credits" or "license" for more information. @@ -1064,7 +1074,7 @@ Accordingly, this PEP originally proposed to disable locale coercion and warnings at build time for these platforms, on the assumption that it would be entirely redundant. -However, that assumpion turned out to be incorrect assumption, as subsequent +However, that assumption turned out to be incorrect, as subsequent investigations showed that if you explicitly configure ``LANG=C`` on these platforms, extension modules like GNU readline will misbehave in much the same way as they do on other \*nix systems. [21_] diff --git a/pep-0539.txt b/pep-0539.txt index 9f92da4c0..c6a10a5f3 100644 --- a/pep-0539.txt +++ b/pep-0539.txt @@ -4,10 +4,11 @@ Version: $Revision$ Last-Modified: $Date$ Author: Erik M. Bray, Masayuki Yamamoto BDFL-Delegate: Nick Coghlan -Status: Accepted +Status: Final Type: Standards Track Content-Type: text/x-rst Created: 20-Dec-2016 +Python-Version: 3.7 Post-History: 16-Dec-2016, 31-Aug-2017, 08-Sep-2017 Resolution: https://mail.python.org/pipermail/python-dev/2017-September/149358.html diff --git a/pep-0540.txt b/pep-0540.txt index bbe0ade25..8d488f619 100644 --- a/pep-0540.txt +++ b/pep-0540.txt @@ -18,7 +18,7 @@ Abstract Add a new "UTF-8 Mode" to enhance Python's use of UTF-8. When UTF-8 Mode is active, Python will: -* use the ``utf-8`` encoding, irregardless of the locale currently set by +* use the ``utf-8`` encoding, regardless of the locale currently set by the current platform, and * change the ``stdin`` and ``stdout`` error handlers to ``surrogateescape``. @@ -163,7 +163,7 @@ The UTF-8 Mode has the same effect as locale coercion: ``surrogateescape``. These changes only affect Python code. But the locale coercion has -addiditonal effects: the ``LC_CTYPE`` environment variable and the +additional effects: the ``LC_CTYPE`` environment variable and the ``LC_CTYPE`` locale are set to a UTF-8 locale like ``C.UTF-8``. One side effect is that non-Python code is also impacted by the locale coercion. The two PEPs are complementary. 
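Both PEPs rely on Python's ``surrogateescape`` error handler, which lets
bytes that are invalid in the target encoding survive a decode/encode round
trip. A quick illustration, independent of either PEP's machinery::

    # Each undecodable byte is smuggled through as a lone surrogate,
    # so arbitrary bytes round-trip without data loss.
    raw = b'caf\xe9'  # 'café' encoded as latin-1; invalid as UTF-8
    text = raw.decode('utf-8', errors='surrogateescape')
    assert text == 'caf\udce9'
    assert text.encode('utf-8', errors='surrogateescape') == raw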
diff --git a/pep-0541.txt b/pep-0541.txt index d45b6265d..d83cef91d 100644 --- a/pep-0541.txt +++ b/pep-0541.txt @@ -3,12 +3,14 @@ Title: Package Index Name Retention Version: $Revision$ Last-Modified: $Date$ Author: Łukasz Langa -BDFL-Delegate: Donald Stufft +BDFL-Delegate: Mark Mangoba Discussions-To: distutils-sig -Status: Draft +Status: Final Type: Process Content-Type: text/x-rst Created: 12-January-2017 +Post-History: +Resolution: https://mail.python.org/pipermail/distutils-sig/2018-March/032089.html Abstract @@ -36,6 +38,22 @@ This document aims to provide general guidelines for solving the most typical cases of such conflicts. +Approval Process +================ + +As the application of this policy has potential legal ramifications for the +Python Software Foundation, the approval process used is more formal than that +used for most PEPs. + +Rather than accepting the PEP directly, the assigned BDFL-Delegate will instead +recommend its acceptance to the PSF's Packaging Working Group. After +consultation with the PSF's General Counsel, adoption of the policy will then +be subject to a formal vote within the working group. + +This formal approval process will be used for both initial adoption of the +policy, and for adoption of any future amendments. + + Specification ============= @@ -61,7 +79,9 @@ The use cases covered by this document are: * resolving disputes over a name. -* Invalid projects. +* Invalid projects: + + * projects subject to a claim of intellectual property infringement. The proposed extension to the Terms of Use, as expressed in the Implementation section, will be published as a separate document on the @@ -112,7 +132,7 @@ are met: * the project has been determined *abandoned* by the rules described above; -* the candidate is able to demonstrate own failed attempts to contact +* the candidate is able to demonstrate their own failed attempts to contact the existing owner; * the candidate is able to demonstrate improvements made on the candidate's own fork of the project; @@ -137,7 +157,7 @@ of reusing the name when ALL of the following are met: * the project has been determined *abandoned* by the rules described above; -* the candidate is able to demonstrate own failed attempts to contact +* the candidate is able to demonstrate their own failed attempts to contact the existing owner; * the candidate is able to demonstrate that the project suggested to reuse the name already exists and meets notability requirements; @@ -196,24 +216,53 @@ is considered invalid and will be removed from the Index: The Package Index maintainers pre-emptively declare certain package names as unavailable for security reasons. -If you find a project that you think might be considered invalid, create -a support request [7]_. Maintainers of the Package Index will review -the case. +Intellectual property policy +---------------------------- + +It is the policy of Python Software Foundation and the Package Index +maintainers to be appropriately responsive to claims of intellectual +property infringement by third parties. It is not the policy of +the Python Software Foundation nor the Package Index maintainers +to pre-screen uploaded packages for any type of intellectual property +infringement. + +Possibly-infringing packages should be reported to legal@python.org +and counsel to the Python Software Foundation will determine an +appropriate response. 
A package can be removed or transferred to a +new owner at the sole discretion of the Python Software Foundation to +address a claim of infringement. + +A project published on the Package Index meeting ANY of the following +may be considered infringing and subject to removal from the Index +or transferral to a new owner: + +* project contains unlicensed copyrighted material from a third party, + and is subject to a properly made claim under the DMCA; +* project uses a third party's trademark in a way not covered by + nominal or fair use guidelines; +* project clearly implicates a patented system or process, and is + the subject of a complaint; or +* project is subject to an active lawsuit. + +In the event of a complaint for intellectual property infringement, +a copy of the complaint will be sent to the package owner. In some +cases, action may be taken by the Package Index maintainers before +the owner responds. The role of the Python Software Foundation ------------------------------------------ -The Python Software Foundation [8]_ is the non-profit legal entity that +The Python Software Foundation [7]_ is the non-profit legal entity that provides the Package Index as a community service. The Package Index maintainers can escalate issues covered by this -document for resolution by the PSF Board if the matter is not clear +document for resolution by the Packaging Workgroup if the matter is not clear enough. Some decisions *require* additional judgement by the Board, especially in cases of Code of Conduct violations or legal claims. -Decisions made by the Board are published as Resolutions [9]_. +Recommendations made by the Board are sent to the Packaging Workgroup [8]_ for review. -The Board has the final say in any disputes covered by this document and +The Packaging Workgroup has the final say in any disputes covered by this document and can decide to reassign or remove a project from the Package Index after careful consideration even when not all requirements listed here are met. @@ -266,7 +315,7 @@ References (https://pypi.org/policy/terms-of-use/) .. [2] The Python Package Index - (https://pypi.python.org/) + (https://pypi.org/) .. [3] The Comprehensive Perl Archive Network (http://www.cpan.org/) @@ -280,14 +329,11 @@ References .. [6] Python Community Code of Conduct (https://www.python.org/psf/codeofconduct/) -.. [7] PyPI Support Requests - (https://sourceforge.net/p/pypi/support-requests/) - -.. [8] Python Software Foundation +.. [7] Python Software Foundation (https://www.python.org/psf/) -.. [9] PSF Board Resolutions - (https://www.python.org/psf/records/board/resolutions/) +.. [8] Python Packaging Working Group + (https://wiki.python.org/psf/PackagingWG/) Copyright diff --git a/pep-0544.txt b/pep-0544.txt index 8c5daa01c..079a55354 100644 --- a/pep-0544.txt +++ b/pep-0544.txt @@ -199,8 +199,8 @@ approaches related to structural subtyping in Python and other languages: Such behavior seems to be a perfect fit for both runtime and static behavior of protocols. As discussed in `rationale`_, we propose to add static support for such behavior. In addition, to allow users to achieve such runtime - behavior for *user-defined* protocols a special ``@runtime`` decorator will - be provided, see detailed `discussion`_ below. + behavior for *user-defined* protocols a special ``@runtime_checkable`` decorator + will be provided, see detailed `discussion`_ below. * TypeScript [typescript]_ provides support for user-defined classes and interfaces. 
Explicit implementation declaration is not required and
@@ -381,8 +381,7 @@ Explicitly declaring implementation
To explicitly declare that a certain class implements a given protocol, it
can be used as a regular base class. In this case a class could use
-default implementations of protocol members. ``typing.Sequence`` is a good
-example of a protocol with useful default methods. Static analysis tools are
+default implementations of protocol members. Static analysis tools are
expected to automatically detect that a class implements a given protocol.
So while it's possible to subclass a protocol explicitly, it's *not necessary*
to do so for the sake of type-checking.
@@ -587,6 +586,30 @@ Continuing the previous example::

    walk(tree)  # OK, 'Tree[float]' is a subtype of 'Traversable'

+Self-types in protocols
+-----------------------
+
+The self-types in protocols follow the corresponding specification
+[self-types]_ of PEP 484. For example::
+
+    C = TypeVar('C', bound='Copyable')
+    class Copyable(Protocol):
+        def copy(self: C) -> C:
+            ...
+
+    class One:
+        def copy(self) -> 'One':
+            ...
+
+    T = TypeVar('T', bound='Other')
+    class Other:
+        def copy(self: T) -> T:
+            ...
+
+    c: Copyable
+    c = One()  # OK
+    c = Other()  # Also OK
+
+
Using Protocols
===============
@@ -665,14 +688,14 @@ classes. For example::

One can use multiple inheritance to define an intersection of protocols.
Example::

-    from typing import Sequence, Hashable
+    from typing import Iterable, Hashable

-    class HashableFloats(Sequence[float], Hashable, Protocol):
+    class HashableFloats(Iterable[float], Hashable, Protocol):
        pass

    def cached_func(args: HashableFloats) -> float:
        ...

-    cached_func((1, 2, 3))  # OK, tuple is both hashable and sequence
+    cached_func((1, 2, 3))  # OK, tuple is both hashable and iterable

If this will prove to be a widely used scenario, then a special
intersection type construct could be added in future as specified by PEP 483,
@@ -740,8 +763,8 @@ aliases::

.. _discussion:

-``@runtime`` decorator and narrowing types by ``isinstance()``
---------------------------------------------------------------
+``@runtime_checkable`` decorator and narrowing types by ``isinstance()``
+------------------------------------------------------------------------

The default semantics is that ``isinstance()`` and ``issubclass()`` fail
for protocol types. This is in the spirit of duck typing -- protocols
@@ -752,38 +775,58 @@ However, it should be possible for protocol types to implement custom
instance and class checks when this makes sense, similar to how
``Iterable`` and other ABCs in ``collections.abc`` and ``typing`` already do
it, but this is limited to non-generic and unsubscripted generic protocols
-(``Iterable`` is statically equivalent to ``Iterable[Any]`).
-The ``typing`` module will define a special ``@runtime`` class decorator
+(``Iterable`` is statically equivalent to ``Iterable[Any]``).
+The ``typing`` module will define a special ``@runtime_checkable`` class decorator
that provides the same semantics for class and instance checks as for
``collections.abc`` classes, essentially making them "runtime protocols"::

    from typing import runtime_checkable, Protocol

-    @runtime
-    class Closable(Protocol):
+    @runtime_checkable
+    class SupportsClose(Protocol):
        def close(self):
            ...

-    assert isinstance(open('some/file'), Closable)
-
-Static type checkers will understand ``isinstance(x, Proto)`` and
-``issubclass(C, Proto)`` for protocols defined with this decorator (as they
-already do for ``Iterable`` etc.). Static type checkers will narrow types
-after such checks by the type erased ``Proto`` (i.e. with all variables
-having type ``Any`` and all methods having type ``Callable[..., Any]``).
-Note that ``isinstance(x, Proto[int])`` etc. will always fail in agreement
-with PEP 484. Examples::
-
-    from typing import Iterable, Iterator, Sequence
-
-    def process(items: Iterable[int]) -> None:
-        if isinstance(items, Iterator):
-            # 'items' has type 'Iterator[int]' here
-        elif isinstance(items, Sequence[int]):
-            # Error! Can't use 'isinstance()' with subscripted protocols
+    assert isinstance(open('some/file'), SupportsClose)

Note that instance checks are not 100% reliable statically; this is why
this behavior is opt-in, see section on `rejected`_ ideas for examples.
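For instance, a class that merely defines a matching method satisfies such a
check without naming the protocol as a base. A small self-contained sketch,
using the decorator as it eventually shipped in ``typing`` (Python 3.8, and
``typing_extensions`` before that)::

    from typing import Protocol, runtime_checkable

    @runtime_checkable
    class SupportsClose(Protocol):
        def close(self) -> None: ...

    class Resource:
        def close(self) -> None:
            pass

    # Both checks pass structurally; Resource never subclasses SupportsClose.
    assert isinstance(Resource(), SupportsClose)
    assert issubclass(Resource, SupportsClose)  # allowed: non-data protocol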
+The most type checkers can do is to treat ``isinstance(obj, Iterator)``
+roughly as a simpler way to write
+``hasattr(obj, '__iter__') and hasattr(obj, '__next__')``. To minimize
+the risks for this feature, the following rules are applied.
+
+**Definitions**:
+
+* *Data, and non-data protocols*: A protocol is called a non-data protocol
+  if it only contains methods as members (for example ``Sized``,
+  ``Iterator``, etc.). A protocol that contains at least one non-method member
+  (like ``x: int``) is called a data protocol.
+* *Unsafe overlap*: A type ``X`` is called unsafely overlapping with
+  a protocol ``P``, if ``X`` is not a subtype of ``P``, but it is a subtype
+  of the type erased version of ``P`` where all members have type ``Any``.
+  In addition, if at least one element of a union unsafely overlaps with
+  a protocol ``P``, then the whole union is unsafely overlapping with ``P``.
+
+**Specification**:
+
+* A protocol can be used as a second argument in ``isinstance()`` and
+  ``issubclass()`` only if it explicitly opts in via the ``@runtime_checkable``
+  decorator. This requirement exists because protocol checks are not type safe
+  in case of dynamically set attributes, and because type checkers can prove
+  that an ``isinstance()`` check is safe only for a given class, not for all
+  its subclasses.
+* ``isinstance()`` can be used with both data and non-data protocols, while
+  ``issubclass()`` can be used only with non-data protocols. This restriction
+  exists because some data attributes can be set on an instance in the
+  constructor and this information is not always available on the class object.
+* Type checkers should reject an ``isinstance()`` or ``issubclass()`` call, if
+  there is an unsafe overlap between the type of the first argument and
+  the protocol.
+* Type checkers should be able to select a correct element from a union after
+  a safe ``isinstance()`` or ``issubclass()`` call. For narrowing from non-union
+  types, type checkers can use their best judgement (this is intentionally
+  unspecified, since a precise specification would require intersection types).


Using Protocols in Python 2.7 - 3.5
@@ -825,14 +868,12 @@ effects on the core interpreter and standard library except in the
a protocol or not. Add a class attribute ``_is_protocol = True``
if that is the case. Verify that a protocol class only has protocol base
classes in the MRO (except for object).
-* Implement ``@runtime`` that allows ``__subclasshook__()`` performing
-  structural instance and subclass checks as in ``collections.abc`` classes.
+* Implement ``@runtime_checkable`` that allows ``__subclasshook__()``
+  to perform structural instance and subclass checks as in
+  ``collections.abc`` classes.
* All structural subtyping checks will be performed by static type checkers, such as ``mypy`` [mypy]_. No additional support for protocol validation will be provided at runtime. -* Classes ``Mapping``, ``MutableMapping``, ``Sequence``, and - ``MutableSequence`` in ``collections.abc`` module will support structural - instance and subclass checks (like e.g. ``collections.abc.Iterable``). Changes in the typing module @@ -849,8 +890,6 @@ The following classes in ``typing`` module will be protocols: * ``Container`` * ``Collection`` * ``Reversible`` -* ``Sequence``, ``MutableSequence`` -* ``Mapping``, ``MutableMapping`` * ``ContextManager``, ``AsyncContextManager`` * ``SupportsAbs`` (and other ``Supports*`` classes) @@ -1026,11 +1065,10 @@ be considered "non-protocol". Therefore, it was decided to not introduce "non-protocol" methods. There is only one downside to this: it will require some boilerplate for -implicit subtypes of ``Mapping`` and few other "large" protocols. But, this -applies to few "built-in" protocols (like ``Mapping`` and ``Sequence``) and -people are already subclassing them. Also, such style is discouraged for -user-defined protocols. It is recommended to create compact protocols and -combine them. +implicit subtypes of "large" protocols. But, this doesn't apply to "built-in" +protocols that are all "small" (i.e. have only few abstract methods). +Also, such style is discouraged for user-defined protocols. It is recommended +to create compact protocols and combine them. Make protocols interoperable with other approaches @@ -1103,7 +1141,7 @@ Another potentially problematic case is assignment of attributes self.x = 0 c = C() - isinstance(c1, P) # False + isinstance(c, P) # False c.initialize() isinstance(c, P) # True @@ -1149,7 +1187,7 @@ This was rejected for the following reasons: ABCs from ``typing`` module. If we prohibit explicit subclassing of these ABCs, then quite a lot of code will break. -* Convenience: There are existing protocol-like ABCs (that will be turned +* Convenience: There are existing protocol-like ABCs (that may be turned into protocols) that have many useful "mix-in" (non-abstract) methods. For example in the case of ``Sequence`` one only needs to implement ``__getitem__`` and ``__len__`` in an explicit subclass, and one gets @@ -1301,33 +1339,16 @@ confusions. Backwards Compatibility ======================= -This PEP is almost fully backwards compatible. Few collection classes such as -``Sequence`` and ``Mapping`` will be turned into runtime protocols, therefore -results of ``isinstance()`` checks are going to change in some edge cases. -For example, a class that implements the ``Sequence`` protocol but does not -explicitly inherit from ``Sequence`` currently returns ``False`` in -corresponding instance and class checks. With this PEP implemented, such -checks will return ``True``. +This PEP is fully backwards compatible. Implementation ============== -A working implementation of this PEP for ``mypy`` type checker is found on -GitHub repo at https://github.com/ilevkivskyi/mypy/tree/protocols, -corresponding ``typeshed`` stubs for more flavor are found at -https://github.com/ilevkivskyi/typeshed/tree/protocols. Installation steps:: - - git clone --recurse-submodules https://github.com/ilevkivskyi/mypy/ - cd mypy && git checkout protocols && cd typeshed - git remote add proto https://github.com/ilevkivskyi/typeshed - git fetch proto && git checkout proto/protocols - cd .. && git add typeshed && sudo python3 -m pip install -U . 
- -The runtime implementation of protocols in ``typing`` module is -found at https://github.com/ilevkivskyi/typehinting/tree/protocols. -The version of ``collections.abc`` with structural behavior for mappings and -sequences is found at https://github.com/ilevkivskyi/cpython/tree/protocols. +The ``mypy`` type checker fully supports protocols (modulo a few +known bugs). This includes treating all the builtin protocols, such as +``Iterable`` structurally. The runtime implementation of protocols is +available in ``typing_extensions`` module on PyPI. References @@ -1372,6 +1393,9 @@ References .. [elsewhere] https://github.com/python/peps/pull/224 +.. [self-types] + https://www.python.org/dev/peps/pep-0484/#annotating-instance-and-class-methods + Copyright ========= diff --git a/pep-0545.txt b/pep-0545.txt index 37f1f3ebd..40628c269 100644 --- a/pep-0545.txt +++ b/pep-0545.txt @@ -609,7 +609,7 @@ References (https://github.com/AFPy/python_doc_fr/graphs/contributors?from=2016-01-01&to=2016-12-31&type=c) .. [15] Python-doc on Transifex - (https://www.transifex.com/python-doc/) + (https://www.transifex.com/python-doc/public/) .. [16] French translation (https://www.afpy.org/doc/python/) diff --git a/pep-0546.txt b/pep-0546.txt index ce3ac6de8..4edf309f8 100644 --- a/pep-0546.txt +++ b/pep-0546.txt @@ -5,7 +5,7 @@ Last-Modified: $Date$ Author: Victor Stinner , Cory Benfield , BDFL-Delegate: Benjamin Peterson -Status: Accepted +Status: Rejected Type: Standards Track Content-Type: text/x-rst Created: 30-May-2017 @@ -21,6 +21,15 @@ Backport the ssl.MemoryBIO and ssl.SSLObject classes from Python 3 to Python 2.7 to enhance the overall security of Python 2.7. +Rejection Notice +================ + +This PEP is rejected, see `Withdraw PEP 546? Backport ssl.MemoryBIO and +ssl.SSLObject to Python 2.7 +`_ +discussion for the rationale. + + Rationale ========= diff --git a/pep-0547.rst b/pep-0547.rst index f53ea7a7e..3fc4a422a 100644 --- a/pep-0547.rst +++ b/pep-0547.rst @@ -4,7 +4,7 @@ Version: $Revision$ Last-Modified: $Date$ Author: Marcel Plch , Petr Viktorin -Status: Draft +Status: Deferred Type: Standards Track Content-Type: text/x-rst Created: 25-May-2017 @@ -12,6 +12,17 @@ Python-Version: 3.7 Post-History: +Deferral Notice +=============== + +Cython -- the most important use case for this PEP and the only explicit +one -- is not ready for multi-phase initialization yet. +It keeps global state in C-level static variables. +See discussion at `Cython issue 1923`_. + +The PEP is deferred until the situation changes. + + Abstract ======== @@ -186,6 +197,7 @@ References .. _GitHub: https://github.com/python/cpython/pull/1761 .. _Cython issue 1715: https://github.com/cython/cython/issues/1715 .. _Possible Future Extensions section: https://www.python.org/dev/peps/pep-0489/#possible-future-extensions +.. _Cython issue 1923: https://github.com/cython/cython/pull/1923 Copyright diff --git a/pep-0551.rst b/pep-0551.rst index 48eb80d4c..a44e4aeeb 100644 --- a/pep-0551.rst +++ b/pep-0551.rst @@ -4,28 +4,37 @@ Version: $Revision$ Last-Modified: $Date$ Author: Steve Dower Status: Draft -Type: Standards Track +Type: Informational Content-Type: text/x-rst Created: 23-Aug-2017 Python-Version: 3.7 Post-History: 24-Aug-2017 (security-sig), 28-Aug-2017 (python-dev) +Relationship to PEP 578 +======================= + +This PEP has been split into two since its original posting. + +See `PEP 578 `_ for the +auditing APIs proposed for addition to the next version of Python. 
+ +This is now an informational PEP, providing guidance to those planning +to integrate Python into their secure or audited environments. + Abstract ======== -This PEP describes additions to the Python API and specific behaviors -for the CPython implementation that make actions taken by the Python -runtime visible to security and auditing tools. The goals in order of -increasing importance are to prevent malicious use of Python, to detect -and report on malicious use, and most importantly to detect attempts to -bypass detection. Most of the responsibility for implementation is -required from users, who must customize and build Python for their own -environment. +This PEP describes the concept of security transparency and how it +applies to the Python runtime. Visibility into actions taken by the +runtime is invaluable in integrating Python into an otherwise secure +and/or monitored environment. -We propose two small sets of public APIs to enable users to reliably -build their copy of Python without having to modify the core runtime, -protecting future maintainability. We also discuss recommendations for -users to help them develop and configure their copy of Python. +The audit hooks described in PEP-578 are an essential component in +detecting, identifying and analyzing misuse of Python. While the hooks +themselves are neutral (in that not every reported event is inherently +misuse), they provide essential context to those who are responsible +for monitoring an overall system or network. With enough transparency, +attackers are no longer able to hide. Background ========== @@ -126,14 +135,14 @@ tools, most network access and DNS resolution, and attempts to create and hide files or configuration settings on the local machine. To summarize, defenders have a need to audit specific uses of Python in -order to detect abnormal or malicious usage. Currently, the Python -runtime does not provide any ability to do this, which (anecdotally) has -led to organizations switching to other languages. The aim of this PEP -is to enable system administrators to deploy a security transparent copy -of Python that can integrate with their existing auditing and protection -systems. +order to detect abnormal or malicious usage. With PEP 578, the Python +runtime gains the ability to provide this. The aim of this PEP is to +assist system administrators with deploying a security transparent +version of Python that can integrate with their existing auditing and +protection systems. -On Windows, some specific features that may be enabled by this include: +On Windows, some specific features that may be integrated through the +hooks added by PEP 578 include: * Script Block Logging [3]_ * DeviceGuard [4]_ @@ -151,7 +160,7 @@ On Linux, some specific features that may be integrated are: * SELinux labels [13]_ * check execute bit on imported modules -On macOS, some features that may be used with the expanded APIs are: +On macOS, some features that may be integrated are: * OpenBSM [10]_ * syslog [11]_ @@ -161,9 +170,6 @@ production machines is highly appealing to system administrators and will make Python a more trustworthy dependency for application developers. -Overview of Changes -=================== - True security transparency is not fully achievable by Python in isolation. The runtime can audit as many events as it likes, but unless the logs are reviewed and analyzed there is no value. 
 Python may impose
@@ -173,340 +179,64 @@
 implementations of certain security features, and organizations with
 the resources to fully customize their runtime should be encouraged to
 do so.
 
-The aim of these changes is to enable system administrators to integrate
-Python into their existing security systems, without dictating what
-those systems look like or how they should behave. We propose two API
-changes to enable this: an Audit Hook and Verified Open Hook. Both are
-not set by default, and both require modifications to the entry point
-binary to enable any functionality. For the purposes of validation and
-example, we propose a new ``spython``/``spython.exe`` entry point
-program that enables some basic functionality using these hooks.
-**However, security-conscious organizations are expected to create their
-own entry points to meet their own needs.**
+Summary Recommendations
+=======================
 
-Audit Hook
-----------
+These are discussed in greater detail in later sections, but are
+presented here to frame the overall discussion.
 
-In order to achieve security transparency, an API is required to raise
-messages from within certain operations. These operations are typically
-deep within the Python runtime or standard library, such as dynamic code
-compilation, module imports, DNS resolution, or use of certain modules
-such as ``ctypes``.
+Sysadmins should provide and use an alternate entry point (besides
+``python.exe`` or ``pythonX.Y``) in order to reduce surface area and
+securely enable audit hooks. A discussion of what could be restricted
+is below in `Restricting the Entry Point`_.
 
-The new C APIs required for audit hooks are::
+Sysadmins should use all available measures provided by their operating
+system to prevent modifications to their Python installation, such as
+file permissions, access control lists and signature validation.
 
-    # Add an auditing hook
-    typedef int (*hook_func)(const char *event, PyObject *args,
-                             void *userData);
-    int PySys_AddAuditHook(hook_func hook, void *userData);
+Sysadmins should log everything and collect logs to a central location
+as quickly as possible; avoid keeping logs on outer-ring machines.
 
-    # Raise an event with all auditing hooks
-    int PySys_Audit(const char *event, PyObject *args);
-
-    # Internal API used during Py_Finalize() - not publicly accessible
-    void _Py_ClearAuditHooks(void);
-
-The new Python APIs for audit hooks are::
-
-    # Add an auditing hook
-    sys.addaudithook(hook: Callable[str, tuple]) -> None
-
-    # Raise an event with all auditing hooks
-    sys.audit(str, *args) -> None
+Sysadmins should prioritize *detection* of misuse over *prevention* of
+misuse.
 
-Hooks are added by calling ``PySys_AddAuditHook()`` from C at any time,
-including before ``Py_Initialize()``, or by calling
-``sys.addaudithook()`` from Python code. Hooks are never removed or
-replaced, and existing hooks have an opportunity to refuse to allow new
-hooks to be added (adding an audit hook is audited, and so preexisting
-hooks can raise an exception to block the new addition).
+Restricting the Entry Point
+===========================
 
-When events of interest are occurring, code can either call
-``PySys_Audit()`` from C (while the GIL is held) or ``sys.audit()``. The
-string argument is the name of the event, and the tuple contains
-arguments.
A given event name should have a fixed schema for arguments, -and both arguments are considered a public API (for a given x.y version -of Python), and thus should only change between feature releases with -updated documentation. +One of the primary vulnerabilities exposed by the presence of Python +on a machine is the ability to execute arbitrary code without +detection or verification by the system. This is made significantly +easier because the default entry point (``python.exe`` on Windows and +``pythonX.Y`` on other platforms) allows execution from the command +line, from standard input, and does not have any hooks enabled by +default. -When an event is audited, each hook is called in the order it was added -with the event name and tuple. If any hook returns with an exception -set, later hooks are ignored and *in general* the Python runtime should -terminate. This is intentional to allow hook implementations to decide -how to respond to any particular event. The typical responses will be to -log the event, abort the operation with an exception, or to immediately -terminate the process with an operating system exit call. +Our recommendation is that production machines should use a modified +entry point instead of the default. Once outside of the development +environment, there is rarely a need for the flexibility offered by the +default entry point. -When an event is audited but no hooks have been set, the ``audit()`` -function should include minimal overhead. Ideally, each argument is a -reference to existing data rather than a value calculated just for the -auditing call. +In this section, we describe a hypothetical ``spython`` entry point +(``spython.exe`` on Windows; ``spythonX.Y`` on other platforms) that +provides a level of security transparency recommended for production +machines. An associated example implementation shows many of the +features described here, though with a number of concessions for the +sake of avoiding platform-specific code. A sufficient implementation +will inherently require some integration with platform-specific +security features. -As hooks may be Python objects, they need to be freed during -``Py_Finalize()``. To do this, we add an internal API -``_Py_ClearAuditHooks()`` that releases any ``PyObject*`` hooks that are -held, as well as any heap memory used. This is an internal function with -no public export, but it triggers an event for all audit hooks to ensure -that unexpected calls are logged. +Official distributions will not include any ``spython`` by default, but +third party distributions may include appropriately modified entry +points that use the same name. -See `Audit Hook Locations`_ for proposed audit hook points and schemas, -and the `Recommendations`_ section for discussion on -appropriate responses. - -Verified Open Hook ------------------- - -Most operating systems have a mechanism to distinguish between files -that can be executed and those that can not. For example, this may be an -execute bit in the permissions field, or a verified hash of the file -contents to detect potential code tampering. These are an important -security mechanism for preventing execution of data or code that is not -approved for a given environment. Currently, Python has no way to -integrate with these when launching scripts or importing modules. 
- -The new public C API for the verified open hook is:: - - # Set the handler - typedef PyObject *(*hook_func)(PyObject *path) - int PyImport_SetOpenForImportHook(void *handler) - - # Open a file using the handler - PyObject *PyImport_OpenForImport(const char *path) - -The new public Python API for the verified open hook is:: - - # Open a file using the handler - _imp.open_for_import(path) - -The ``_imp.open_for_import()`` function is a drop-in replacement for -``open(str(pathlike), 'rb')``. Its default behaviour is to open a file -for raw, binary access - any more restrictive behaviour requires the -use of a custom handler. Only ``str`` arguments are accepted. - -A custom handler may be set by calling ``PyImport_SetOpenForImportHook()`` -from C at any time, including before ``Py_Initialize()``. However, if a -hook has already been set then the call will fail. When -``open_for_import()`` is called with a hook set, the hook will be passed -the path and its return value will be returned directly. The returned -object should be an open file-like object that supports reading raw -bytes. This is explicitly intended to allow a ``BytesIO`` instance if -the open handler has already had to read the file into memory in order -to perform whatever verification is necessary to determine whether the -content is permitted to be executed. - -Note that these hooks can import and call the ``_io.open()`` function on -CPython without triggering themselves. - -If the hook determines that the file is not suitable for execution, it -should raise an exception of its choice, as well as raising any other -auditing events or notifications. - -All import and execution functionality involving code from a file will -be changed to use ``open_for_import()`` unconditionally. It is important -to note that calls to ``compile()``, ``exec()`` and ``eval()`` do not go -through this function - an audit hook that includes the code from these -calls will be added and is the best opportunity to validate code that is -read from the file. Given the current decoupling between import and -execution in Python, most imported code will go through both -``open_for_import()`` and the log hook for ``compile``, and so care -should be taken to avoid repeating verification steps. - -.. note:: - The use of ``open_for_import()`` by ``importlib`` is a valuable - first defence, but should not be relied upon to prevent misuse. In - particular, it is easy to monkeypatch ``importlib`` in order to - bypass the call. Auditing hooks are the primary way to achieve - security transparency, and are essential for detecting attempts to - bypass other functionality. - -API Availability ----------------- - -While all the functions added here are considered public and stable API, -the behavior of the functions is implementation specific. The -descriptions here refer to the CPython implementation, and while other -implementations should provide the functions, there is no requirement -that they behave the same. - -For example, ``sys.addaudithook()`` and ``sys.audit()`` should exist but -may do nothing. This allows code to make calls to ``sys.audit()`` -without having to test for existence, but it should not assume that its -call will have any effect. (Including existence tests in -security-critical code allows another vector to bypass auditing, so it -is preferable that the function always exist.) - -``_imp.open_for_import(path)`` should at a minimum always return -``_io.open(path, 'rb')``. 
Code using the function should make no further -assumptions about what may occur, and implementations other than CPython -are not required to let developers override the behavior of this -function with a hook. - -Audit Hook Locations -==================== - -Calls to ``sys.audit()`` or ``PySys_Audit()`` will be added to the -following operations with the schema in Table 1. Unless otherwise -specified, the ability for audit hooks to abort any listed operation -should be considered part of the rationale for including the hook. - -.. csv-table:: Table 1: Audit Hooks - :header: "API Function", "Event Name", "Arguments", "Rationale" - :widths: 2, 2, 3, 6 - - ``PySys_AddAuditHook``, ``sys.addaudithook``, "", "Detect when new - audit hooks are being added. - " - ``_PySys_ClearAuditHooks``, ``sys._clearaudithooks``, "", "Notifies - hooks they are being cleaned up, mainly in case the event is - triggered unexpectedly. This event cannot be aborted. - " - ``PyImport_SetOpenForImportHook``, ``setopenforimporthook``, "", " - Detects any attempt to set the ``open_for_import`` hook. - " - "``compile``, ``exec``, ``eval``, ``PyAst_CompileString``, - ``PyAST_obj2mod``", ``compile``, "``(code, filename_or_none)``", " - Detect dynamic code compilation, where ``code`` could be a string or - AST. Note that this will be called for regular imports of source - code, including those that were opened with ``open_for_import``. - " - "``exec``, ``eval``, ``run_mod``", ``exec``, "``(code_object,)``", " - Detect dynamic execution of code objects. This only occurs for - explicit calls, and is not raised for normal function invocation. - " - ``import``, ``import``, "``(module, filename, sys.path, - sys.meta_path, sys.path_hooks)``", "Detect when modules are - imported. This is raised before the module name is resolved to a - file. All arguments other than the module name may be ``None`` if - they are not used or available. - " - ``code_new``, ``code.__new__``, "``(bytecode, filename, name)``", " - Detect dynamic creation of code objects. This only occurs for - direct instantiation, and is not raised for normal compilation. - " - ``func_new_impl``, ``function.__new__``, "``(code,)``", "Detect - dynamic creation of function objects. This only occurs for direct - instantiation, and is not raised for normal compilation. - " - "``_ctypes.dlopen``, ``_ctypes.LoadLibrary``", ``ctypes.dlopen``, " - ``(module_or_path,)``", "Detect when native modules are used. - " - ``_ctypes._FuncPtr``, ``ctypes.dlsym``, "``(lib_object, name)``", " - Collect information about specific symbols retrieved from native - modules. - " - ``_ctypes._CData``, ``ctypes.cdata``, "``(ptr_as_int,)``", "Detect - when code is accessing arbitrary memory using ``ctypes``. - " - ``id``, ``id``, "``(id_as_int,)``", "Detect when code is accessing - the id of objects, which in CPython reveals information about - memory layout. - " - ``sys._getframe``, ``sys._getframe``, "``(frame_object,)``", "Detect - when code is accessing frames directly. - " - ``sys._current_frames``, ``sys._current_frames``, "", "Detect when - code is accessing frames directly. - " - ``PyEval_SetProfile``, ``sys.setprofile``, "", "Detect when code is - injecting trace functions. Because of the implementation, exceptions - raised from the hook will abort the operation, but will not be - raised in Python code. Note that ``threading.setprofile`` eventually - calls this function, so the event will be audited for each thread. 
- " - ``PyEval_SetTrace``, ``sys.settrace``, "", "Detect when code is - injecting trace functions. Because of the implementation, exceptions - raised from the hook will abort the operation, but will not be - raised in Python code. Note that ``threading.settrace`` eventually - calls this function, so the event will be audited for each thread. - " - ``_PyEval_SetAsyncGenFirstiter``, ``sys.set_async_gen_firstiter``, " - ", "Detect changes to async generator hooks. - " - ``_PyEval_SetAsyncGenFinalizer``, ``sys.set_async_gen_finalizer``, " - ", "Detect changes to async generator hooks. - " - ``_PyEval_SetCoroutineWrapper``, ``sys.set_coroutine_wrapper``, " - ", "Detect changes to the coroutine wrapper. - " - "``socket.bind``, ``socket.connect``, ``socket.connect_ex``, - ``socket.getaddrinfo``, ``socket.getnameinfo``, ``socket.sendmsg``, - ``socket.sendto``", ``socket.address``, "``(address,)``", "Detect - access to network resources. The address is unmodified from the - original call. - " - ``socket.__init__``, "socket()", "``(family, type, proto)``", " - Detect creation of sockets. The arguments will be int values. - " - ``socket.gethostname``, ``socket.gethostname``, "", "Detect attempts - to retrieve the current host name. - " - ``socket.sethostname``, ``socket.sethostname``, "``(name,)``", " - Detect attempts to change the current host name. The name argument - is passed as a bytes object. - " - "``socket.gethostbyname``, ``socket.gethostbyname_ex``", - "``socket.gethostbyname``", "``(name,)``", "Detect host name - resolution. The name argument is a str or bytes object. - " - ``socket.gethostbyaddr``, ``socket.gethostbyaddr``, " - ``(address,)``", "Detect host resolution. The address argument is a - str or bytes object. - " - ``socket.getservbyname``, ``socket.getservbyname``, "``(name, - protocol)``", "Detect service resolution. The arguments are str - objects. - " - "``socket.getservbyport``", ``socket.getservbyport``, "``(port, - protocol)``", "Detect service resolution. The port argument is an - int and protocol is a str. - " - "``member_get``, ``func_get_code``, ``func_get_[kw]defaults`` - ",``object.__getattr__``,"``(object, attr)``","Detect access to - restricted attributes. This event is raised for any built-in - members that are marked as restricted, and members that may allow - bypassing imports. - " - "``_PyObject_GenericSetAttr``, ``check_set_special_type_attr``, - ``object_set_class``, ``func_set_code``, ``func_set_[kw]defaults``"," - ``object.__setattr__``","``(object, attr, value)``","Detect monkey - patching of types and objects. This event - is raised for the ``__class__`` attribute and any attribute on - ``type`` objects. - " - "``_PyObject_GenericSetAttr``",``object.__delattr__``,"``(object, - attr)``","Detect deletion of object attributes. This event is raised - for any attribute on ``type`` objects. - " - "``Unpickler.find_class``",``pickle.find_class``,"``(module_name, - global_name)``","Detect imports and global name lookup when - unpickling. - " - "``array_new``",``array.__new__``,"``(typecode, initial_value)``", " - Detects creation of array objects. - " - -TODO - more hooks in ``_socket``, ``_ssl``, others? - -SPython Entry Point -=================== - -A new entry point binary will be added, called ``spython.exe`` on -Windows and ``spythonX.Y`` on other platforms. This entry point is -intended primarily as an example, as we expect most users of this -functionality to implement their own entry point and hooks (see -`Recommendations`_). 
 It will also be used for tests.
 
-Source builds will build ``spython`` by default, but distributions
-should not include it except as a test binary. The python.org managed
-binary distributions will not include ``spython``.
-
-**Do not accept most command-line arguments**
+**Remove most command-line arguments**
 
 The ``spython`` entry point requires a script file be passed as the
-first argument, and does not allow any options. This prevents arbitrary
-code execution from in-memory data or non-script files (such as pickles,
-which can be executed using ``-m pickle ``.
+first argument, and does not allow any options to precede it. This
+prevents arbitrary code execution from in-memory data or non-script
+files (such as pickles, which could be executed using
+``-m pickle ``).
 
 Options ``-B`` (do not write bytecode), ``-E`` (ignore environment
 variables) and ``-s`` (no user site) are assumed.
@@ -517,38 +247,57 @@
 will be used to initialize ``sys.path`` following the rules currently
 described `for Windows `_.
 
-When built with ``Py_DEBUG``, the ``spython`` entry point will allow a
-``-i`` option with no other arguments to enter into interactive mode,
-with audit messages being written to standard error rather than a file.
-This is intended for testing and debugging only.
+For the sake of demonstration, the example implementation of
+``spython`` also allows the ``-i`` option to start in interactive mode.
+This is not recommended for restricted entry points.
 
-**Log security events to a file**
+**Log audited events**
 
-Before initialization, ``spython`` will set an audit hook that writes
-events to a local file. By default, this file is the full path of the
-process with a ``.log`` suffix, but may be overridden with the
-``SPYTHONLOG`` environment variable (despite such overrides being
-explicitly discouraged in `Recommendations`_).
+Before initialization, ``spython`` sets an audit hook that writes all
+audited events to an OS-managed log file. On Windows, this is the Event
+Tracing functionality [7]_, and on other platforms they go to
+syslog [11]_. Logs are copied from the machine as frequently as possible
+to prevent loss of information should an attacker attempt to clear
+local logs or prevent legitimate access to the machine.
 
 The audit hook will also abort all ``sys.addaudithook`` events,
 preventing any other hooks from being added.
 
+The logging hook is written in native code and configured before the
+interpreter is initialized. This is the only opportunity to ensure that
+no Python code executes without auditing, and that Python code cannot
+prevent registration of the hook.
+
+Our primary aim is to record all actions taken by all Python processes,
+so that detection may be performed offline against logged events.
+Having all events recorded also allows for deeper analysis and the use
+of machine learning algorithms. These are useful for detecting
+persistent attacks, where the attacker is intending to remain within
+the protected machines for some period of time, as well as for later
+analysis to determine the impact and exposure caused by a successful
+attack.
+
+The example implementation of ``spython`` writes to a log file on the
+local machine, for the sake of demonstration. When started with ``-i``,
+the example implementation writes all audit events to standard error
+instead of the log file. The ``SPYTHONLOG`` environment variable can be
+used to specify the log file location.
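As a rough Python-level illustration of the logging hook described
above (the real hook is registered from native code before
initialization; the function name here is illustrative)::

    import sys

    def spython_log(event, args):
        # Forward every audited event for central collection. A real
        # implementation would write to Event Tracing or syslog from C.
        sys.stderr.write("audit: %s %r\n" % (event, args))
        if event == 'sys.addaudithook':
            # Abort attempts to add further hooks, as described above.
            raise RuntimeError('audit hook additions are blocked')

    sys.addaudithook(spython_log)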
+
+**Restrict importable modules**
 
-Also before initialization, ``spython`` will set an open-for-import
-hook that validates all files opened with ``os.open_for_import``. This
-implementation will require all files to have a ``.py`` suffix (thereby
-blocking the use of cached bytecode), and will raise a custom audit
-event ``spython.open_for_import`` containing ``(filename,
-True_if_allowed)``.
+Also before initialization, ``spython`` sets an open-for-import hook
+that validates all files opened with ``os.open_for_import``. This
+implementation requires all files to have a ``.py`` suffix (preventing
+the use of cached bytecode), and will raise a custom audit event
+``spython.open_for_import`` containing ``(filename, True_if_allowed)``.
 
-On Windows, the hook will also open the file with flags that prevent any
-other process from opening it with write access, which allows the hook
-to perform additional validation on the contents with confidence that it
-will not be modified between the check and use. Compilation will later
-trigger a ``compile`` event, so there is no need to read the contents
-now for AMSI, but other validation mechanisms such as DeviceGuard [4]_
-should be performed here.
+After opening the file, the entire contents are read into memory in a
+single buffer and the file is closed.
+
+Compilation will later trigger a ``compile`` event, so there is no need
+to validate the contents now using mechanisms that also apply to
+dynamically generated code. However, if a whitelist of source files or
+file hashes is available, then this is the point at which it should be
+applied.
 
 **Restrict globals in pickles**
 
@@ -556,35 +305,37 @@
 The ``spython`` entry point will abort all ``pickle.find_class`` events
 that use the default implementation. Overrides will not raise audit
 events unless explicitly added, and so they will continue to be allowed.
 
-Performance Impact
-==================
+**Prevent os.system**
 
-The important performance impact is the case where events are being
-raised but there are no hooks attached. This is the unavoidable case -
-once a distributor or sysadmin begins adding audit hooks they have
-explicitly chosen to trade performance for functionality. Performance
-impact using ``spython`` or with hooks added are not of interest here,
-since this is considered opt-in functionality.
+The ``spython`` entry point aborts all ``os.system`` calls.
 
-Analysis using the ``performance`` tool shows no significant impact,
-with the vast majority of benchmarks showing between 1.05x faster to
-1.05x slower.
+It should be noted here that ``subprocess.Popen(shell=True)`` is
+allowed (though logged via the platform-specific process creation
+events). This tradeoff is made because it is much simpler to induce a
+running application to call ``os.system`` with a single string argument
+than a function with multiple arguments, and so it is more likely to be
+used as part of an exploit. There is also little justification for
+using ``os.system`` in production code, while ``subprocess.Popen`` has
+a large number of legitimate uses. However, logs indicating use of the
+``shell=True`` argument should be scrutinised more carefully.
 
-In our opinion, the performance impact of the set of auditing points
-described in this PEP is negligible.
+Sysadmins are encouraged to make these kinds of tradeoffs between
+restriction and detection, and generally should prefer detection.
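A comparable sketch for the ``os.system`` restriction, assuming the
event is raised under that name (the hook name is illustrative)::

    import sys

    def deny_os_system(event, args):
        if event == 'os.system':
            # Log before aborting; see "Rejected Advice" below.
            sys.stderr.write('blocked os.system%r\n' % (args,))
            raise RuntimeError('os.system is not permitted')

    sys.addaudithook(deny_os_system)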
-
-Recommendations
-===============
+General Recommendations
+=======================
 
-Specific recommendations are difficult to make, as the ideal
-configuration for any environment will depend on the user's ability to
-manage, monitor, and respond to activity on their own network. However,
-many of the proposals here do not appear to be of value without deeper
-illustration. This section provides recommendations using the terms
-**should** (or **should not**), indicating that we consider it dangerous
-to ignore the advice, and **may**, indicating that for the advice ought
-to be considered for high value systems. The term **sysadmins** refers
-to whoever is responsible for deploying Python throughout your network;
+Recommendations beyond those suggested in the previous section are
+difficult, as the ideal configuration for any environment depends on
+the sysadmin's ability to manage, monitor, and respond to activity on
+their own network. Nonetheless, here we attempt to provide some context
+and guidance for integrating Python into a complete system.
+
+This section provides recommendations using the terms **should** (or
+**should not**), indicating that we consider it risky to ignore the
+advice, and **may**, indicating that the advice ought to be considered
+for high value systems. The term **sysadmin** refers to whoever is
+responsible for deploying Python throughout the network;
 different organizations may have an alternative title for the
 responsible people.
 
@@ -666,73 +417,30 @@
 Since ``importlib``'s use of ``open_for_import`` may be easily bypassed
 with monkeypatching, an audit hook **should** be used to detect
 attribute changes on type objects.
 
-[TODO: more good advice; less bad advice]
+Rejected Advice
+===============
 
-Rejected Ideas
-==============
+This section discusses common or "obviously good" recommendations that
+we are specifically *not* making. These range from useless or incorrect
+through to ideas that are simply not feasible in any real world
+environment.
 
-Separate module for audit hooks
--------------------------------
+**Do not** attempt to implement a sandbox within the Python runtime.
+There is a long history of attempts to allow arbitrary code limited use
+of Python features (such as [14]_), but no general success. The best
+options are to run unrestricted Python within a sandboxed environment
+with at least hypervisor-level isolation, or to prevent unauthorised
+code from starting at all.
 
-The proposal is to add a new module for audit hooks, hypothetically
-``audit``. This would separate the API and implementation from the
-``sys`` module, and allow naming the C functions ``PyAudit_AddHook`` and
-``PyAudit_Audit`` rather than the current variations.
+**Do not** rely on static analysis to verify untrusted code before use.
+The best options are to pre-authorise trusted code, such as with code
+signing, and if not possible to identify known-bad code, such as with
+an anti-malware scanner.
 
-Any such module would need to be a built-in module that is guaranteed to
-always be present. The nature of these hooks is that they must be
-callable without condition, as any conditional imports or calls provide
-more opportunities to intercept and suppress or modify events.
+**Do not** use audit hooks to abort operations without logging the
+event first. You will regret not knowing why your process disappeared.
 
-Given its nature as one of the most core modules, the ``sys`` module is
-somewhat protected against module shadowing attacks.
Replacing ``sys`` -with a sufficiently functional module that the application can still run -is a much more complicated task than replacing a module with only one -function of interest. An attacker that has the ability to shadow the -``sys`` module is already capable of running arbitrary code from files, -whereas an ``audit`` module can be replaced with a single statement:: - - import sys; sys.modules['audit'] = type('audit', (object,), - {'audit': lambda *a: None, 'addhook': lambda *a: None}) - -Multiple layers of protection already exist for monkey patching attacks -against either ``sys`` or ``audit``, but assignments or insertions to -``sys.modules`` are not audited. - -This idea is rejected because it makes substituting ``audit`` calls -throughout all callers near trivial. - -Flag in sys.flags to indicate "secure" mode -------------------------------------------- - -The proposal is to add a value in ``sys.flags`` to indicate when Python -is running in a "secure" mode. This would allow applications to detect -when some features are enabled and modify their behaviour appropriately. - -Currently there are no guarantees made about security by this PEP - this -section is the first time the word "secure" has been used. Security -**transparency** does not result in any changed behaviour, so there is -no appropriate reason for applications to modify their behaviour. - -Both application-level APIs ``sys.audit`` and ``_imp.open_for_import`` -are always present and functional, regardless of whether the regular -``python`` entry point or some alternative entry point is used. Callers -cannot determine whether any hooks have been added (except by performing -side-channel analysis), nor do they need to. The calls should be fast -enough that callers do not need to avoid them, and the sysadmin is -responsible for ensuring their added hooks are fast enough to not affect -application performance. - -The argument that this is "security by obscurity" is valid, but -irrelevant. Security by obscurity is only an issue when there are no -other protective mechanisms; obscurity as the first step in avoiding -attack is strongly recommended (see `this article -`_ for -discussion). - -This idea is rejected because there are no appropriate reasons for an -application to change its behaviour based on whether these APIs are in -use. +[TODO - more bad advice] Further Reading =============== @@ -789,7 +497,7 @@ References .. [4] ``_ -.. [5] AMSI, ``_ +.. [5] Antimalware Scan Interface, ``_ .. [6] Persistent Zone Identifiers, ``_ @@ -807,6 +515,8 @@ References .. [13] SELinux access decisions ``_ +.. [14] The failure of pysandbox ``_ + Acknowledgments =============== @@ -820,7 +530,7 @@ discussions. Copyright ========= -Copyright (c) 2017 by Microsoft Corporation. This material may be +Copyright (c) 2017-2018 by Microsoft Corporation. This material may be distributed only subject to the terms and conditions set forth in the Open Publication License, v1.0 or later (the latest version is presently available at http://www.opencontent.org/openpub/). 
diff --git a/pep-0554.rst b/pep-0554.rst index c90ea1ea6..56eb37de9 100644 --- a/pep-0554.rst +++ b/pep-0554.rst @@ -6,15 +6,16 @@ Type: Standards Track Content-Type: text/x-rst Created: 2017-09-05 Python-Version: 3.8 -Post-History: 07-Sep-2017, 08-Sep-2017, 13-Sep-2017, 05-Dec-2017 +Post-History: 07-Sep-2017, 08-Sep-2017, 13-Sep-2017, 05-Dec-2017, + 09-May-2018 Abstract ======== CPython has supported multiple interpreters in the same process (AKA -"subinterpreters") since version 1.5. The feature has been available -via the C-API. [c-api]_ Subinterpreters operate in +"subinterpreters") since version 1.5 (1997). The feature has been +available via the C-API. [c-api]_ Subinterpreters operate in `relative isolation from one another `_, which provides the basis for an `alternative concurrency model `_. @@ -30,7 +31,7 @@ Proposal The ``interpreters`` module will be added to the stdlib. It will provide a high-level interface to subinterpreters and wrap a new -low-level ``_interpreters`` (in the same was as the ``threading`` +low-level ``_interpreters`` (in the same way as the ``threading`` module). See the `Examples`_ section for concrete usage and use cases. Along with exposing the existing (in CPython) subinterpreter support, @@ -47,6 +48,8 @@ At first only the following types will be supported for sharing: * None * bytes +* str +* int * PEP 3118 buffer objects (via ``send_buffer()``) Support for other basic types (e.g. int, Ellipsis) will be added later. @@ -87,6 +90,14 @@ For creating and using interpreters: | channels=None) | | (This blocks the current thread until done.) | +-----------------------+-----------------------------------------------------+ +| + ++----------------+--------------+------------------------------------------------------+ +| exception | base | description | ++================+==============+======================================================+ +| RunFailedError | RuntimeError | Interpreter.run() resulted in an uncaught exception. | ++----------------+--------------+------------------------------------------------------+ + For sharing data between interpreters: +--------------------------------+--------------------------------------------+ @@ -120,9 +131,11 @@ For sharing data between interpreters: | .recv_nowait(default=None) -> | | Like recv(), but return the default | | object | | instead of waiting. | +-------------------------------+-----------------------------------------------+ -| .close() | | No longer associate the current interpreter | +| .release() | | No longer associate the current interpreter | | | | with the channel (on the receiving end). | +-------------------------------+-----------------------------------------------+ +| .close(force=False) | | Close the channel in all interpreters. | ++-------------------------------+-----------------------------------------------+ | @@ -147,9 +160,31 @@ For sharing data between interpreters: +---------------------------+-------------------------------------------------+ | .send_buffer_nowait(obj) | | Like send_buffer(), but fail if not received. | +---------------------------+-------------------------------------------------+ -| .close() | | No longer associate the current interpreter | +| .release() | | No longer associate the current interpreter | | | | with the channel (on the sending end). | +---------------------------+-------------------------------------------------+ +| .close(force=False) | | Close the channel in all interpreters. 
| ++---------------------------+-------------------------------------------------+ + +| + ++----------------------+--------------------+------------------------------------------------+ +| exception | base | description | ++======================+====================+================================================+ +| ChannelError | Exception | The base class for channel-related exceptions. | ++----------------------+--------------------+------------------------------------------------+ +| ChannelNotFoundError | ChannelError | The identified channel was not found. | ++----------------------+--------------------+------------------------------------------------+ +| ChannelEmptyError | ChannelError | The channel was unexpectedly empty. | ++----------------------+--------------------+------------------------------------------------+ +| ChannelNotEmptyError | ChannelError | The channel was unexpectedly not empty. | ++----------------------+--------------------+------------------------------------------------+ +| NotReceivedError | ChannelError | Nothing was waiting to receive a sent object. | ++----------------------+--------------------+------------------------------------------------+ +| ChannelClosedError | ChannelError | The channel is closed. | ++----------------------+--------------------+------------------------------------------------+ +| ChannelReleasedError | ChannelClosedError | The channel is released (but not yet closed). | ++----------------------+--------------------+------------------------------------------------+ Examples @@ -218,7 +253,7 @@ Synchronize using a channel interp.run(tw.dedent(""" reader.recv() print("during") - reader.close() + reader.release() """), shared=dict( reader=r, @@ -229,7 +264,7 @@ Synchronize using a channel t.start() print('after') s.send(b'') - s.close() + s.release() Sharing a file descriptor ------------------------- @@ -280,7 +315,7 @@ Passing objects via marshal obj = marshal.loads(data) do_something(obj) data = reader.recv() - reader.close() + reader.release() """)) t = threading.Thread(target=run) t.start() @@ -310,7 +345,7 @@ Passing objects via pickle obj = pickle.loads(data) do_something(obj) data = reader.recv() - reader.close() + reader.release() """)) t = threading.Thread(target=run) t.start() @@ -514,6 +549,8 @@ channels to the following: * None * bytes +* str +* int * PEP 3118 buffer objects (via ``send_buffer()``) Limiting the initial shareable types is a practical matter, reducing @@ -563,7 +600,7 @@ Existing Usage Subinterpreters are not a widely used feature. In fact, the only documented cases of wide-spread usage are -`mod_wsgi `_and +`mod_wsgi `_ and `JEP `_. On the one hand, this case provides confidence that existing subinterpreter support is relatively stable. On the other hand, there isn't much of a sample size from which @@ -686,16 +723,24 @@ The module also provides the following class: "run()" call into one long script. This is the same as how the REPL operates. - Regarding uncaught exceptions, we noted that they are - "effectively" propagated into the code where ``run()`` was called. - To prevent leaking exceptions (and tracebacks) between - interpreters, we create a surrogate of the exception and its - traceback (see ``traceback.TracebackException``), wrap it in a - RuntimeError, and raise that. - Supported code: source text. +Uncaught Exceptions +------------------- + +Regarding uncaught exceptions in ``Interpreter.run()``, we noted that +they are "effectively" propagated into the code where ``run()`` was +called. 
To prevent leaking exceptions (and tracebacks) between +interpreters, we create a surrogate of the exception and its traceback +(see ``traceback.TracebackException``), set it to ``__cause__`` on a +new ``RunFailedError``, and raise that. + +Raising (a proxy of) the exception is problematic since it's harder to +distinguish between an error in the ``run()`` call and an uncaught +exception from the subinterpreter. + + API for sharing data -------------------- @@ -703,8 +748,8 @@ Subinterpreters are less useful without a mechanism for sharing data between them. Sharing actual Python objects between interpreters, however, has enough potential problems that we are avoiding support for that here. Instead, only mimimum set of types will be supported. -Initially this will include ``bytes`` and channels. Further types may -be supported later. +Initially this will include ``None``, ``bytes``, ``str``, ``int``, +and channels. Further types may be supported later. The ``interpreters`` module provides a way for users to determine whether an object is shareable or not: @@ -737,11 +782,12 @@ many-to-many, channels have no buffer. Create a new channel and return (recv, send), the RecvChannel and SendChannel corresponding to the ends of the channel. The channel is not closed and destroyed (i.e. garbage-collected) until the number - of associated interpreters returns to 0. + of associated interpreters returns to 0 (including when the channel + is explicitly closed). An interpreter gets associated with a channel by calling its "send()" or "recv()" method. That association gets dropped by calling - "close()" on the channel. + "release()" on the channel. Both ends of the channel are supported "shared" objects (i.e. may be safely shared by different interpreters. Thus they may be passed as @@ -765,7 +811,8 @@ many-to-many, channels have no buffer. interpreters: The list of associated interpreters: those that have called - the "recv()" or "__next__()" methods and haven't called "close()". + the "recv()" or "__next__()" methods and haven't called + "release()" (and the channel hasn't been explicitly closed). recv(): @@ -773,10 +820,11 @@ many-to-many, channels have no buffer. the channel. If none have been sent then wait until the next send. This associates the current interpreter with the channel. - If the channel is already closed (see the close() method) - then raise EOFError. If the channel isn't closed, but the current - interpreter already called the "close()" method (which drops its - association with the channel) then raise ValueError. + If the channel is already closed then raise ChannelClosedError. + If the channel isn't closed but the current interpreter already + called the "release()" method (which drops its association with + the channel) then raise ChannelReleasedError (which is a subclass + of ChannelClosedError). recv_nowait(default=None): @@ -784,26 +832,35 @@ many-to-many, channels have no buffer. then return the default. Otherwise, this is the same as the "recv()" method. - close(): + release(): No longer associate the current interpreter with the channel (on the receiving end) and block future association (via the "recv()" - method. If the interpreter was never associated with the channel + method). If the interpreter was never associated with the channel then still block future association. 
    Once an interpreter is no longer associated with the channel,
    subsequent (or current) send()
-   and recv() calls from that interpreter will raise ValueError
-   (or EOFError if the channel is actually marked as closed).
+   and recv() calls from that interpreter will raise
+   ChannelReleasedError (or ChannelClosedError if the channel
+   is actually marked as closed).
 
    Once the number of associated interpreters on both ends drops
    to 0, the channel is actually marked as closed. The Python
    runtime will garbage collect all closed channels, though it may
-   not be immediately. Note that "close()" is automatically called
+   not be immediately. Note that "release()" is automatically called
    in behalf of the current interpreter when the channel is no
    longer used (i.e. has no references) in that interpreter.
 
-   This operation is idempotent. Return True if "close()" has not
+   This operation is idempotent. Return True if "release()" has not
    been called before by the current interpreter.
 
+  close(force=False):
+
+     Close both ends of the channel (in all interpreters). This means
+     that any further use of the channel raises ChannelClosedError. If
+     the channel is not empty then raise ChannelNotEmptyError (if
+     "force" is False) or discard the remaining objects (if "force"
+     is True) and close it.
+
 
 ``SendChannel(id)``::
 
@@ -827,16 +884,16 @@ many-to-many, channels have no buffer.
      object is not shareable then ValueError is raised. Currently
      only bytes are supported.
 
-     If the channel is already closed (see the close() method)
-     then raise EOFError. If the channel isn't closed, but the current
-     interpreter already called the "close()" method (which drops its
-     association with the channel) then raise ValueError.
+     If the channel is already closed then raise ChannelClosedError.
+     If the channel isn't closed but the current interpreter already
+     called the "release()" method (which drops its association with
+     the channel) then raise ChannelReleasedError.
 
   send_nowait(obj):
 
     Send the object to the receiving end of the channel. If the other
-    end is not currently receiving then raise RuntimeError. Otherwise
-    this is the same as "send()".
+    end is not currently receiving then raise NotReceivedError.
+    Otherwise this is the same as "send()".
 
   send_buffer(obj):
 
    Send a MemoryView of the object rather than the object.
@@ -847,14 +904,23 @@
   send_buffer_nowait(obj):
 
    Send a MemoryView of the object rather than the object. If the
-   other end is not currently receiving then raise RuntimeError.
+   other end is not currently receiving then raise NotReceivedError.
    Otherwise this is the same as "send_buffer()".
 
-  close():
+  release():
 
-     This is the same as "RecvChannel.close(), but applied to the
+     This is the same as "RecvChannel.release()", but applied to the
      sending end of the channel.
 
+  close(force=False):
+
+     Close both ends of the channel (in all interpreters). No matter
+     what, the "send" end of the channel is immediately closed. If the
+     channel is empty then close the "recv" end immediately too.
+     Otherwise wait until the channel is empty before closing it (if
+     "force" is False) or discard the remaining items and close
+     immediately (if "force" is True).
+
 Note that ``send_buffer()`` is similar to how
 ``multiprocessing.Connection`` works. [mp-conn]_
 
@@ -862,7 +928,9 @@
 Open Questions
 ==============
 
-None
+* "force" argument to ``ch.release()``?
+* add a "tp_share" type slot instead of using a global registry
+  for shareable types?
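Since the ``interpreters`` module does not exist yet, the following is
only a sketch of how ``release()`` and ``close()`` would differ under
the proposed API, in the style of the `Examples`_ section::

    import textwrap as tw
    import threading
    import interpreters  # the proposed module

    interp = interpreters.create()
    r, s = interpreters.create_channel()

    def run():
        interp.run(tw.dedent("""
            data = reader.recv()    # associates the subinterpreter
            reader.release()        # drops only that association
            """), shared=dict(reader=r))

    t = threading.Thread(target=run)
    t.start()
    s.send(b'payload')   # blocks until the object is received
    s.close()            # closes both ends in all interpreters
    t.join()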
 Open Implementation Questions
@@ -1020,9 +1088,8 @@
 exception, effectively ending execution in the interpreter that tried
 to use the poisoned channel.
 
 This could be accomplished by adding a ``poison()`` method to both ends
-of the channel. The ``close()`` method could work if it had a ``force``
-option to force the channel closed. Regardless, these semantics are
-relatively specialized and can wait.
+of the channel. The ``close()`` method can be used in this way
+(mostly), but these semantics are relatively specialized and can wait.
 
 Sending channels over channels
 ------------------------------
@@ -1070,14 +1137,6 @@
 generic module reset mechanism may prove unnecessary.
 
 This isn't a critical feature initially. It can wait until later
 if desirable.
 
-Support passing ints in channels
---------------------------------
-
-Passing ints around should be fine and ultimately is probably
-desirable. However, we can get by with serializing them as bytes
-for now. The goal is a minimal API for the sake of basic
-functionality at first.
-
 File descriptors and sockets in channels
 ----------------------------------------
@@ -1119,7 +1178,8 @@
 Channel context managers
 ------------------------
 
 Context manager support on ``RecvChannel`` and ``SendChannel`` may be
 helpful. The implementation would be simple, wrapping a call to
-``close()`` like files do. As with iteration, this can wait.
+``close()`` (or maybe ``release()``) like files do. As with iteration,
+this can wait.
 
 Pipes and Queues
 ----------------
@@ -1136,19 +1196,11 @@
 reasonable. The could be trivially implemented as wrappers around
 channels. Alternatively they could be implemented for efficiency at
 the same low level as channels.
 
-interpreters.RunFailedError
----------------------------
+Buffering
+---------
 
-As currently proposed, ``Interpreter.run()`` offers you no way to
-distinguish an error coming from the subinterpreter from any other
-error in the current interpreter. Your only option would be to
-explicitly wrap your ``run()`` call in a
-``try: ... except RuntimeError:`` (since we wrap a proxy of the original
-exception in a RuntimeError and raise that).
-
-If this is a problem in practice then would could add something like
-``interpreters.RunFailedError`` (subclassing RuntimeError) and raise that
-in ``run()``.
+The proposed channels are unbuffered. This simplifies the API and
+implementation. If buffering is desirable we can add it later.
 
 Return a lock from send()
 -------------------------
@@ -1162,6 +1214,26 @@
 This matters for buffered channels (i.e. queues). For unbuffered
 channels it is a non-issue. So this can be dealt with once channels
 support buffering.
 
+Add a "reraise" method to RunFailedError
+----------------------------------------
+
+While having ``__cause__`` set on ``RunFailedError`` helps produce a
+more useful traceback, it's less helpful when handling the original
+error. To help facilitate this, we could add
+``RunFailedError.reraise()``. This method would enable the following
+pattern::
+
+    try:
+        interp.run(script)
+    except RunFailedError as exc:
+        try:
+            exc.reraise()
+        except MyException:
+            ...
+
+This would be made even simpler if there existed a ``__reraise__``
+protocol.
+
 Rejected Ideas
 ==============
@@ -1170,7 +1242,7 @@
 Explicit channel association
 ----------------------------
 
 Interpreters are implicitly associated with channels upon ``recv()`` and
-``send()`` calls. They are de-associated with ``close()`` calls. The
+``send()`` calls. They are de-associated with ``release()`` calls. The
It would be either ``add_channel()`` and ``remove_channel()`` methods on ``Interpreter`` objects or something similar on channel objects. @@ -1216,15 +1288,16 @@ While that might not be a problem currently, it would be a problem once interpreters get better isolation relative to memory management (which is necessary to stop sharing the GIL between interpreters). We've resolved the semantics of how the exceptions propagate by raising a -RuntimeError instead, which wraps a safe proxy for the original -exception and traceback. +``RunFailedError`` instead, for which ``__cause__`` wraps a safe proxy +for the original exception and traceback. Rejected possible solutions: -* set the RuntimeError's __cause__ to the proxy of the original - exception * reproduce the exception and traceback in the original interpreter and raise that. +* raise a subclass of RunFailedError that proxies the original + exception and traceback. +* raise RuntimeError instead of RunFailedError * convert at the boundary (a la ``subprocess.CalledProcessError``) (requires a cross-interpreter representation) * support customization via ``Interpreter.excepthook`` @@ -1282,7 +1355,7 @@ References https://bugs.python.org/issue6531 .. [mp-conn] - https://docs.python.org/3/library/multiprocessing.html#multiprocessing.Connection + https://docs.python.org/3/library/multiprocessing.html#connection-objects .. [bug-rate] https://mail.python.org/pipermail/python-ideas/2017-September/047094.html diff --git a/pep-0557.rst b/pep-0557.rst index 49c0b21e7..bc98865c9 100644 --- a/pep-0557.rst +++ b/pep-0557.rst @@ -6,7 +6,7 @@ Type: Standards Track Content-Type: text/x-rst Created: 02-Jun-2017 Python-Version: 3.7 -Post-History: 08-Sep-2017, 25-Nov-2017, 30-Nov-2017, 01-Dec-2017, 02-Dec-2017, 06-Jan-2018 +Post-History: 08-Sep-2017, 25-Nov-2017, 30-Nov-2017, 01-Dec-2017, 02-Dec-2017, 06-Jan-2018, 04-Mar-2018 Resolution: https://mail.python.org/pipermail/python-dev/2017-December/151034.html Notice for Reviewers @@ -93,7 +93,7 @@ There have been numerous attempts to define classes which exist primarily to store values which are accessible by attribute lookup. Some examples include: -- collection.namedtuple in the standard library. +- collections.namedtuple in the standard library. - typing.NamedTuple in the standard library. @@ -170,7 +170,7 @@ The ``dataclass`` decorator is typically used with no parameters and no parentheses. However, it also supports the following logical signature:: - def dataclass(*, init=True, repr=True, eq=True, order=False, hash=None, frozen=False) + def dataclass(*, init=True, repr=True, eq=True, order=False, unsafe_hash=False, frozen=False) If ``dataclass`` is used just as a simple decorator with no parameters, it acts as if it has the default values documented in this @@ -184,7 +184,7 @@ signature. That is, these three uses of ``@dataclass`` are equivalent:: class C: ... - @dataclass(init=True, repr=True, eq=True, order=False, hash=None, frozen=False) + @dataclass(init=True, repr=True, eq=True, order=False, unsafe_hash=False, frozen=False) class C: ... @@ -200,10 +200,15 @@ The parameters to ``dataclass`` are: are not included. For example: ``InventoryItem(name='widget', unit_price=3.0, quantity_on_hand=10)``. -- ``eq``: If true (the default), ``__eq__`` and ``__ne__`` methods - will be generated. These compare the class as if it were a tuple of - its fields, in order. Both instances in the comparison must be of - the identical type. + If the class already defines ``__repr__``, this parameter is + ignored. 
+
+- ``eq``: If true (the default), an ``__eq__`` method will be
+  generated. This method compares the class as if it were a tuple of its
+  fields, in order. Both instances in the comparison must be of the
+  identical type.
+
+  If the class already defines ``__eq__``, this parameter is ignored.
 
 - ``order``: If true (the default is False), ``__lt__``, ``__le__``,
   ``__gt__``, and ``__ge__`` methods will be generated. These compare
   the class as if it were a tuple of its fields, in order. Both
   instances in the comparison must be of the identical type. If
   ``order`` is true and ``eq`` is false, a ``ValueError`` is raised.
 
-- ``hash``: Either a bool or ``None``. If ``None`` (the default), the
-  ``__hash__`` method is generated according to how ``eq`` and
-  ``frozen`` are set.
+  If the class already defines any of ``__lt__``, ``__le__``,
+  ``__gt__``, or ``__ge__``, then ``ValueError`` is raised.
+
+- ``unsafe_hash``: If ``False`` (the default), the ``__hash__`` method
+  is generated according to how ``eq`` and ``frozen`` are set.
 
   If ``eq`` and ``frozen`` are both true, Data Classes will generate a
   ``__hash__`` method for you. If ``eq`` is true and ``frozen`` is
@@ -224,15 +231,36 @@
   to id-based hashing).
 
   Although not recommended, you can force Data Classes to create a
-  ``__hash__`` method with ``hash=True``. This might be the case if your
-  class is logically immutable but can nonetheless be mutated. This
-  is a specialized use case and should be considered carefully.
+  ``__hash__`` method with ``unsafe_hash=True``. This might be the
+  case if your class is logically immutable but can nonetheless be
+  mutated. This is a specialized use case and should be considered
+  carefully.
+
+  If a class already has an explicitly defined ``__hash__`` the
+  behavior when adding ``__hash__`` is modified. An explicitly
+  defined ``__hash__`` is defined when:
+
+  - ``__eq__`` is defined in the class and ``__hash__`` is defined
+    with any value other than ``None``.
+
+  - ``__eq__`` is not defined on the class, and any ``__hash__`` is
+    defined.
+
+  If ``unsafe_hash`` is true and an explicitly defined ``__hash__``
+  is present, then ``ValueError`` is raised.
+
+  If ``unsafe_hash`` is false and an explicitly defined ``__hash__``
+  is present, then no ``__hash__`` is added.
 
   See the Python documentation [#]_ for more information.
 
 - ``frozen``: If true (the default is False), assigning to fields will
   generate an exception. This emulates read-only frozen instances.
-  See the discussion below.
+  If either ``__getattr__`` or ``__setattr__`` is defined in the
+  class, then ``ValueError`` is raised. See the discussion below.
 
 ``field``\s may optionally specify a default value, using normal
 Python syntax::
@@ -533,7 +561,7 @@ Module level helper functions
 
 - ``fields(class_or_instance)``: Returns a tuple of ``Field`` objects
   that define the fields for this Data Class. Accepts either a Data
-  Class, or an instance of a Data Class. Raises `ValueError` if not
+  Class, or an instance of a Data Class. Raises ``ValueError`` if not
   passed a Data Class or instance of one. Does not return
   pseudo-fields which are ``ClassVar`` or ``InitVar``.
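To make the ``unsafe_hash`` behavior concrete, a short sketch against
the Python 3.7 ``dataclasses`` module, reusing the ``InventoryItem``
example from above::

    from dataclasses import dataclass, fields

    @dataclass(unsafe_hash=True)
    class InventoryItem:
        name: str
        unit_price: float
        quantity_on_hand: int = 0

    item = InventoryItem('widget', 3.0, 10)
    print(hash(item))  # __hash__ is generated even though eq is true
    print([f.name for f in fields(item)])
    # -> ['name', 'unit_price', 'quantity_on_hand']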
diff --git a/pep-0560.rst b/pep-0560.rst
index ba7853397..fe6ee4954 100644
--- a/pep-0560.rst
+++ b/pep-0560.rst
@@ -206,6 +206,59 @@
 the backwards compatibility::
 
         return meta(name, resolved_bases, ns, **kwds)
 
 
+Using ``__class_getitem__`` in C extensions
+-------------------------------------------
+
+As mentioned above, ``__class_getitem__`` is automatically a class method
+if defined in Python code. To define this method in a C extension, one
+should use the flags ``METH_O|METH_CLASS``. For example, a simple way to
+make an extension class generic is to use a method that simply returns
+the original class object, thus fully erasing the type information at
+runtime, and deferring all checks to static type checkers only::
+
+    typedef struct {
+        PyObject_HEAD
+        /* ... your code ... */
+    } SimpleGeneric;
+
+    static PyObject *
+    simple_class_getitem(PyObject *type, PyObject *item)
+    {
+        Py_INCREF(type);
+        return type;
+    }
+
+    static PyMethodDef simple_generic_methods[] = {
+        {"__class_getitem__", simple_class_getitem, METH_O|METH_CLASS, NULL},
+        /* ... other methods ... */
+    };
+
+    PyTypeObject SimpleGeneric_Type = {
+        PyVarObject_HEAD_INIT(NULL, 0)
+        "SimpleGeneric",
+        sizeof(SimpleGeneric),
+        0,
+        .tp_flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE,
+        .tp_methods = simple_generic_methods,
+    };
+
+Such a class can be used as a normal generic in Python type annotations
+(a corresponding stub file should be provided for static type checkers,
+see PEP 484 for details)::
+
+    from simple_extension import SimpleGeneric
+    from typing import TypeVar
+
+    T = TypeVar('T')
+
+    Alias = SimpleGeneric[str, T]
+    class SubClass(SimpleGeneric[T, int]):
+        ...
+
+    data: Alias[int]  # Works at runtime
+    more_data: SubClass[str]  # Also works at runtime
+
+
 Backwards compatibility and impact on users who don't use ``typing``
 ====================================================================
 
diff --git a/pep-0561.rst b/pep-0561.rst
index 289d28bc4..e1823c469 100644
--- a/pep-0561.rst
+++ b/pep-0561.rst
@@ -1,12 +1,12 @@
 PEP: 561
 Title: Distributing and Packaging Type Information
 Author: Ethan Smith 
-Status: Draft
+Status: Accepted
 Type: Standards Track
 Content-Type: text/x-rst
 Created: 09-Sep-2017
 Python-Version: 3.7
-Post-History: 10-Sep-2017, 12-Sep-2017, 06-Oct-2017, 26-Oct-2017
+Post-History: 10-Sep-2017, 12-Sep-2017, 06-Oct-2017, 26-Oct-2017, 12-Apr-2018
 
 
 Abstract
@@ -49,10 +49,11 @@ Definition of Terms
 The definition of "MAY", "MUST", and "SHOULD", and "SHOULD NOT" are
 to be interpreted as described in RFC 2119.
 
-"inline" - the types are part of the runtime code using PEP 526 and 3107
-syntax.
+"inline" - the types are part of the runtime code using PEP 526 and
+PEP 3107 syntax (the filename ends in ``.py``).
 
-"stubs" - files containing only type information, empty of runtime code.
+"stubs" - files containing only type information, empty of runtime code
+(the filename ends in ``.pyi``).
 
 "Distributions" are the packaged files which are used to publish and distribute
 a release. [3]_
@@ -60,13 +61,16 @@
 "Module" a file containing Python runtime code or stubbed type information.
 
 "Package" a directory or directories that namespace Python modules.
+(Note the distinction between packages and distributions.  While most
+distributions are named after the one package they install, some
+distributions install multiple packages.)
 
 
 Specification
 =============
 
 There are several motivations and methods of supporting typing in a package.
-This PEP recognizes three (3) types of packages that users of typing wish to +This PEP recognizes three types of packages that users of typing wish to create: 1. The package maintainer would like to add type information inline. @@ -77,7 +81,7 @@ create: a package, but the maintainer does not want to include them in the source of the package. -This PEP aims to support these scenarios and make them simple to add to +This PEP aims to support all three scenarios and make them simple to add to packaging and deployment. The two major parts of this specification are the packaging specifications @@ -115,15 +119,15 @@ Distutils option example:: ..., ) -For namespace packages, the ``py.typed`` file should be in the submodules of -the namespace, to avoid conflicts and for clarity. +For namespace packages (see PEP 420), the ``py.typed`` file should be in the +submodules of the namespace, to avoid conflicts and for clarity. This PEP does not support distributing typing information as part of module-only distributions. The code should be refactored into a package-based distribution and indicate that the package supports typing as described above. -Stub Only Packages +Stub-only Packages '''''''''''''''''' For package maintainers wishing to ship stub files containing all of their @@ -131,21 +135,26 @@ type information, it is preferred that the ``*.pyi`` stubs are alongside the corresponding ``*.py`` files. However, the stubs can also be put in a separate package and distributed separately. Third parties can also find this method useful if they wish to distribute stub files. The name of the stub package -MUST follow the scheme ``foopkg_stubs`` for type stubs for the package named -``foopkg``. The normal resolution order of checking ``*.pyi`` before ``*.py`` -will be maintained. +MUST follow the scheme ``foopkg-stubs`` for type stubs for the package named +``foopkg``. Note that for stub-only packages adding a ``py.typed`` marker is not +needed since the name ``*-stubs`` is enough to indicate it is a source of typing +information. Third parties seeking to distribute stub files are encouraged to contact the maintainer of the package about distribution alongside the package. If the maintainer does not wish to maintain or package stub files or type information -inline, then a third party stub only package can be created. +inline, then a third party stub-only package can be created. In addition, stub-only distributions SHOULD indicate which version(s) of the runtime package are supported by indicating the runtime distribution's version(s) through normal dependency data. For example, the -stub package ``flyingcircus_stubs`` can indicate the versions of the +stub package ``flyingcircus-stubs`` can indicate the versions of the runtime ``flyingcircus`` distribution it supports through ``install_requires`` -in distutils-based tools, or the equivalent in other packaging tools. +in distutils-based tools, or the equivalent in other packaging tools. Note that +in pip 9.0, if you update ``flyingcircus-stubs``, it will update +``flyingcircus``. In pip 9.0, you can use the +``--upgrade-strategy=only-if-needed`` flag. In pip 10.0 this is the default +behavior. Type Checker Module Resolution Order @@ -158,13 +167,14 @@ resolve modules containing type information: 2. Stubs or Python source manually put in the beginning of the path. Type checkers SHOULD provide this to allow the user complete control of which - stubs to use, and patch broken stubs/inline types from packages. 
+   stubs to use, and to patch broken stubs/inline types from packages.
+   In mypy the ``$MYPYPATH`` environment variable can be used for this.

-3. Stub packages - these packages can supersede the installed packages.
-   They can be found at ``foopkg_stubs`` for package ``foopkg``.
+3. Stub packages - these packages SHOULD supersede any installed inline
+   package. They can be found at ``foopkg-stubs`` for package ``foopkg``.

4. Inline packages - if there is nothing overriding the installed
-   package, and it opts into type checking, inline types SHOULD be used.
+   package, *and* it opts into type checking, inline types SHOULD be used.

5. Typeshed (if used) - Provides the stdlib types and several third party
   libraries.
@@ -177,27 +187,77 @@ that the type checker allow for the user to point to a particular Python
binary, in case it is not in the path.

+Partial Stub Packages
+---------------------
+
+Many stub packages will only have part of the type interface for libraries
+completed, especially initially. For the benefit of type checking and code
+editors, packages can be "partial". This means modules not found in the stub
+package SHOULD be searched for in parts four and five of the module resolution
+order above, namely inline packages and typeshed.
+
+Type checkers should merge the stub package and runtime package or typeshed
+directories. This can be thought of as the functional equivalent of copying the
+stub package into the same directory as the corresponding runtime package or
+typeshed folder and type checking the combined directory structure. Thus type
+checkers MUST maintain the normal resolution order of checking ``*.pyi`` before
+``*.py`` files.
+
+Stub packages can opt into declaring themselves as partial by including
+``partial\n`` in the package's ``py.typed`` file.
+
+
Implementation
==============

The proposed scheme of indicating support for typing is completely backwards
-compatible, and requires no modification to tooling. A sample package with
-inline types is available [typed_pkg]_, as well as a sample package checker
-[pkg_checker]_ which reads the metadata of installed packages and reports on
-their status as either not typed, inline typed, or a stub package.
+compatible, and requires no modification to package tooling. A sample package
+with inline types is available [typed_package]_, as well as a [stub_package]_. A
+sample package checker [pkg_checker]_ reads the metadata of installed
+packages and reports on their status as either not typed, inline typed, or a
+stub package.
+
+The mypy type checker has an implementation of PEP 561 searching which can be
+read about in the mypy docs [4]_.
+
+[numpy-stubs]_ is an example of a real stub-only package for the numpy
+distribution.

Acknowledgements
================

This PEP would not have been possible without the ideas, feedback, and support
-of Ivan Levkivskyi, Jelle Zijlstra, Nick Coghlan, Daniel F Moisset, Nathaniel
-Smith, and Guido van Rossum.
+of Ivan Levkivskyi, Jelle Zijlstra, Nick Coghlan, Daniel F Moisset, Andrey
+Vlasovskikh, Nathaniel Smith, and Guido van Rossum.

Version History
===============

+* 2018-07-09
+
+    * Add links to sample stub-only packages
+
+* 2018-06-19
+
+    * Partial stub packages can look at typeshed as well as runtime packages
+
+* 2018-05-15
+
+    * Add partial stub package spec.
+
+* 2018-04-09
+
+    * Add reference to mypy implementation
+    * Clarify stub package priority.
+
+* 2018-02-02
+
+    * Change stub-only package suffix to be -stubs not _stubs.
+ * Note that py.typed is not needed for stub-only packages. + * Add note about pip and upgrading stub packages. + * 2017-11-12 * Rewritten to use existing tooling only @@ -208,7 +268,7 @@ Version History * Specification re-written to use package metadata instead of distribution metadata. - * Removed stub only packages and merged into third party packages spec. + * Removed stub-only packages and merged into third party packages spec. * Removed suggestion for typecheckers to consider checking runtime versions * Implementations updated to reflect PEP changes. @@ -238,9 +298,18 @@ References .. [3] PEP 426 definitions (https://www.python.org/dev/peps/pep-0426/) -.. [typed_pkg] Sample typed package +.. [4] Example implementation in a type checker + (https://mypy.readthedocs.io/en/latest/installed_packages.html) + +.. [stub_package] A stub-only package + (https://github.com/ethanhs/stub-package) + +.. [typed_package] Sample typed package (https://github.com/ethanhs/sample-typed-package) +.. [numpy-stubs] Stubs for numpy + (https://github.com/numpy/numpy-stubs) + .. [pkg_checker] Sample package checker (https://github.com/ethanhs/check_typedpkg) diff --git a/pep-0562.rst b/pep-0562.rst index 8dd584f6a..c16712955 100644 --- a/pep-0562.rst +++ b/pep-0562.rst @@ -72,7 +72,7 @@ imports. Consider a simple example:: # main.py import lib - lib.submodule.HeavyClass # prints "Submodule loaded" + lib.submod.HeavyClass # prints "Submodule loaded" There is a related proposal PEP 549 that proposes to support instance properties for a similar functionality. The difference is this PEP proposes diff --git a/pep-0566.txt b/pep-0566.rst similarity index 92% rename from pep-0566.txt rename to pep-0566.rst index bc3f24e1a..c76be7aba 100644 --- a/pep-0566.txt +++ b/pep-0566.rst @@ -5,14 +5,14 @@ Last-Modified: $Date$ Author: Dustin Ingram BDFL-Delegate: Daniel Holth Discussions-To: distutils-sig -Status: Draft +Status: Final Type: Standards Track Content-Type: text/x-rst Created: 1-Dec-2017 Python-Version: 3.x Post-History: Replaces: 345 - +Resolution: https://mail.python.org/pipermail/distutils-sig/2018-February/032014.html Abstract ======== @@ -81,6 +81,14 @@ Name The specification for the format of this field is now identical to the distribution name specification defined in PEP 508. +Description +::::::::::: + +In addition to the ``Description`` header field, the distribution's +description may instead be provided in the message body (i.e., after a +completely blank line following the headers, with no indentation or other +special formatting necessary). + Version Specifiers ================== @@ -124,6 +132,8 @@ as follows: single list containing all the original values for the given key; #. The ``Keywords`` field should be converted to a list by splitting the original value on whitespace characters; +#. The message body, if present, should be set to the value of the + ``description`` key. #. The result should be stored as a string-keyed dictionary. Summary of Differences From PEP 345 diff --git a/pep-0567.rst b/pep-0567.rst index cbc6920d3..3f5f3fffb 100644 --- a/pep-0567.rst +++ b/pep-0567.rst @@ -712,6 +712,15 @@ This proposal was deferred to Python 3.8+ because of the following: ctx.run(func) +3. If ``Context`` was mutable it would mean that context variables + could be mutated separately (or concurrently) from the code that + runs within the context. That would be similar to obtaining a + reference to a running Python frame object and modifying its + ``f_locals`` from another OS thread. 
Having one single way to
+   assign values to context variables makes contexts conceptually
+   simpler and more predictable, while keeping the door open for
+   future performance optimizations.
+

Having initial values for ContextVars
-------------------------------------

diff --git a/pep-0571.rst b/pep-0571.rst
new file mode 100644
index 000000000..3fea4df7d
--- /dev/null
+++ b/pep-0571.rst
@@ -0,0 +1,379 @@
+PEP: 571
+Title: The manylinux2010 Platform Tag
+Version: $Revision$
+Last-Modified: $Date$
+Author: Mark Williams ,
+        Geoffrey Thomas ,
+        Thomas Kluyver
+BDFL-Delegate: Nick Coghlan
+Discussions-To: Distutils SIG
+Status: Active
+Type: Informational
+Content-Type: text/x-rst
+Created:
+Post-History:
+Resolution: https://mail.python.org/pipermail/distutils-sig/2018-April/032156.html
+
+
+Abstract
+========
+
+This PEP proposes the creation of a ``manylinux2010`` platform tag to
+succeed the ``manylinux1`` tag introduced by PEP 513 [1]_. It also
+proposes that PyPI and ``pip`` both be updated to support uploading,
+downloading, and installing ``manylinux2010`` distributions on compatible
+platforms.
+
+Rationale
+=========
+
+True to its name, the ``manylinux1`` platform tag has made the
+installation of binary extension modules a reality on many Linux
+systems. Libraries like ``cryptography`` [2]_ and ``numpy`` [3]_ are
+more accessible to Python developers now that their installation on
+common architectures does not depend on fragile development
+environments and build toolchains.
+
+``manylinux1`` wheels achieve their portability by allowing the
+extension modules they contain to link against only a small set of
+system-level shared libraries that export versioned symbols old enough
+to benefit from backwards-compatibility policies.  Extension modules
+in a ``manylinux1`` wheel that rely on ``glibc``, for example, must be
+built against version 2.5 or earlier; they may then be run on systems
+that provide a more recent ``glibc`` version that still exports the
+required symbols at version 2.5.
+
+PEP 513 drew its whitelisted shared libraries and their symbol
+versions from CentOS 5.11, which was the oldest supported CentOS
+release at the time of its writing.  Unfortunately, CentOS 5.11
+reached its end-of-life on March 31st, 2017 with a clear warning
+against its continued use. [4]_ No further updates, such as security
+patches, will be made available.  This means that its packages will
+remain at obsolete versions that hamper the efforts of Python software
+packagers who use the ``manylinux1`` Docker image.
+
+CentOS 6 is now the oldest supported CentOS release, and will receive
+maintenance updates through November 30th, 2020. [5]_ We propose that
+a new PEP 425-style [6]_ platform tag called ``manylinux2010`` be derived
+from CentOS 6 and that the ``manylinux`` toolchain, PyPI, and ``pip``
+be updated to support it.
+
+This was originally proposed as ``manylinux2``, but the versioning has
+been changed to use calendar years (also known as CalVer [23]_). This
+makes it easier to define future *manylinux* tags out of order: for
+example, a hypothetical ``manylinux2017`` standard may be defined via
+a new PEP before ``manylinux2014``, or a ``manylinux2007`` standard
+might be defined that targets systems older than this PEP but newer
+than ``manylinux1``.
+
+Calendar versioning also gives a rough idea of which Linux
+distribution versions support which tag: ``manylinux2010`` will work
+on most distribution versions released since 2010.
This is only an
+approximation, however: the actual compatibility rules are defined
+below, and some newer distributions may not meet them.
+
+The ``manylinux2010`` policy
+============================
+
+The following criteria determine a ``linux`` wheel's eligibility for
+the ``manylinux2010`` tag:
+
+1. The wheel may only contain binary executables and shared objects
+   compiled for one of the two architectures supported by CentOS 6:
+   x86_64 or i686. [5]_
+2. The wheel's binary executables or shared objects may not link
+   against externally-provided libraries except those in the following
+   whitelist: ::
+
+       libgcc_s.so.1
+       libstdc++.so.6
+       libm.so.6
+       libdl.so.2
+       librt.so.1
+       libcrypt.so.1
+       libc.so.6
+       libnsl.so.1
+       libutil.so.1
+       libpthread.so.0
+       libresolv.so.2
+       libX11.so.6
+       libXext.so.6
+       libXrender.so.1
+       libICE.so.6
+       libSM.so.6
+       libGL.so.1
+       libgobject-2.0.so.0
+       libgthread-2.0.so.0
+       libglib-2.0.so.0
+
+   This list is identical to the externally-provided libraries
+   whitelisted for ``manylinux1``, minus ``libncursesw.so.5`` and
+   ``libpanelw.so.5``. [7]_ ``libpythonX.Y`` remains ineligible for
+   inclusion for the same reasons outlined in PEP 513.
+
+   On Debian-based systems, these libraries are provided by the packages:
+
+   ============ =======================================================
+   Package      Libraries
+   ============ =======================================================
+   libc6        libdl.so.2, libresolv.so.2, librt.so.1, libc.so.6,
+                libpthread.so.0, libm.so.6, libutil.so.1, libcrypt.so.1,
+                libnsl.so.1
+   libgcc1      libgcc_s.so.1
+   libgl1       libGL.so.1
+   libglib2.0-0 libgobject-2.0.so.0, libgthread-2.0.so.0, libglib-2.0.so.0
+   libice6      libICE.so.6
+   libsm6       libSM.so.6
+   libstdc++6   libstdc++.so.6
+   libx11-6     libX11.so.6
+   libxext6     libXext.so.6
+   libxrender1  libXrender.so.1
+   ============ =======================================================
+
+   On RPM-based systems, they are provided by these packages:
+
+   ============ =======================================================
+   Package      Libraries
+   ============ =======================================================
+   glib2        libglib-2.0.so.0, libgthread-2.0.so.0, libgobject-2.0.so.0
+   glibc        libresolv.so.2, libutil.so.1, libnsl.so.1, librt.so.1,
+                libcrypt.so.1, libpthread.so.0, libdl.so.2, libm.so.6,
+                libc.so.6
+   libICE       libICE.so.6
+   libX11       libX11.so.6
+   libXext      libXext.so.6
+   libXrender   libXrender.so.1
+   libgcc       libgcc_s.so.1
+   libstdc++    libstdc++.so.6
+   mesa         libGL.so.1
+   ============ =======================================================
+
+3. If the wheel contains binary executables or shared objects linked
+   against any whitelisted libraries that also export versioned
+   symbols, they may only depend on the following maximum versions::
+
+       GLIBC_2.12
+       CXXABI_1.3.3
+       GLIBCXX_3.4.13
+       GCC_4.3.0
+
+   As an example, ``manylinux2010`` wheels may include binary artifacts
+   that require ``glibc`` symbols at version ``GLIBC_2.4``, because
+   this is an earlier version than the maximum of ``GLIBC_2.12``.
+4. If a wheel is built for any version of CPython 2 or CPython
+   versions 3.0 up to and including 3.2, it *must* include a CPython
+   ABI tag indicating its Unicode ABI.  A ``manylinux2010`` wheel built
+   against Python 2, then, must include either the ``cpy27mu`` tag
+   indicating it was built against an interpreter with the UCS-4 ABI
+   or the ``cpy27m`` tag indicating an interpreter with the UCS-2
+   ABI. [8]_ [9]_
+5. A wheel *must not* require the ``PyFPE_jbuf`` symbol.
This is
+   achieved by building it against a Python compiled *without* the
+   ``--with-fpectl`` ``configure`` flag.
+
+Compilation of Compliant Wheels
+===============================
+
+Like ``manylinux1``, the ``auditwheel`` tool adds ``manylinux2010``
+platform tags to ``linux`` wheels built by ``pip wheel`` or
+``bdist_wheel`` in a ``manylinux2010`` Docker container.
+
+Docker Images
+-------------
+
+``manylinux2010`` Docker images based on CentOS 6 x86_64 and i686 are
+provided for building binary ``linux`` wheels that can reliably be
+converted to ``manylinux2010`` wheels. [10]_ These images come with a
+full compiler suite installed (``gcc``, ``g++``, and ``gfortran``
+4.8.2) as well as the latest releases of Python and ``pip``.
+
+Compatibility with kernels that lack ``vsyscall``
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+A Docker container assumes that its userland is compatible with its
+host's kernel.  Unfortunately, an increasingly common kernel
+configuration breaks this assumption for x86_64 CentOS 6 Docker
+images.
+
+Versions 2.14 and earlier of ``glibc`` require that the kernel provide an
+archaic system call optimization known as ``vsyscall`` on x86_64. [11]_
+To effect the optimization, the kernel maps a read-only page of
+frequently-called system calls -- most notably ``time(2)`` -- into
+each process at a fixed memory location.  ``glibc`` then invokes these
+system calls by dereferencing a function pointer to the appropriate
+offset into the ``vsyscall`` page and calling it.  This avoids the
+overhead associated with invoking the kernel that affects normal
+system call invocation.  ``vsyscall`` has long been deprecated in
+favor of an equivalent mechanism known as vDSO, or "virtual dynamic
+shared object", in which the kernel instead maps a relocatable virtual
+shared object containing the optimized system calls into each
+process. [12]_
+
+The ``vsyscall`` page has serious security implications because it
+does not participate in address space layout randomization (ASLR).
+Its predictable location and contents make it a useful source of
+gadgets used in return-oriented programming attacks. [13]_ At the same
+time, its elimination breaks the x86_64 ABI, because ``glibc``
+versions that depend on ``vsyscall`` suffer from segmentation faults
+when attempting to dereference a system call pointer into a
+non-existent page.  As a compromise, Linux 3.1 implemented an
+"emulated" ``vsyscall`` that reduced the executable code, and thus the
+material for ROP gadgets, mapped into the process. [14]_
+``vsyscall=emulated`` has been the default configuration in most
+distributions' kernels for many years.
+
+Unfortunately, ``vsyscall`` emulation still exposes predictable code
+at a reliable memory location, and continues to be useful for
+return-oriented programming. [15]_ Because most distributions have now
+upgraded to ``glibc`` versions that do not depend on ``vsyscall``,
+they are beginning to ship kernels that do not support ``vsyscall`` at
+all. [16]_
+
+CentOS 5.11 and 6 both include versions of ``glibc`` that depend on
+the ``vsyscall`` page (2.5 and 2.12.2 respectively), so containers
+based on either cannot run under kernels provided with many
+distributions' upcoming releases. [17]_ If Travis CI, for example,
+begins running jobs under
+a kernel that does not provide the ``vsyscall`` interface, Python
+packagers will not be able to use our Docker images there to build
+``manylinux`` wheels.
[19]_
+
+We have derived a patch from the ``glibc`` git repository that
+backports the removal of all dependencies on ``vsyscall`` to the
+version of ``glibc`` included with our ``manylinux2010`` image. [20]_
+Rebuilding ``glibc``, and thus building the ``manylinux2010`` image itself,
+still requires a host kernel that provides the ``vsyscall`` mechanism,
+but the resulting image can be run both on hosts that provide it and
+on those that do not.  Because the ``vsyscall`` interface is an
+optimization that is only applied to running processes, the
+``manylinux2010`` wheels built with this modified image should be
+identical to those built on an unmodified CentOS 6 system.  Also, the
+``vsyscall`` problem applies only to x86_64; it is not part of the
+i686 ABI.
+
+Auditwheel
+----------
+
+The ``auditwheel`` tool has also been updated to produce
+``manylinux2010`` wheels. [21]_ Its behavior and purpose are otherwise
+unchanged from PEP 513.
+
+
+Platform Detection for Installers
+=================================
+
+Platforms may define a ``manylinux2010_compatible`` boolean attribute on
+the ``_manylinux`` module described in PEP 513.  A platform is
+considered incompatible with ``manylinux2010`` if the attribute is
+``False``.
+
+If the ``_manylinux`` module is not found, or it does not have the attribute
+``manylinux2010_compatible``, tools may fall back to checking for glibc. If the
+platform has glibc 2.12 or newer, it is assumed to be compatible unless the
+``_manylinux`` module says otherwise.
+
+Specifically, the algorithm we propose is::
+
+    def is_manylinux2010_compatible():
+        # Only Linux, and only x86-64 / i686
+        from distutils.util import get_platform
+        if get_platform() not in ["linux-x86_64", "linux-i686"]:
+            return False
+
+        # Check for presence of _manylinux module
+        try:
+            import _manylinux
+            return bool(_manylinux.manylinux2010_compatible)
+        except (ImportError, AttributeError):
+            # Fall through to heuristic check below
+            pass
+
+        # Check glibc version. CentOS 6 uses glibc 2.12.
+        # PEP 513 contains an implementation of this function.
+        return have_compatible_glibc(2, 12)
+
+
+Backwards compatibility with ``manylinux1`` wheels
+==================================================
+
+As explained in PEP 513, the specified symbol versions for
+``manylinux1`` whitelisted libraries constitute an *upper bound*.  The
+same is true for the symbol versions defined for ``manylinux2010`` in
+this PEP.  As a result, ``manylinux1`` wheels are considered
+``manylinux2010`` wheels.  A ``pip`` that recognizes the ``manylinux2010``
+platform tag will thus install ``manylinux1`` wheels for
+``manylinux2010`` platforms -- even when explicitly set -- when no
+``manylinux2010`` wheels are available. [22]_
+
+PyPI Support
+============
+
+PyPI should permit wheels containing the ``manylinux2010`` platform tag
+to be uploaded in the same way that it permits ``manylinux1``.  It
+should not attempt to verify the compatibility of ``manylinux2010``
+wheels.
+
+
+References
+==========
+
+.. [1] PEP 513 -- A Platform Tag for Portable Linux Built Distributions
+       (https://www.python.org/dev/peps/pep-0513/)
+.. [2] pyca/cryptography
+       (https://cryptography.io/)
+.. [3] numpy
+       (https://numpy.org)
+.. [4] CentOS 5.11 EOL announcement
+       (https://lists.centos.org/pipermail/centos-announce/2017-April/022350.html)
+.. [5] CentOS Product Specifications
+       (https://web.archive.org/web/20180108090257/https://wiki.centos.org/About/Product)
..
[6] PEP 425 -- Compatibility Tags for Built Distributions
+       (https://www.python.org/dev/peps/pep-0425/)
.. [7] ncurses 5 -> 6 transition means we probably need to drop some
+       libraries from the manylinux whitelist
+       (https://github.com/pypa/manylinux/issues/94)
.. [8] PEP 3149
+       (https://www.python.org/dev/peps/pep-3149/)
.. [9] SOABI support for Python 2.X and PyPy
+       (https://github.com/pypa/pip/pull/3075)
.. [10] manylinux2 Docker images
+       (https://hub.docker.com/r/markrwilliams/manylinux2/)
.. [11] On vsyscalls and the vDSO
+       (https://lwn.net/Articles/446528/)
.. [12] vdso(7)
+       (http://man7.org/linux/man-pages/man7/vdso.7.html)
.. [13] Framing Signals -- A Return to Portable Shellcode
+       (http://www.cs.vu.nl/~herbertb/papers/srop_sp14.pdf)
.. [14] ChangeLog-3.1
+       (https://www.kernel.org/pub/linux/kernel/v3.x/ChangeLog-3.1)
.. [15] Project Zero: Three bypasses and a fix for one of Flash's Vector.<*> mitigations
+       (https://googleprojectzero.blogspot.com/2015/08/three-bypasses-and-fix-for-one-of.html)
.. [16] linux: activate CONFIG_LEGACY_VSYSCALL_NONE ?
+       (https://bugs.debian.org/cgi-bin/bugreport.cgi?bug=852620)
.. [17] [Wheel-builders] Heads-up re: new kernel configurations breaking the manylinux docker image
+       (https://mail.python.org/pipermail/wheel-builders/2016-December/000239.html)
.. [18] No longer used
.. [19] Travis CI
+       (https://travis-ci.org/)
.. [20] remove-vsyscall.patch
+       (https://github.com/markrwilliams/manylinux/commit/e9493d55471d153089df3aafca8cfbcb50fa8093#diff-3eda4130bdba562657f3ec7c1b3f5720)
.. [21] auditwheel manylinux2 branch
+       (https://github.com/markrwilliams/auditwheel/tree/manylinux2)
.. [22] pip manylinux2 branch
+       (https://github.com/markrwilliams/pip/commits/manylinux2)
.. [23] Calendar Versioning
+       (http://calver.org/)
+
+Copyright
+=========
+
+This document has been placed into the public domain.
+
+..
+  Local Variables:
+  mode: indented-text
+  indent-tabs-mode: nil
+  sentence-end-double-space: t
+  fill-column: 70
+  coding: utf-8
+  End:
diff --git a/pep-0572.rst b/pep-0572.rst
new file mode 100644
index 000000000..18cad2e8e
--- /dev/null
+++ b/pep-0572.rst
@@ -0,0 +1,1262 @@
+PEP: 572
+Title: Assignment Expressions
+Author: Chris Angelico , Tim Peters ,
+    Guido van Rossum
+Status: Accepted
+Type: Standards Track
+Content-Type: text/x-rst
+Created: 28-Feb-2018
+Python-Version: 3.8
+Post-History: 28-Feb-2018, 02-Mar-2018, 23-Mar-2018, 04-Apr-2018, 17-Apr-2018,
+              25-Apr-2018, 09-Jul-2018
+Resolution: https://mail.python.org/pipermail/python-dev/2018-July/154601.html
+
+
+Abstract
+========
+
+This is a proposal for creating a way to assign to variables within an
+expression using the notation ``NAME := expr``.  A new exception,
+``TargetScopeError``, is added, and there is one change to evaluation
+order.
+
+Rationale
+=========
+
+Naming the result of an expression is an important part of programming,
+allowing a descriptive name to be used in place of a longer expression,
+and permitting reuse.  Currently, this feature is available only in
+statement form, making it unavailable in list comprehensions and other
+expression contexts.
+
+Additionally, naming sub-parts of a large expression can assist an interactive
+debugger, providing useful display hooks and partial results.  Without a way to
+capture sub-expressions inline, this would require refactoring of the original
+code; with assignment expressions, this merely requires the insertion of a few
+``name :=`` markers.
Removing the need to refactor reduces the likelihood that +the code be inadvertently changed as part of debugging (a common cause of +Heisenbugs), and is easier to dictate to another programmer. + +The importance of real code +--------------------------- + +During the development of this PEP many people (supporters and critics +both) have had a tendency to focus on toy examples on the one hand, +and on overly complex examples on the other. + +The danger of toy examples is twofold: they are often too abstract to +make anyone go "ooh, that's compelling", and they are easily refuted +with "I would never write it that way anyway". + +The danger of overly complex examples is that they provide a +convenient strawman for critics of the proposal to shoot down ("that's +obfuscated"). + +Yet there is some use for both extremely simple and extremely complex +examples: they are helpful to clarify the intended semantics. +Therefore there will be some of each below. + +However, in order to be *compelling*, examples should be rooted in +real code, i.e. code that was written without any thought of this PEP, +as part of a useful application, however large or small. Tim Peters +has been extremely helpful by going over his own personal code +repository and picking examples of code he had written that (in his +view) would have been *clearer* if rewritten with (sparing) use of +assignment expressions. His conclusion: the current proposal would +have allowed a modest but clear improvement in quite a few bits of +code. + +Another use of real code is to observe indirectly how much value +programmers place on compactness. Guido van Rossum searched through a +Dropbox code base and discovered some evidence that programmers value +writing fewer lines over shorter lines. + +Case in point: Guido found several examples where a programmer +repeated a subexpression, slowing down the program, in order to save +one line of code, e.g. instead of writing:: + + match = re.match(data) + group = match.group(1) if match else None + +they would write:: + + group = re.match(data).group(1) if re.match(data) else None + +Another example illustrates that programmers sometimes do more work to +save an extra level of indentation:: + + match1 = pattern1.match(data) + match2 = pattern2.match(data) + if match1: + result = match1.group(1) + elif match2: + result = match2.group(2) + else: + result = None + +This code tries to match ``pattern2`` even if ``pattern1`` has a match +(in which case the match on ``pattern2`` is never used). The more +efficient rewrite would have been:: + + match1 = pattern1.match(data) + if match1: + result = match1.group(1) + else: + match2 = pattern2.match(data) + if match2: + result = match2.group(2) + else: + result = None + + +Syntax and semantics +==================== + +In most contexts where arbitrary Python expressions can be used, a +**named expression** can appear. This is of the form ``NAME := expr`` +where ``expr`` is any valid Python expression other than an +unparenthesized tuple, and ``NAME`` is an identifier. 
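+For example (an illustrative sketch; ``pattern`` is assumed to be a
+compiled regular expression and ``data`` a string), the duplicated
+``re.match(data)`` call from the Rationale above collapses to a single
+call::
+
+    group = match.group(1) if (match := pattern.match(data)) else None
+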
+ +The value of such a named expression is the same as the incorporated +expression, with the additional side-effect that the target is assigned +that value:: + + # Handle a matched regex + if (match := pattern.search(data)) is not None: + # Do something with match + + # A loop that can't be trivially rewritten using 2-arg iter() + while chunk := file.read(8192): + process(chunk) + + # Reuse a value that's expensive to compute + [y := f(x), y**2, y**3] + + # Share a subexpression between a comprehension filter clause and its output + filtered_data = [y for x in data if (y := f(x)) is not None] + +Exceptional cases +----------------- + +There are a few places where assignment expressions are not allowed, +in order to avoid ambiguities or user confusion: + +- Unparenthesized assignment expressions are prohibited at the top + level of an expression statement. Example:: + + y := f(x) # INVALID + (y := f(x)) # Valid, though not recommended + + This rule is included to simplify the choice for the user between an + assignment statement and an assignment expression -- there is no + syntactic position where both are valid. + +- Unparenthesized assignment expressions are prohibited at the top + level of the right hand side of an assignment statement. Example:: + + y0 = y1 := f(x) # INVALID + y0 = (y1 := f(x)) # Valid, though discouraged + + Again, this rule is included to avoid two visually similar ways of + saying the same thing. + +- Unparenthesized assignment expressions are prohibited for the value + of a keyword argument in a call. Example:: + + foo(x = y := f(x)) # INVALID + foo(x=(y := f(x))) # Valid, though probably confusing + + This rule is included to disallow excessively confusing code, and + because parsing keyword arguments is complex enough already. + +- Unparenthesized assignment expressions are prohibited at the top + level of a function default value. Example:: + + def foo(answer = p := 42): # INVALID + ... + def foo(answer=(p := 42)): # Valid, though not great style + ... + + This rule is included to discourage side effects in a position whose + exact semantics are already confusing to many users (cf. the common + style recommendation against mutable default values), and also to + echo the similar prohibition in calls (the previous bullet). + +- Unparenthesized assignment expressions are prohibited as annotations + for arguments, return values and assignments. Example:: + + def foo(answer: p := 42 = 5): # INVALID + ... + def foo(answer: (p := 42) = 5): # Valid, but probably never useful + ... + + The reasoning here is similar to the two previous cases; this + ungrouped assortment of symbols and operators composed of ``:`` and + ``=`` is hard to read correctly. + +- Unparenthesized assignment expressions are prohibited in lambda functions. + Example:: + + (lambda: x := 1) # INVALID + lambda: (x := 1) # Valid, but unlikely to be useful + (x := lambda: 1) # Valid + lambda line: (m := re.match(pattern, line)) and m.group(1) # Valid + + This allows ``lambda`` to always bind less tightly than ``:=``; having a + name binding at the top level inside a lambda function is unlikely to be of + value, as there is no way to make use of it. In cases where the name will be + used more than once, the expression is likely to need parenthesizing anyway, + so this prohibition will rarely affect code. + +Scope of the target +------------------- + +An assignment expression does not introduce a new scope. 
In most +cases the scope in which the target will be bound is self-explanatory: +it is the current scope. If this scope contains a ``nonlocal`` or +``global`` declaration for the target, the assignment expression +honors that. A lambda (being an explicit, if anonymous, function +definition) counts as a scope for this purpose. + +There is one special case: an assignment expression occurring in a +list, set or dict comprehension or in a generator expression (below +collectively referred to as "comprehensions") binds the target in the +containing scope, honoring a ``nonlocal`` or ``global`` declaration +for the target in that scope, if one exists. For the purpose of this +rule the containing scope of a nested comprehension is the scope that +contains the outermost comprehension. A lambda counts as a containing +scope. + +The motivation for this special case is twofold. First, it allows us +to conveniently capture a "witness" for an ``any()`` expression, or a +counterexample for ``all()``, for example:: + + if any((comment := line).startswith('#') for line in lines): + print("First comment:", comment) + else: + print("There are no comments") + + if all((nonblank := line).strip() == '' for line in lines): + print("All lines are blank") + else: + print("First non-blank line:", nonblank) + +Second, it allows a compact way of updating mutable state from a +comprehension, for example:: + + # Compute partial sums in a list comprehension + total = 0 + partial_sums = [total := total + v for v in values] + print("Total:", total) + +However, an assignment expression target name cannot be the same as a +``for``-target name appearing in any comprehension containing the +assignment expression. The latter names are local to the +comprehension in which they appear, so it would be contradictory for a +contained use of the same name to refer to the scope containing the +outermost comprehension instead. + +For example, ``[i := i+1 for i in range(5)]`` is invalid: the ``for +i`` part establishes that ``i`` is local to the comprehension, but the +``i :=`` part insists that ``i`` is not local to the comprehension. +The same reason makes these examples invalid too:: + + [[(j := j) for i in range(5)] for j in range(5)] + [i := 0 for i, j in stuff] + [i+1 for i in i := stuff] + +A further exception applies when an assignment expression occurs in a +comprehension whose containing scope is a class scope. If the rules +above were to result in the target being assigned in that class's +scope, the assignment expression is expressly invalid. + +(The reason for the latter exception is the implicit function created +for comprehensions -- there is currently no runtime mechanism for a +function to refer to a variable in the containing class scope, and we +do not want to add such a mechanism. If this issue ever gets resolved +this special case may be removed from the specification of assignment +expressions. Note that the problem already exists for *using* a +variable defined in the class scope from a comprehension.) + +See Appendix B for some examples of how the rules for targets in +comprehensions translate to equivalent code. + +The two invalid cases listed above raise ``TargetScopeError``, a +new subclass of ``SyntaxError`` (with the same signature). 
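+To make the comprehension binding concrete (an illustrative sketch of
+the rules above, not part of the specification), the partial-sums
+example leaves both the list and the target bound in the containing
+scope::
+
+    values = [1, 2, 3]
+    total = 0
+    partial_sums = [total := total + v for v in values]
+    assert partial_sums == [1, 3, 6]
+    assert total == 6  # bound in the scope containing the comprehension
+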
+
+Relative precedence of ``:=``
+-----------------------------
+
+The ``:=`` operator groups more tightly than a comma in all syntactic
+positions where it is legal, but less tightly than all other operators,
+including ``or``, ``and``, ``not``, and conditional expressions
+(``A if C else B``).  As follows from section
+"Exceptional cases" above, it is never allowed at the same level as
+``=``.  In case a different grouping is desired, parentheses should be
+used.
+
+The ``:=`` operator may be used directly in a positional function call
+argument; however it is invalid directly in a keyword argument.
+
+Some examples to clarify what's technically valid or invalid::
+
+    # INVALID
+    x := 0
+
+    # Valid alternative
+    (x := 0)
+
+    # INVALID
+    x = y := 0
+
+    # Valid alternative
+    x = (y := 0)
+
+    # Valid
+    len(lines := f.readlines())
+
+    # Valid
+    foo(x := 3, cat='vector')
+
+    # INVALID
+    foo(cat=category := 'vector')
+
+    # Valid alternative
+    foo(cat=(category := 'vector'))
+
+Most of the "valid" examples above are not recommended, since human
+readers of Python source code who are quickly glancing at some code
+may miss the distinction.  But simple cases are not objectionable::
+
+    # Valid
+    if any(len(longline := line) >= 100 for line in lines):
+        print("Extremely long line:", longline)
+
+This PEP recommends always putting spaces around ``:=``, similar to
+PEP 8's recommendation for ``=`` when used for assignment, whereas the
+latter disallows spaces around ``=`` used for keyword arguments.
+
+Change to evaluation order
+--------------------------
+
+In order to have precisely defined semantics, the proposal requires
+evaluation order to be well-defined.  This is technically not a new
+requirement, as function calls may already have side effects.  Python
+already has a rule that subexpressions are generally evaluated from
+left to right.  However, assignment expressions make these side
+effects more visible, and we propose a single change to the current
+evaluation order:
+
+- In a dict comprehension ``{X: Y for ...}``, ``Y`` is currently
+  evaluated before ``X``.  We propose to change this so that ``X`` is
+  evaluated before ``Y``.  (In a dict display like ``{X: Y}`` this is
+  already the case, and also in ``dict((X, Y) for ...)`` which should
+  clearly be equivalent to the dict comprehension.)
+
+Differences between assignment expressions and assignment statements
+---------------------------------------------------------------------
+
+Most importantly, since ``:=`` is an expression, it can be used in contexts
+where statements are illegal, including lambda functions and comprehensions.
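+As an illustrative sketch of such a context (the names here are
+hypothetical), a lambda function can capture and test a regular
+expression match in one expression, where an assignment statement
+would be a syntax error::
+
+    import re
+
+    pattern = re.compile(r'#\s*(.*)')
+    extract = lambda line: (m := pattern.match(line)) and m.group(1)
+    assert extract('# hello') == 'hello'
+    assert extract('no comment here') is None
+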
+
+Conversely, assignment expressions don't support the advanced features
+found in assignment statements:
+
+- Multiple targets are not directly supported::
+
+    x = y = z = 0  # Equivalent: (z := (y := (x := 0)))
+
+- Single assignment targets other than a single ``NAME`` are
+  not supported::
+
+    # No equivalent
+    a[i] = x
+    self.rest = []
+
+- Priority around commas is different::
+
+    x = 1, 2  # Sets x to (1, 2)
+    (x := 1, 2)  # Sets x to 1
+
+- Iterable packing and unpacking (both regular or extended forms) are
+  not supported::
+
+    # Equivalent needs extra parentheses
+    loc = x, y  # Use (loc := (x, y))
+    info = name, phone, *rest  # Use (info := (name, phone, *rest))
+
+    # No equivalent
+    px, py, pz = position
+    name, phone, email, *other_info = contact
+
+- Inline type annotations are not supported::
+
+    # Closest equivalent is "p: Optional[int]" as a separate declaration
+    p: Optional[int] = None
+
+- Augmented assignment is not supported::
+
+    total += tax  # Equivalent: (total := total + tax)
+
+
+Examples
+========
+
+Examples from the Python standard library
+-----------------------------------------
+
+site.py
+^^^^^^^
+
+*env_base* is only used on these lines; putting its assignment on the
+``if`` line makes it read as the "header" of the block.
+
+- Current::
+
+    env_base = os.environ.get("PYTHONUSERBASE", None)
+    if env_base:
+        return env_base
+
+- Improved::
+
+    if env_base := os.environ.get("PYTHONUSERBASE", None):
+        return env_base
+
+_pydecimal.py
+^^^^^^^^^^^^^
+
+Avoids a nested ``if`` and removes one indentation level.
+
+- Current::
+
+    if self._is_special:
+        ans = self._check_nans(context=context)
+        if ans:
+            return ans
+
+- Improved::
+
+    if self._is_special and (ans := self._check_nans(context=context)):
+        return ans
+
+copy.py
+^^^^^^^
+
+The code looks more regular and avoids multiple nested ``if`` statements.
+(See Appendix A for the origin of this example.)
+
+- Current::
+
+    reductor = dispatch_table.get(cls)
+    if reductor:
+        rv = reductor(x)
+    else:
+        reductor = getattr(x, "__reduce_ex__", None)
+        if reductor:
+            rv = reductor(4)
+        else:
+            reductor = getattr(x, "__reduce__", None)
+            if reductor:
+                rv = reductor()
+            else:
+                raise Error(
+                    "un(deep)copyable object of type %s" % cls)
+
+- Improved::
+
+    if reductor := dispatch_table.get(cls):
+        rv = reductor(x)
+    elif reductor := getattr(x, "__reduce_ex__", None):
+        rv = reductor(4)
+    elif reductor := getattr(x, "__reduce__", None):
+        rv = reductor()
+    else:
+        raise Error("un(deep)copyable object of type %s" % cls)
+
+datetime.py
+^^^^^^^^^^^
+
+*tz* is only used for ``s += tz``; moving its assignment inside the ``if``
+helps to show its scope.
+
+- Current::
+
+    s = _format_time(self._hour, self._minute,
+                     self._second, self._microsecond,
+                     timespec)
+    tz = self._tzstr()
+    if tz:
+        s += tz
+    return s
+
+- Improved::
+
+    s = _format_time(self._hour, self._minute,
+                     self._second, self._microsecond,
+                     timespec)
+    if tz := self._tzstr():
+        s += tz
+    return s
+
+sysconfig.py
+^^^^^^^^^^^^
+
+Calling ``fp.readline()`` in the ``while`` condition and calling
+``.match()`` on the ``if`` lines make the code more compact without making
+it harder to understand.
+ +- Current:: + + while True: + line = fp.readline() + if not line: + break + m = define_rx.match(line) + if m: + n, v = m.group(1, 2) + try: + v = int(v) + except ValueError: + pass + vars[n] = v + else: + m = undef_rx.match(line) + if m: + vars[m.group(1)] = 0 + +- Improved:: + + while line := fp.readline(): + if m := define_rx.match(line): + n, v = m.group(1, 2) + try: + v = int(v) + except ValueError: + pass + vars[n] = v + elif m := undef_rx.match(line): + vars[m.group(1)] = 0 + + +Simplifying list comprehensions +------------------------------- + +A list comprehension can map and filter efficiently by capturing +the condition:: + + results = [(x, y, x/y) for x in input_data if (y := f(x)) > 0] + +Similarly, a subexpression can be reused within the main expression, by +giving it a name on first use:: + + stuff = [[y := f(x), x/y] for x in range(5)] + +Note that in both cases the variable ``y`` is bound in the containing +scope (i.e. at the same level as ``results`` or ``stuff``). + + +Capturing condition values +-------------------------- + +Assignment expressions can be used to good effect in the header of +an ``if`` or ``while`` statement:: + + # Loop-and-a-half + while (command := input("> ")) != "quit": + print("You entered:", command) + + # Capturing regular expression match objects + # See, for instance, Lib/pydoc.py, which uses a multiline spelling + # of this effect + if match := re.search(pat, text): + print("Found:", match.group(0)) + # The same syntax chains nicely into 'elif' statements, unlike the + # equivalent using assignment statements. + elif match := re.search(otherpat, text): + print("Alternate found:", match.group(0)) + elif match := re.search(third, text): + print("Fallback found:", match.group(0)) + + # Reading socket data until an empty string is returned + while data := sock.recv(8192): + print("Received data:", data) + +Particularly with the ``while`` loop, this can remove the need to have an +infinite loop, an assignment, and a condition. It also creates a smooth +parallel between a loop which simply uses a function call as its condition, +and one which uses that as its condition but also uses the actual value. + +Fork +---- + +An example from the low-level UNIX world:: + + if pid := os.fork(): + # Parent code + else: + # Child code + + +Rejected alternative proposals +============================== + +Proposals broadly similar to this one have come up frequently on python-ideas. +Below are a number of alternative syntaxes, some of them specific to +comprehensions, which have been rejected in favour of the one given above. + + +Changing the scope rules for comprehensions +------------------------------------------- + +A previous version of this PEP proposed subtle changes to the scope +rules for comprehensions, to make them more usable in class scope and +to unify the scope of the "outermost iterable" and the rest of the +comprehension. However, this part of the proposal would have caused +backwards incompatibilities, and has been withdrawn so the PEP can +focus on assignment expressions. + + +Alternative spellings +--------------------- + +Broadly the same semantics as the current proposal, but spelled differently. + +1. ``EXPR as NAME``:: + + stuff = [[f(x) as y, x/y] for x in range(5)] + + Since ``EXPR as NAME`` already has meaning in ``import``, + ``except`` and ``with`` statements (with different semantics), this + would create unnecessary confusion or require special-casing + (e.g. to forbid assignment within the headers of these statements). 
+ + (Note that ``with EXPR as VAR`` does *not* simply assign the value + of ``EXPR`` to ``VAR`` -- it calls ``EXPR.__enter__()`` and assigns + the result of *that* to ``VAR``.) + + Additional reasons to prefer ``:=`` over this spelling include: + + - In ``if f(x) as y`` the assignment target doesn't jump out at you + -- it just reads like ``if f x blah blah`` and it is too similar + visually to ``if f(x) and y``. + + - In all other situations where an ``as`` clause is allowed, even + readers with intermediary skills are led to anticipate that + clause (however optional) by the keyword that starts the line, + and the grammar ties that keyword closely to the as clause: + + - ``import foo as bar`` + - ``except Exc as var`` + - ``with ctxmgr() as var`` + + To the contrary, the assignment expression does not belong to the + ``if`` or ``while`` that starts the line, and we intentionally + allow assignment expressions in other contexts as well. + + - The parallel cadence between + + - ``NAME = EXPR`` + - ``if NAME := EXPR`` + + reinforces the visual recognition of assignment expressions. + +2. ``EXPR -> NAME``:: + + stuff = [[f(x) -> y, x/y] for x in range(5)] + + This syntax is inspired by languages such as R and Haskell, and some + programmable calculators. (Note that a left-facing arrow ``y <- f(x)`` is + not possible in Python, as it would be interpreted as less-than and unary + minus.) This syntax has a slight advantage over 'as' in that it does not + conflict with ``with``, ``except`` and ``import``, but otherwise is + equivalent. But it is entirely unrelated to Python's other use of + ``->`` (function return type annotations), and compared to ``:=`` + (which dates back to Algol-58) it has a much weaker tradition. + +3. Adorning statement-local names with a leading dot:: + + stuff = [[(f(x) as .y), x/.y] for x in range(5)] # with "as" + stuff = [[(.y := f(x)), x/.y] for x in range(5)] # with ":=" + + This has the advantage that leaked usage can be readily detected, removing + some forms of syntactic ambiguity. However, this would be the only place + in Python where a variable's scope is encoded into its name, making + refactoring harder. + +4. Adding a ``where:`` to any statement to create local name bindings:: + + value = x**2 + 2*x where: + x = spam(1, 4, 7, q) + + Execution order is inverted (the indented body is performed first, followed + by the "header"). This requires a new keyword, unless an existing keyword + is repurposed (most likely ``with:``). See PEP 3150 for prior discussion + on this subject (with the proposed keyword being ``given:``). + +5. ``TARGET from EXPR``:: + + stuff = [[y from f(x), x/y] for x in range(5)] + + This syntax has fewer conflicts than ``as`` does (conflicting only with the + ``raise Exc from Exc`` notation), but is otherwise comparable to it. Instead + of paralleling ``with expr as target:`` (which can be useful but can also be + confusing), this has no parallels, but is evocative. + + +Special-casing conditional statements +------------------------------------- + +One of the most popular use-cases is ``if`` and ``while`` statements. Instead +of a more general solution, this proposal enhances the syntax of these two +statements to add a means of capturing the compared value:: + + if re.search(pat, text) as match: + print("Found:", match.group(0)) + +This works beautifully if and ONLY if the desired condition is based on the +truthiness of the captured value. 
It is thus effective for specific
+use-cases (regex matches, socket reads that return ``''`` when done), and
+completely useless in more complicated cases (e.g. where the condition is
+``f(x) < 0`` and you want to capture the value of ``f(x)``).  It also has
+no benefit to list comprehensions.
+
+Advantages: No syntactic ambiguities.  Disadvantages: Answers only a fraction
+of possible use-cases, even in ``if``/``while`` statements.
+
+
+Special-casing comprehensions
+-----------------------------
+
+Another common use-case is comprehensions (list/set/dict, and genexps).  As
+above, proposals have been made for comprehension-specific solutions.
+
+1. ``where``, ``let``, or ``given``::
+
+       stuff = [(y, x/y) where y = f(x) for x in range(5)]
+       stuff = [(y, x/y) let y = f(x) for x in range(5)]
+       stuff = [(y, x/y) given y = f(x) for x in range(5)]
+
+   This brings the subexpression to a location in between the 'for' loop and
+   the expression.  It introduces an additional language keyword, which creates
+   conflicts.  Of the three, ``where`` reads the most cleanly, but also has the
+   greatest potential for conflict (e.g. SQLAlchemy and numpy have ``where``
+   methods, as does ``tkinter.dnd.Icon`` in the standard library).
+
+2. ``with NAME = EXPR``::
+
+       stuff = [(y, x/y) with y = f(x) for x in range(5)]
+
+   As above, but reusing the ``with`` keyword.  Doesn't read too badly, and needs
+   no additional language keyword.  Is restricted to comprehensions, though,
+   and cannot as easily be transformed into "longhand" for-loop syntax.  Has
+   the C problem that an equals sign in an expression can now create a name
+   binding, rather than performing a comparison.  Would raise the question of
+   why "with NAME = EXPR:" cannot be used as a statement on its own.
+
+3. ``with EXPR as NAME``::
+
+       stuff = [(y, x/y) with f(x) as y for x in range(5)]
+
+   As per option 2, but using ``as`` rather than an equals sign.  Aligns
+   syntactically with other uses of ``as`` for name binding, but a simple
+   transformation to for-loop longhand would create drastically different
+   semantics; the meaning of ``with`` inside a comprehension would be
+   completely different from the meaning as a stand-alone statement, while
+   retaining identical syntax.
+
+Regardless of the spelling chosen, this introduces a stark difference between
+comprehensions and the equivalent unrolled long-hand form of the loop.  It is
+no longer possible to unwrap the loop into statement form without reworking
+any name bindings.  The only keyword that can be repurposed to this task is
+``with``, thus giving it sneakily different semantics in a comprehension than
+in a statement; alternatively, a new keyword is needed, with all the costs
+therein.
+
+
+Lowering operator precedence
+----------------------------
+
+There are two logical precedences for the ``:=`` operator.  Either it should
+bind as loosely as possible, as does statement-assignment; or it should bind
+more tightly than comparison operators.  Placing its precedence between the
+comparison and arithmetic operators (to be precise: just lower than bitwise
+OR) allows most uses inside ``while`` and ``if`` conditions to be spelled
+without parentheses, as it is most likely that you wish to capture the value
+of something, then perform a comparison on it::
+
+    pos = -1
+    while pos := buffer.find(search_term, pos + 1) >= 0:
+        ...
+
+Once find() returns -1, the loop terminates.
If ``:=`` binds as loosely as
+``=`` does, this would capture the result of the comparison (generally either
+``True`` or ``False``), which is less useful.
+
+While this behaviour would be convenient in many situations, it is also harder
+to explain than "the := operator behaves just like the assignment statement",
+and as such, the precedence for ``:=`` has been made as close as possible to
+that of ``=`` (with the exception that it binds tighter than comma).
+
+
+Allowing commas to the right
+----------------------------
+
+Some critics have claimed that the assignment expressions should allow
+unparenthesized tuples on the right, so that these two would be equivalent::
+
+    (point := (x, y))
+    (point := x, y)
+
+(With the current version of the proposal, the latter would be
+equivalent to ``((point := x), y)``.)
+
+However, adopting this stance would logically lead to the conclusion
+that when used in a function call, assignment expressions also bind
+less tight than comma, so we'd have the following confusing equivalence::
+
+    foo(x := 1, y)
+    foo(x := (1, y))
+
+The less confusing option is to make ``:=`` bind more tightly than comma.
+
+
+Always requiring parentheses
+----------------------------
+
+It's been proposed to just always require parentheses around an
+assignment expression.  This would resolve many ambiguities, and
+indeed parentheses will frequently be needed to extract the desired
+subexpression.  But in the following cases the extra parentheses feel
+redundant::
+
+    # Top level in if
+    if match := pattern.match(line):
+        return match.group(1)
+
+    # Short call
+    len(lines := f.readlines())
+
+
+Frequently Raised Objections
+============================
+
+Why not just turn existing assignment into an expression?
+---------------------------------------------------------
+
+C and its derivatives define the ``=`` operator as an expression, rather than
+a statement as is Python's way.  This allows assignments in more contexts,
+including contexts where comparisons are more common.  The syntactic similarity
+between ``if (x == y)`` and ``if (x = y)`` belies their drastically different
+semantics.  Thus this proposal uses ``:=`` to clarify the distinction.
+
+
+With assignment expressions, why bother with assignment statements?
+-------------------------------------------------------------------
+
+The two forms have different flexibilities.  The ``:=`` operator can be used
+inside a larger expression; the ``=`` statement can be augmented to ``+=`` and
+its friends, can be chained, and can assign to attributes and subscripts.
+
+
+Why not use a sublocal scope and prevent namespace pollution?
+-------------------------------------------------------------
+
+Previous revisions of this proposal involved sublocal scope (restricted to a
+single statement), preventing name leakage and namespace pollution.  While a
+definite advantage in a number of situations, this increases complexity in
+many others, and the costs are not justified by the benefits.  In the interests
+of language simplicity, the name bindings created here are exactly equivalent
+to any other name bindings, including that usage at class or module scope will
+create externally-visible names.  This is no different from ``for`` loops or
+other constructs, and can be solved the same way: ``del`` the name once it is
+no longer needed, or prefix it with an underscore.
+
+(The author wishes to thank Guido van Rossum and Christoph Groth for their
+suggestions to move the proposal in this direction.
[2]_) + + +Style guide recommendations +=========================== + +As expression assignments can sometimes be used equivalently to statement +assignments, the question of which should be preferred will arise. For the +benefit of style guides such as PEP 8, two recommendations are suggested. + +1. If either assignment statements or assignment expressions can be + used, prefer statements; they are a clear declaration of intent. + +2. If using assignment expressions would lead to ambiguity about + execution order, restructure it to use statements instead. + + +Acknowledgements +================ + +The authors wish to thank Nick Coghlan and Steven D'Aprano for their +considerable contributions to this proposal, and members of the +core-mentorship mailing list for assistance with implementation. + + +Appendix A: Tim Peters's findings +================================= + +Here's a brief essay Tim Peters wrote on the topic. + +I dislike "busy" lines of code, and also dislike putting conceptually +unrelated logic on a single line. So, for example, instead of:: + + i = j = count = nerrors = 0 + +I prefer:: + + i = j = 0 + count = 0 + nerrors = 0 + +instead. So I suspected I'd find few places I'd want to use +assignment expressions. I didn't even consider them for lines already +stretching halfway across the screen. In other cases, "unrelated" +ruled:: + + mylast = mylast[1] + yield mylast[0] + +is a vast improvement over the briefer:: + + yield (mylast := mylast[1])[0] + +The original two statements are doing entirely different conceptual +things, and slamming them together is conceptually insane. + +In other cases, combining related logic made it harder to understand, +such as rewriting:: + + while True: + old = total + total += term + if old == total: + return total + term *= mx2 / (i*(i+1)) + i += 2 + +as the briefer:: + + while total != (total := total + term): + term *= mx2 / (i*(i+1)) + i += 2 + return total + +The ``while`` test there is too subtle, crucially relying on strict +left-to-right evaluation in a non-short-circuiting or method-chaining +context. My brain isn't wired that way. + +But cases like that were rare. Name binding is very frequent, and +"sparse is better than dense" does not mean "almost empty is better +than sparse". For example, I have many functions that return ``None`` +or ``0`` to communicate "I have nothing useful to return in this case, +but since that's expected often I'm not going to annoy you with an +exception". This is essentially the same as regular expression search +functions returning ``None`` when there is no match. So there was lots +of code of the form:: + + result = solution(xs, n) + if result: + # use result + +I find that clearer, and certainly a bit less typing and +pattern-matching reading, as:: + + if result := solution(xs, n): + # use result + +It's also nice to trade away a small amount of horizontal whitespace +to get another _line_ of surrounding code on screen. I didn't give +much weight to this at first, but it was so very frequent it added up, +and I soon enough became annoyed that I couldn't actually run the +briefer code. That surprised me! + +There are other cases where assignment expressions really shine. 
+Rather than pick another from my code, Kirill Balunov gave a lovely +example from the standard library's ``copy()`` function in ``copy.py``:: + + reductor = dispatch_table.get(cls) + if reductor: + rv = reductor(x) + else: + reductor = getattr(x, "__reduce_ex__", None) + if reductor: + rv = reductor(4) + else: + reductor = getattr(x, "__reduce__", None) + if reductor: + rv = reductor() + else: + raise Error("un(shallow)copyable object of type %s" % cls) + +The ever-increasing indentation is semantically misleading: the logic +is conceptually flat, "the first test that succeeds wins":: + + if reductor := dispatch_table.get(cls): + rv = reductor(x) + elif reductor := getattr(x, "__reduce_ex__", None): + rv = reductor(4) + elif reductor := getattr(x, "__reduce__", None): + rv = reductor() + else: + raise Error("un(shallow)copyable object of type %s" % cls) + +Using easy assignment expressions allows the visual structure of the +code to emphasize the conceptual flatness of the logic; +ever-increasing indentation obscured it. + +A smaller example from my code delighted me, both allowing to put +inherently related logic in a single line, and allowing to remove an +annoying "artificial" indentation level:: + + diff = x - x_base + if diff: + g = gcd(diff, n) + if g > 1: + return g + +became:: + + if (diff := x - x_base) and (g := gcd(diff, n)) > 1: + return g + +That ``if`` is about as long as I want my lines to get, but remains easy +to follow. + +So, in all, in most lines binding a name, I wouldn't use assignment +expressions, but because that construct is so very frequent, that +leaves many places I would. In most of the latter, I found a small +win that adds up due to how often it occurs, and in the rest I found a +moderate to major win. I'd certainly use it more often than ternary +``if``, but significantly less often than augmented assignment. + +A numeric example +----------------- + +I have another example that quite impressed me at the time. + +Where all variables are positive integers, and a is at least as large +as the n'th root of x, this algorithm returns the floor of the n'th +root of x (and roughly doubling the number of accurate bits per +iteration):: + + while a > (d := x // a**(n-1)): + a = ((n-1)*a + d) // n + return a + +It's not obvious why that works, but is no more obvious in the "loop +and a half" form. It's hard to prove correctness without building on +the right insight (the "arithmetic mean - geometric mean inequality"), +and knowing some non-trivial things about how nested floor functions +behave. That is, the challenges are in the math, not really in the +coding. + +If you do know all that, then the assignment-expression form is easily +read as "while the current guess is too large, get a smaller guess", +where the "too large?" test and the new guess share an expensive +sub-expression. + +To my eyes, the original form is harder to understand:: + + while True: + d = x // a**(n-1) + if a <= d: + break + a = ((n-1)*a + d) // n + return a + + +Appendix B: Rough code translations for comprehensions +====================================================== + +This appendix attempts to clarify (though not specify) the rules when +a target occurs in a comprehension or in a generator expression. +For a number of illustrative examples we show the original code, +containing a comprehension, and the translation, where the +comprehension has been replaced by an equivalent generator function +plus some scaffolding. 
+ +Since ``[x for ...]`` is equivalent to ``list(x for ...)`` these +examples all use list comprehensions without loss of generality. +And since these examples are meant to clarify edge cases of the rules, +they aren't trying to look like real code. + +Note: comprehensions are already implemented via synthesizing nested +generator functions like those in this appendix. The new part is +adding appropriate declarations to establish the intended scope of +assignment expression targets (the same scope they resolve to as if +the assignment were performed in the block containing the outermost +comprehension). For type inference purposes, these illustrative +expansions do not imply that assignment expression targets are always +Optional (but they do indicate the target binding scope). + +Let's start with a reminder of what code is generated for a generator +expression without assignment expression. + +- Original code (EXPR usually references VAR):: + + def f(): + a = [EXPR for VAR in ITERABLE] + +- Translation (let's not worry about name conflicts):: + + def f(): + def genexpr(iterator): + for VAR in iterator: + yield EXPR + a = list(genexpr(iter(ITERABLE))) + +Let's add a simple assignment expression. + +- Original code:: + + def f(): + a = [TARGET := EXPR for VAR in ITERABLE] + +- Translation:: + + def f(): + if False: + TARGET = None # Dead code to ensure TARGET is a local variable + def genexpr(iterator): + nonlocal TARGET + for VAR in iterator: + TARGET = EXPR + yield TARGET + a = list(genexpr(iter(ITERABLE))) + +Let's add a ``global TARGET`` declaration in ``f()``. + +- Original code:: + + def f(): + global TARGET + a = [TARGET := EXPR for VAR in ITERABLE] + +- Translation:: + + def f(): + global TARGET + def genexpr(iterator): + global TARGET + for VAR in iterator: + TARGET = EXPR + yield TARGET + a = list(genexpr(iter(ITERABLE))) + +Or instead let's add a ``nonlocal TARGET`` declaration in ``f()``. + +- Original code:: + + def g(): + TARGET = ... + def f(): + nonlocal TARGET + a = [TARGET := EXPR for VAR in ITERABLE] + +- Translation:: + + def g(): + TARGET = ... + def f(): + nonlocal TARGET + def genexpr(iterator): + nonlocal TARGET + for VAR in iterator: + TARGET = EXPR + yield TARGET + a = list(genexpr(iter(ITERABLE))) + +Finally, let's nest two comprehensions. + +- Original code:: + + def f(): + a = [[TARGET := i for i in range(3)] for j in range(2)] + # I.e., a = [[0, 1, 2], [0, 1, 2]] + print(TARGET) # prints 2 + +- Translation:: + + def f(): + if False: + TARGET = None + def outer_genexpr(outer_iterator): + nonlocal TARGET + def inner_generator(inner_iterator): + nonlocal TARGET + for i in inner_iterator: + TARGET = i + yield i + for j in outer_iterator: + yield list(inner_generator(range(3))) + a = list(outer_genexpr(range(2))) + print(TARGET) + + +Appendix C: No Changes to Scope Semantics +========================================= + +Because it has been a point of confusion, note that nothing about Python's +scoping semantics is changed. Function-local scopes continue to be resolved +at compile time, and to have indefinite temporal extent at run time ("full +closures"). 
Example::
+
+    a = 42
+    def f():
+        # `a` is local to `f`, but remains unbound
+        # until the caller executes this genexp:
+        yield ((a := i) for i in range(3))
+        yield lambda: a + 100
+        print("done")
+        try:
+            print(f"`a` is bound to {a}")
+            assert False
+        except UnboundLocalError:
+            print("`a` is not yet bound")
+
+Then::
+
+    >>> results = list(f()) # [genexp, lambda]
+    done
+    `a` is not yet bound
+    # The execution frame for f no longer exists in CPython,
+    # but f's locals live so long as they can still be referenced.
+    >>> list(map(type, results))
+    [<class 'generator'>, <class 'function'>]
+    >>> list(results[0])
+    [0, 1, 2]
+    >>> results[1]()
+    102
+    >>> a
+    42
+
+
+References
+==========
+
+.. [1] Proof of concept implementation
+   (https://github.com/Rosuav/cpython/tree/assignment-expressions)
+.. [2] Pivotal post regarding inline assignment semantics
+   (https://mail.python.org/pipermail/python-ideas/2018-March/049409.html)
+
+
+Copyright
+=========
+
+This document has been placed in the public domain.
+
+
+
+..
+   Local Variables:
+   mode: indented-text
+   indent-tabs-mode: nil
+   sentence-end-double-space: t
+   fill-column: 70
+   coding: utf-8
+   End:
diff --git a/pep-0573.rst b/pep-0573.rst
new file mode 100644
index 000000000..d15d4a318
--- /dev/null
+++ b/pep-0573.rst
@@ -0,0 +1,568 @@
+PEP: 573
+Title: Module State Access from C Extension Methods
+Version: $Revision$
+Last-Modified: $Date$
+Author: Petr Viktorin,
+        Nick Coghlan,
+        Eric Snow,
+        Marcel Plch
+Discussions-To: import-sig@python.org
+Status: Draft
+Type: Standards Track
+Content-Type: text/x-rst
+Created: 02-Jun-2016
+Python-Version: 3.8
+Post-History:
+
+
+Abstract
+========
+
+This PEP proposes to add a way for CPython extension methods to access context such as
+the state of the modules they are defined in.
+
+This will allow extension methods to use direct pointer dereferences
+rather than ``PyState_FindModule`` for looking up module state, reducing or eliminating the
+performance cost of using module-scoped state over process global state.
+
+This fixes one of the remaining roadblocks for adoption of PEP 3121 (Extension
+module initialization and finalization) and PEP 489
+(Multi-phase extension module initialization).
+
+Additionally, support for easier creation of immutable exception classes is added.
+This removes the need for keeping per-module state if it would only be used
+for exception classes.
+
+While this PEP takes an additional step towards fully solving the problems that PEP 3121 and PEP 489 started
+tackling, it does not attempt to resolve *all* remaining concerns. In particular, accessing the module state from slot methods (``nb_add``, etc.) remains slower than accessing that state from other extension methods.
+
+
+Terminology
+===========
+
+Process-Global State
+--------------------
+
+C-level static variables. Since this is very low-level
+memory storage, it must be managed carefully.
+
+Per-module State
+----------------
+
+State local to a module object, allocated dynamically as part of a
+module object's initialization. This isolates the state from other
+instances of the module (including those in other subinterpreters).
+
+Accessed by ``PyModule_GetState()``.
+
+
+Static Type
+-----------
+
+A type object defined as a C-level static variable, i.e. a compiled-in type object.
+
+A static type needs to be shared between module instances and has no
+information about which module it belongs to.
+Static types do not have ``__dict__`` (although their instances might).
+
+Heap Type
+---------
+
+A type object created at run time.
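+
+As a rough illustration (not part of this PEP's specification), the
+distinction is visible from Python code: static types reject attribute
+assignment, while heap types accept it::
+
+    >>> list.foo = 1     # list is a static type
+    Traceback (most recent call last):
+      ...
+    TypeError: can't set attributes of built-in/extension type 'list'
+    >>> class C: pass    # classes created by a class statement are heap types
+    >>> C.foo = 1        # accepted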
+
+
+Rationale
+=========
+
+PEP 489 introduced a new way to initialize extension modules, which brings
+several advantages to extensions that implement it:
+
+ * The extension modules behave more like their Python counterparts.
+ * The extension modules can easily support loading into pre-existing
+   module objects, which paves the way for extension module support for
+   ``runpy`` or for systems that enable extension module reloading.
+ * Loading multiple modules from the same extension is possible, which
+   makes testing module isolation (a key feature for proper sub-interpreter
+   support) possible from a single interpreter.
+
+The biggest hurdle for adoption of PEP 489 is allowing access to module state
+from methods of extension types.
+Currently, the way to access this state from extension methods is by looking up the module via
+``PyState_FindModule`` (in contrast to module level functions in extension modules, which
+receive a module reference as an argument).
+However, ``PyState_FindModule`` queries the thread-local state, making it relatively
+costly compared to C level process global access and consequently deterring module authors from using it.
+
+Also, ``PyState_FindModule`` relies on the assumption that in each
+subinterpreter, there is at most one module corresponding to
+a given ``PyModuleDef``. This does not align well with Python's import
+machinery. Since PEP 489 aimed to fix that, the assumption does
+not hold for modules that use multi-phase initialization, so
+``PyState_FindModule`` is unavailable for these modules.
+
+A faster, safer way of accessing module-level state from extension methods
+is needed.
+
+
+Immutable Exception Types
+-------------------------
+
+For isolated modules to work, any class whose methods touch module state
+must be a heap type, so that each instance of a module can have its own
+type object. With the changes proposed in this PEP, heap type instances will
+have access to module state without global registration. But, to create
+instances of heap types, one will need the module state in order to
+get the type object corresponding to the appropriate module.
+In short, heap types are "viral": anything that "touches" them must itself be
+a heap type.
+
+Currently, most exception types, apart from the ones in ``builtins``, are
+heap types. This is likely simply because there is a convenient way
+to create them: ``PyErr_NewException``.
+Heap types generally have a mutable ``__dict__``.
+In most cases, this mutability is harmful. For example, exception types
+from the ``sqlite3`` module are mutable and shared across subinterpreters.
+This allows "smuggling" values to other subinterpreters via attributes of
+``sqlite3.Error``.
+
+Moreover, since raising exceptions is a common operation and heap types
+will be "viral", ``PyErr_NewException`` will tend to "infect" the module
+with "heap type-ness", at least if the module decides to play well with
+subinterpreters/isolation.
+Many modules could go without module state
+entirely if the exception classes were immutable.
+
+To solve this problem, a new function for creating immutable exception types
+is proposed.
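+
+As a rough sketch of the "smuggling" problem (assuming the two snippets
+below run in two different subinterpreters that share the mutable
+``sqlite3.Error`` heap type)::
+
+    # In subinterpreter A: attach arbitrary state to the shared type.
+    import sqlite3
+    sqlite3.Error.smuggled = {"secret": 42}
+
+    # In subinterpreter B: the supposedly isolated interpreter sees it.
+    import sqlite3
+    print(sqlite3.Error.smuggled)   # {'secret': 42}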
+
+
+Background
+==========
+
+The implementation of a Python method may need access to one or more of
+the following pieces of information:
+
+ * The instance it is called on (``self``)
+ * The underlying function
+ * The class the method was defined in
+ * The corresponding module
+ * The module state
+
+In Python code, the Python-level equivalents may be retrieved as::
+
+    import sys
+
+    def meth(self):
+        instance = self
+        module_globals = globals()
+        module_object = sys.modules[__name__]  # (1)
+        underlying_function = Foo.meth         # (1)
+        defining_class = Foo                   # (1)
+        defining_class = __class__             # (2)
+
+.. note::
+
+   The defining class is not ``type(self)``, since ``type(self)`` might
+   be a subclass of ``Foo``.
+
+The statements marked (1) implicitly rely on name-based lookup via the function's ``__globals__``:
+either the ``Foo`` attribute to access the defining class and Python function object, or ``__name__`` to find the module object in ``sys.modules``.
+In Python code, this is feasible, as ``__globals__`` is set appropriately when the function definition is executed, and
+even if the namespace has been manipulated to return a different object, at worst an exception will be raised.
+
+The ``__class__`` closure, (2), is a safer way to get the defining class, but it still relies on ``__closure__`` being set appropriately.
+
+By contrast, extension methods are typically implemented as normal C functions.
+This means that they only have access to their arguments and C level thread-local
+and process-global states. Traditionally, many extension modules have stored
+their shared state in C-level process globals, causing problems when:
+
+ * running multiple initialize/finalize cycles in the same process
+ * reloading modules (e.g. to test conditional imports)
+ * loading extension modules in subinterpreters
+
+PEP 3121 attempted to resolve this by offering the ``PyState_FindModule`` API, but this still has significant problems when it comes to extension methods (rather than module level functions):
+
+ * it is markedly slower than directly accessing C-level process-global state
+ * there is still some inherent reliance on process global state that means it still doesn't reliably handle module reloading
+
+It's also the case that when looking up a C-level struct such as module state, supplying
+an unexpected object layout can crash the interpreter, so it's significantly more important to ensure that extension
+methods receive the kind of object they expect.
+
+Proposal
+========
+
+Currently, a bound extension method (``PyCFunction`` or ``PyCFunctionWithKeywords``) receives only
+``self``, and (if applicable) the supplied positional and keyword arguments.
+
+While module-level extension functions already receive access to the defining module object via their
+``self`` argument, methods of extension types don't have that luxury: they receive the bound instance
+via ``self``, and hence have no direct access to the defining class or the module level state.
+
+The additional module level context described above can be made available with two changes.
+Both additions are optional; extension authors need to opt in to start
+using them:
+
+ * Add a pointer to the module to heap type objects.
+
+ * Pass the defining class to the underlying C function.
+
+   The defining class is readily available at the time the built-in
+   method object (``PyCFunctionObject``) is created, so it can be stored
+   in a new struct that extends ``PyCFunctionObject``.
+
+The module state can then be retrieved from the module object via
+``PyModule_GetState``.
+
+Note that this proposal implies that any type whose method needs to access
+per-module state must be a heap type, rather than a static type.
+
+This is necessary to support loading multiple module objects from a single
+extension: a static type, as a C-level global, has no information about
+which module it belongs to.
+
+
+Slot methods
+------------
+
+The above changes don't cover slot methods, such as ``tp_iter`` or ``nb_add``.
+
+The problem with slot methods is that their C API is fixed, so we can't
+simply add a new argument to pass in the defining class.
+Two possible solutions have been proposed to this problem:
+
+ * Look up the class by walking the MRO.
+   This is potentially expensive, but will be useful if performance is not
+   a problem (such as when raising a module-level exception).
+ * Storing a pointer to the defining class of each slot in a separate table,
+   ``__typeslots__`` [#typeslots-mail]_. This is technically feasible and fast,
+   but quite invasive.
+
+Due to the invasiveness of the latter approach, this PEP proposes adding an MRO walking
+helper for use in slot method implementations, deferring the more complex alternative
+as a potential future optimisation. Modules affected by this concern also have the
+option of using thread-local state or PEP 567 context variables, or else defining their
+own reload-friendly lookup caching scheme.
+
+
+Immutable Exception Types
+-------------------------
+
+To facilitate creating static exception classes, a new function is proposed:
+``PyErr_PrepareImmutableException``. It will work similarly to ``PyErr_NewExceptionWithDoc``
+but will take a ``PyTypeObject **`` pointer, which points to a ``PyTypeObject *`` that is
+either ``NULL`` or an initialized ``PyTypeObject``.
+This pointer may be declared in process-global state. The function will then
+allocate the exception type, taking care that an already existing exception
+type is not overwritten.
+
+The extra indirection makes it possible to make ``PyErr_PrepareImmutableException``
+part of the stable ABI by having the Python interpreter, rather than extension code,
+allocate the ``PyTypeObject``.
+
+
+Specification
+=============
+
+Adding module references to heap types
+--------------------------------------
+
+The ``PyHeapTypeObject`` struct will get a new member, ``PyObject *ht_module``,
+that can store a pointer to the module object for which the type was defined.
+It will be ``NULL`` by default, and should not be modified after the type
+object is created.
+
+A new factory method will be added for creating classes::
+
+    PyObject* PyType_FromModuleAndSpec(PyObject *module,
+                                       PyType_Spec *spec,
+                                       PyObject *bases)
+
+This acts the same as ``PyType_FromSpecWithBases``, and additionally sets
+``ht_module`` to the provided module object.
+
+Additionally, an accessor, ``PyObject * PyType_GetModule(PyTypeObject *)``,
+will be provided.
+It will return the ``ht_module`` if a heap type with the module pointer set
+is passed in; otherwise it will set ``SystemError`` and return ``NULL``.
+
+Usually, creating a class with ``ht_module`` set will create a reference
+cycle involving the class and the module.
+This is not a problem, as tearing down modules is not a performance-sensitive
+operation (and module-level functions typically also create reference cycles).
+The existing "set all module globals to None" code that breaks function cycles
+through ``f_globals`` will also break the new cycles through ``ht_module``.
+
+
+Passing the defining class to extension methods
+-----------------------------------------------
+
+A new style of C-level functions will be added to the current selection of
+``PyCFunction`` and ``PyCFunctionWithKeywords``::
+
+    PyObject *PyCMethod(PyObject *self,
+                        PyTypeObject *defining_class,
+                        PyObject *args, PyObject *kwargs)
+
+A new method object flag, ``METH_METHOD``, will be added to signal that
+the underlying C function is ``PyCMethod``.
+
+To hold the extra information, a new structure extending ``PyCFunctionObject``
+will be added::
+
+    typedef struct {
+        PyCFunctionObject func;
+        PyTypeObject *mm_class; /* Passed as 'defining_class' arg to the C func */
+    } PyCMethodObject;
+
+To allow passing the defining class to the underlying C function, a change
+to the private API is required: ``_PyMethodDef_RawFastCallDict`` and
+``_PyMethodDef_RawFastCallKeywords`` will now receive ``PyTypeObject *cls``
+as one of their arguments.
+
+A new macro, ``PyCFunction_GET_CLASS(cls)``, will be added for easier access to ``mm_class``.
+
+Method construction and calling code will be updated to honor
+``METH_METHOD``.
+
+
+Argument Clinic
+---------------
+
+To support passing the defining class to methods using Argument Clinic,
+a new converter will be added to clinic.py: ``defining_class``.
+
+Each method may only have one argument using this converter, and it must
+appear after ``self``, or, if ``self`` is not used, as the first argument.
+The argument will be of type ``PyTypeObject *``.
+
+When used, Argument Clinic will select ``METH_METHOD`` as the calling
+convention.
+The argument will not appear in ``__text_signature__``.
+
+This will be compatible with ``__init__`` and ``__new__`` methods, where an
+MRO walker will be used to pass the defining class from clinic-generated
+code to the user's function.
+
+
+Slot methods
+------------
+
+To allow access to per-module state from slot methods, an MRO walker
+will be implemented::
+
+    PyTypeObject *PyType_DefiningTypeFromSlotFunc(PyTypeObject *type,
+                                                  int slot, void *func)
+
+The walker will go through the bases of the heap-allocated ``type``
+and search for the class that defines ``func`` at its ``slot``.
+
+``func`` need not be inherited by ``type``; the only requirement
+for the walker to find the defining class is that the defining class
+must be heap-allocated.
+
+On failure, an exception is set and ``NULL`` is returned.
+
+
+Static exceptions
+-----------------
+
+A new function will be added::
+
+    int PyErr_PrepareImmutableException(PyTypeObject **exc,
+                                        const char *name,
+                                        const char *doc,
+                                        PyObject *base)
+
+It creates an immutable exception type which can be shared
+across multiple module objects.
+If the type already exists (determined by a process-global pointer,
+``*exc``), the initialization is skipped and ``*exc`` is only increfed.
+
+If ``*exc`` is ``NULL``, the function will
+allocate a new exception type and initialize it using the given parameters,
+the same way ``PyType_FromSpecAndBases`` would.
+The ``doc`` and ``base`` arguments may be ``NULL``, defaulting to a
+missing docstring and the ``PyExc_Exception`` base class, respectively.
+The exception type's ``tp_flags`` will be set to the values common to
+built-in exceptions, and the ``Py_TPFLAGS_HEAP_IMMUTABLE`` flag (see below)
+will be set.
+On failure, ``PyErr_PrepareImmutableException`` will set an exception
+and return -1.
+
+If called with an initialized exception type (``*exc``
+is non-NULL), the function will do nothing but incref ``*exc``.
+
+A new flag, ``Py_TPFLAGS_HEAP_IMMUTABLE``, will be added to prevent
+mutation of the type object. This makes it possible to
+share the object safely between multiple interpreters.
+This flag is checked in ``type_setattro`` and blocks
+setting of attributes when set, similar to built-in types.
+
+A new pointer, ``ht_moduleptr``, will be added to heap types to store ``exc``.
+
+On deinitialization of the exception type, ``*exc`` will be set to ``NULL``.
+This makes it safe for ``PyErr_PrepareImmutableException`` to check whether
+the exception type was already initialized.
+
+PyType_offsets
+--------------
+
+Some extension types have instances with ``__dict__`` or ``__weakref__``
+allocated. Currently, there is no way of passing the offsets of these through
+``PyType_Spec``. To allow this, a new structure and a spec slot are proposed.
+
+A new structure, ``PyType_offsets``, will have two members containing the
+offsets of ``__dict__`` and ``__weakref__``::
+
+    typedef struct {
+        Py_ssize_t dict;
+        Py_ssize_t weaklist;
+    } PyType_offsets;
+
+The new slot, ``Py_offsets``, will be used to pass a ``PyType_offsets *``
+structure containing the mentioned data.
+
+
+Helpers
+-------
+
+Getting to per-module state from a heap type is a very common task. To make this
+easier, a helper will be added::
+
+    void *PyType_GetModuleState(PyObject *type)
+
+This function takes a heap type and, on success, returns a pointer to the state of the
+module that the heap type belongs to.
+
+On failure, two scenarios may occur. When a type without a module is passed in,
+``SystemError`` is set and ``NULL`` is returned. If the module is found, a pointer
+to the state, which may be ``NULL``, is returned without setting any exception.
+
+
+Modules Converted in the Initial Implementation
+-----------------------------------------------
+
+To validate the approach, several modules will be modified during
+the initial implementation:
+
+The ``zipimport``, ``_io``, ``_elementtree``, and ``_csv`` modules
+will be ported to PEP 489 multiphase initialization.
+
+
+Summary of API Changes and Additions
+====================================
+
+New functions:
+
+* PyType_GetModule
+* PyType_DefiningTypeFromSlotFunc
+* PyType_GetModuleState
+* PyErr_PrepareImmutableException
+
+New macros:
+
+* PyCFunction_GET_CLASS
+
+New types:
+
+* PyCMethodObject
+
+New structures:
+
+* PyType_offsets
+
+Modified functions:
+
+* _PyMethodDef_RawFastCallDict now receives ``PyTypeObject *cls``.
+* _PyMethodDef_RawFastCallKeywords now receives ``PyTypeObject *cls``.
+
+Modified structures:
+
+* _heaptypeobject - added ht_module and ht_moduleptr
+
+Other changes:
+
+* METH_METHOD call flag
+* defining_class converter in clinic
+* Py_TPFLAGS_HEAP_IMMUTABLE flag
+* Py_offsets type spec slot
+
+
+Backwards Compatibility
+=======================
+
+Two new pointers are added to all heap types.
+All other changes add new functions, structures and a type flag.
+
+The new ``PyErr_PrepareImmutableException`` function encourages
+modules to switch from using mutable heap type exception classes to immutable ones,
+and a number of modules will be switched in the initial implementation.
+This change will prevent adding class attributes to such types.
+For example, the following will raise ``AttributeError``::
+
+    sqlite3.OperationalError.foo = None
+
+Instances and subclasses of such exceptions will not be affected.
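+
+A short sketch of the intended behaviour, assuming ``sqlite3`` has been
+switched to an immutable exception type::
+
+    import sqlite3
+
+    # Class attribute assignment on the immutable type is rejected:
+    try:
+        sqlite3.OperationalError.foo = None
+    except AttributeError:
+        print("type is immutable")
+
+    # Raising and catching instances keeps working unchanged:
+    try:
+        raise sqlite3.OperationalError("no such table: t")
+    except sqlite3.OperationalError as exc:
+        print(exc.args)   # ('no such table: t',)
+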
+
+Implementation
+==============
+
+An initial implementation is available in a GitHub repository [#gh-repo]_;
+a patchset is at [#gh-patch]_.
+
+
+Possible Future Extensions
+==========================
+
+Easy creation of types with module references
+---------------------------------------------
+
+It would be possible to add a PEP 489 execution slot type to make
+creating heap types significantly easier than calling
+``PyType_FromModuleAndSpec``.
+This is left to a future PEP.
+
+
+Optimization
+------------
+
+CPython optimizes calls to methods that have restricted signatures,
+such as not allowing keyword arguments.
+
+As proposed here, methods defined with the ``METH_METHOD`` flag do not support
+these optimizations.
+
+Optimized calls still have the option of accessing per-module state
+the same way slot methods do.
+
+
+References
+==========
+
+.. [#typeslots-mail] [Import-SIG] On singleton modules, heap types, and subinterpreters
+   (https://mail.python.org/pipermail/import-sig/2015-July/001035.html)
+
+.. [#gh-repo]
+   https://github.com/Traceur759/cpython/commits/pep-c
+
+.. [#gh-patch]
+   https://github.com/Traceur759/cpython/compare/master...Traceur759:pep-c.patch
+
+
+Copyright
+=========
+
+This document has been placed in the public domain.
+
+
+
+..
+   Local Variables:
+   mode: indented-text
+   indent-tabs-mode: nil
+   sentence-end-double-space: t
+   fill-column: 70
+   coding: utf-8
+   End:
diff --git a/pep-0574.rst b/pep-0574.rst
new file mode 100644
index 000000000..1755f6a51
--- /dev/null
+++ b/pep-0574.rst
@@ -0,0 +1,512 @@
+PEP: 574
+Title: Pickle protocol 5 with out-of-band data
+Version: $Revision$
+Last-Modified: $Date$
+Author: Antoine Pitrou
+Status: Draft
+Type: Standards Track
+Content-Type: text/x-rst
+Created: 23-Mar-2018
+Post-History: 28-Mar-2018
+Resolution:
+
+
+Abstract
+========
+
+This PEP proposes to standardize a new pickle protocol version, and
+accompanying APIs to take full advantage of it:
+
+1. A new pickle protocol version (5) to cover the extra metadata needed
+   for out-of-band data buffers.
+2. A new ``PickleBuffer`` type for ``__reduce_ex__`` implementations
+   to return out-of-band data buffers.
+3. A new ``buffer_callback`` parameter when pickling, to handle out-of-band
+   data buffers.
+4. A new ``buffers`` parameter when unpickling to provide out-of-band data
+   buffers.
+
+The PEP guarantees unchanged behaviour for anyone not using the new APIs.
+
+
+Rationale
+=========
+
+The pickle protocol was originally designed in 1995 for on-disk persistency
+of arbitrary Python objects. The performance of a 1995-era storage medium
+probably made it irrelevant to focus on performance metrics such as
+use of RAM bandwidth when copying temporary data before writing it to disk.
+
+Nowadays the pickle protocol sees a growing use in applications where most
+of the data isn't ever persisted to disk (or, when it is, it uses a portable
+format instead of a Python-specific one). Instead, pickle is being used to transmit
+data and commands from one process to another, either on the same machine
+or on multiple machines. Those applications will sometimes deal with very
+large data (such as Numpy arrays or Pandas dataframes) that need to be
+transferred around. For those applications, pickle is currently
+wasteful as it imposes spurious memory copies of the data being serialized.
+
+As a matter of fact, the standard ``multiprocessing`` module uses pickle
+for serialization, and therefore also suffers from this problem when
+sending large data to another process.
+
+Third-party Python libraries, such as Dask [#dask]_, PyArrow [#pyarrow]_
+and IPyParallel [#ipyparallel]_, have started implementing alternative
+serialization schemes with the explicit goal of avoiding copies on large
+data. Implementing a new serialization scheme is difficult and often
+leads to reduced generality (since many Python objects support pickle
+but not the new serialization scheme). Falling back on pickle for
+unsupported types is an option, but then you get back the spurious
+memory copies you wanted to avoid in the first place. For example,
+``dask`` is able to avoid memory copies for Numpy arrays and
+built-in containers thereof (such as lists or dicts containing Numpy
+arrays), but if a large Numpy array is an attribute of a user-defined
+object, ``dask`` will serialize the user-defined object as a pickle
+stream, leading to memory copies.
+
+The common theme of these third-party serialization efforts is to generate
+a stream of object metadata (which contains pickle-like information about
+the objects being serialized) and a separate stream of zero-copy buffer
+objects for the payloads of large objects. Note that, in this scheme,
+small objects such as ints, etc. can be dumped together with the metadata
+stream. Refinements can include opportunistic compression of large data
+depending on its type and layout, like ``dask`` does.
+
+This PEP aims to make ``pickle`` usable in a way where large data is handled
+as a separate stream of zero-copy buffers, letting the application handle
+those buffers optimally.
+
+
+Example
+=======
+
+To keep the example simple and avoid requiring knowledge of third-party
+libraries, we will focus here on a bytearray object (but the issue is
+conceptually the same with more sophisticated objects such as Numpy arrays).
+Like most objects, the bytearray object isn't immediately understood by
+the pickle module and must therefore specify its decomposition scheme.
+
+Here is how a bytearray object currently decomposes for pickling::
+
+    >>> b.__reduce_ex__(4)
+    (<class 'bytearray'>, (b'abc',), None)
+
+This is because the ``bytearray.__reduce_ex__`` implementation reads
+morally as follows::
+
+    class bytearray:
+
+        def __reduce_ex__(self, protocol):
+            if protocol == 4:
+                return type(self), (bytes(self),), None
+            # Legacy code for earlier protocols omitted
+
+In turn it produces the following pickle code::
+
+    >>> pickletools.dis(pickletools.optimize(pickle.dumps(b, protocol=4)))
+        0: \x80 PROTO      4
+        2: \x95 FRAME      30
+       11: \x8c SHORT_BINUNICODE 'builtins'
+       21: \x8c SHORT_BINUNICODE 'bytearray'
+       32: \x93 STACK_GLOBAL
+       33: C    SHORT_BINBYTES b'abc'
+       38: \x85 TUPLE1
+       39: R    REDUCE
+       40: .    STOP
+
+(the call to ``pickletools.optimize`` above is only meant to make the
+pickle stream more readable by removing the MEMOIZE opcodes)
+
+We can notice several things about the bytearray's payload (the sequence
+of bytes ``b'abc'``):
+
+* ``bytearray.__reduce_ex__`` produces a first copy by instantiating a
+  new bytes object from the bytearray's data.
+* ``pickle.dumps`` produces a second copy when inserting the contents of
+  that bytes object into the pickle stream, after the SHORT_BINBYTES opcode.
+* Furthermore, when deserializing the pickle stream, a temporary bytes
+  object is created when the SHORT_BINBYTES opcode is encountered (inducing
+  a data copy).
+
+What we really want is something like the following:
+
+* ``bytearray.__reduce_ex__`` produces a *view* of the bytearray's data.
+* ``pickle.dumps`` doesn't try to copy that data into the pickle stream + but instead passes the buffer view to its caller (which can decide on the + most efficient handling of that buffer). +* When deserializing, ``pickle.loads`` takes the pickle stream and the + buffer view separately, and passes the buffer view directly to the + bytearray constructor. + +We see that several conditions are required for the above to work: + +* ``__reduce__`` or ``__reduce_ex__`` must be able to return *something* + that indicates a serializable no-copy buffer view. +* The pickle protocol must be able to represent references to such buffer + views, instructing the unpickler that it may have to get the actual buffer + out of band. +* The ``pickle.Pickler`` API must provide its caller with a way + to receive such buffer views while serializing. +* The ``pickle.Unpickler`` API must similarly allow its caller to provide + the buffer views required for deserialization. +* For compatibility, the pickle protocol must also be able to contain direct + serializations of such buffer views, such that current uses of the ``pickle`` + API don't have to be modified if they are not concerned with memory copies. + + +Producer API +============ + +We are introducing a new type ``pickle.PickleBuffer`` which can be +instantiated from any buffer-supporting object, and is specifically meant +to be returned from ``__reduce__`` implementations:: + + class bytearray: + + def __reduce_ex__(self, protocol): + if protocol >= 5: + return type(self), (PickleBuffer(self),), None + # Legacy code for earlier protocols omitted + +``PickleBuffer`` is a simple wrapper that doesn't have all the memoryview +semantics and functionality, but is specifically recognized by the ``pickle`` +module if protocol 5 or higher is enabled. It is an error to try to +serialize a ``PickleBuffer`` with pickle protocol version 4 or earlier. + +Only the raw *data* of the ``PickleBuffer`` will be considered by the +``pickle`` module. Any type-specific *metadata* (such as shapes or +datatype) must be returned separately by the type's ``__reduce__`` +implementation, as is already the case. + + +PickleBuffer objects +-------------------- + +The ``PickleBuffer`` class supports a very simple Python API. Its constructor +takes a single PEP 3118-compatible object [#pep-3118]_. ``PickleBuffer`` +objects themselves support the buffer protocol, so consumers can +call ``memoryview(...)`` on them to get additional information +about the underlying buffer (such as the original type, shape, etc.). +In addition, ``PickleBuffer`` objects can be explicitly released using +their ``release()`` method. + +On the C side, a simple API will be provided to create and inspect +PickleBuffer objects: + +``PyObject *PyPickleBuffer_FromObject(PyObject *obj)`` + + Create a ``PickleBuffer`` object holding a view over the PEP 3118-compatible + *obj*. + +``PyPickleBuffer_Check(PyObject *obj)`` + + Return whether *obj* is a ``PickleBuffer`` instance. + +``const Py_buffer *PyPickleBuffer_GetBuffer(PyObject *picklebuf)`` + + Return a pointer to the internal ``Py_buffer`` owned by the ``PickleBuffer`` + instance. An exception is raised if the buffer is released. + +``int PyPickleBuffer_Release(PyObject *picklebuf)`` + + Release the ``PickleBuffer`` instance's underlying buffer. + + +``PickleBuffer`` can wrap any kind of buffer, including non-contiguous +buffers. 
It's up to consumers to decide how best to handle different kinds
+of buffers (for example, some consumers may find it acceptable to make a
+contiguous copy of non-contiguous buffers).
+
+
+Consumer API
+============
+
+``pickle.Pickler.__init__`` and ``pickle.dumps`` are augmented with an additional
+``buffer_callback`` parameter::
+
+    class Pickler:
+        def __init__(self, file, protocol=None, ..., buffer_callback=None):
+            """
+            If *buffer_callback* is not None, then it is called with a list
+            of out-of-band buffer views when deemed necessary (this could be
+            once per buffer, or only after a certain size is reached,
+            or once at the end, depending on implementation details). The
+            callback should arrange to store or transmit those buffers without
+            changing their order.
+
+            If *buffer_callback* is None (the default), buffer views are
+            serialized into *file* as part of the pickle stream.
+
+            It is an error if *buffer_callback* is not None and *protocol* is
+            None or smaller than 5.
+            """
+
+    def pickle.dumps(obj, protocol=None, *, ..., buffer_callback=None):
+        """
+        See above for *buffer_callback*.
+        """
+
+``pickle.Unpickler.__init__`` and ``pickle.loads`` are augmented with an
+additional ``buffers`` parameter::
+
+    class Unpickler:
+        def __init__(self, file, *, ..., buffers=None):
+            """
+            If *buffers* is not None, it should be an iterable of buffer-enabled
+            objects that is consumed each time the pickle stream references
+            an out-of-band buffer view. Such buffers have been given in order
+            to the *buffer_callback* of a Pickler object.
+
+            If *buffers* is None (the default), then the buffers are taken
+            from the pickle stream, assuming they are serialized there.
+            It is an error for *buffers* to be None if the pickle stream
+            was produced with a non-None *buffer_callback*.
+            """
+
+    def pickle.loads(data, *, ..., buffers=None):
+        """
+        See above for *buffers*.
+        """
+
+
+Protocol changes
+================
+
+Three new opcodes are introduced:
+
+* ``BYTEARRAY8`` creates a bytearray from the data following it in the pickle
+  stream and pushes it on the stack (just like ``BINBYTES8`` does for bytes
+  objects);
+* ``NEXT_BUFFER`` fetches a buffer from the ``buffers`` iterable and pushes
+  it on the stack;
+* ``READONLY_BUFFER`` makes a readonly view of the top of the stack.
+
+When pickling encounters a ``PickleBuffer``, there can be four cases:
+
+* If a ``buffer_callback`` is given and the ``PickleBuffer`` is writable,
+  the ``PickleBuffer`` is given to the callback and a ``NEXT_BUFFER`` opcode
+  is appended to the pickle stream.
+* If a ``buffer_callback`` is given and the ``PickleBuffer`` is readonly,
+  the ``PickleBuffer`` is given to the callback and a ``NEXT_BUFFER`` opcode
+  is appended to the pickle stream, followed by a ``READONLY_BUFFER`` opcode.
+* If no ``buffer_callback`` is given and the ``PickleBuffer`` is writable,
+  it is serialized into the pickle stream as if it were a ``bytearray`` object.
+* If no ``buffer_callback`` is given and the ``PickleBuffer`` is readonly,
+  it is serialized into the pickle stream as if it were a ``bytes`` object.
+
+The distinction between readonly and writable buffers is explained below
+(see "Mutability").
+
+
+Side effects
+============
+
+Improved in-band performance
+----------------------------
+
+Even in-band pickling can be improved by returning a ``PickleBuffer``
+instance from ``__reduce_ex__``, as one copy is avoided on the serialization
+path [#ogrisel-numpy]_.
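+
+As an end-to-end sketch of the producer and consumer APIs together
+(assuming the protocol-5 ``bytearray.__reduce_ex__`` shown earlier, which
+returns a ``PickleBuffer`` wrapping the payload)::
+
+    import pickle
+
+    buffers = []
+    data = pickle.dumps(bytearray(b'abc'), protocol=5,
+                        buffer_callback=buffers.extend)
+    # `data` holds only the metadata stream; the payload buffers were
+    # handed to `buffer_callback` and must be supplied again, in order:
+    b = pickle.loads(data, buffers=buffers)
+    assert b == bytearray(b'abc')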
+
+
+Caveats
+=======
+
+Mutability
+----------
+
+PEP 3118 buffers [#pep-3118]_ can be readonly or writable. Some objects,
+such as Numpy arrays, need to be backed by a mutable buffer for full
+operation. Pickle consumers that use the ``buffer_callback`` and ``buffers``
+arguments will have to be careful to recreate mutable buffers. When doing
+I/O, this implies using buffer-passing API variants such as ``readinto``
+(which are also often preferable for performance).
+
+Data sharing
+------------
+
+If you pickle and then unpickle an object in the same process, passing
+out-of-band buffer views, then the unpickled object may be backed by the
+same buffer as the original pickled object.
+
+For example, it might be reasonable to implement reduction of a Numpy array
+as follows (crucial metadata such as shapes is omitted for simplicity)::
+
+    class ndarray:
+
+        def __reduce_ex__(self, protocol):
+            if protocol == 5:
+                return numpy.frombuffer, (PickleBuffer(self), self.dtype)
+            # Legacy code for earlier protocols omitted
+
+Then simply passing the PickleBuffer around from ``dumps`` to ``loads``
+will produce a new Numpy array sharing the same underlying memory as the
+original Numpy object (and, incidentally, keeping it alive)::
+
+    >>> import numpy as np
+    >>> a = np.zeros(10)
+    >>> a[0]
+    0.0
+    >>> buffers = []
+    >>> data = pickle.dumps(a, protocol=5, buffer_callback=buffers.extend)
+    >>> b = pickle.loads(data, buffers=buffers)
+    >>> b[0] = 42
+    >>> a[0]
+    42.0
+
+This won't happen with the traditional ``pickle`` API (i.e. without passing
+``buffers`` and ``buffer_callback`` parameters), because then the buffer view
+is serialized inside the pickle stream with a copy.
+
+
+Rejected alternatives
+=====================
+
+Using the existing persistent load interface
+--------------------------------------------
+
+The ``pickle`` persistence interface is a way of storing references to
+designated objects in the pickle stream while handling their actual
+serialization out of band. For example, one might consider the following
+for zero-copy serialization of bytearrays::
+
+    class MyPickle(pickle.Pickler):
+
+        def __init__(self, *args, **kwargs):
+            super().__init__(*args, **kwargs)
+            self.buffers = []
+
+        def persistent_id(self, obj):
+            if type(obj) is not bytearray:
+                return None
+            else:
+                index = len(self.buffers)
+                self.buffers.append(obj)
+                return ('bytearray', index)
+
+
+    class MyUnpickle(pickle.Unpickler):
+
+        def __init__(self, *args, buffers, **kwargs):
+            super().__init__(*args, **kwargs)
+            self.buffers = buffers
+
+        def persistent_load(self, pid):
+            type_tag, index = pid
+            if type_tag == 'bytearray':
+                return self.buffers[index]
+            else:
+                assert 0  # unexpected type
+
+This mechanism has two drawbacks:
+
+* Each ``pickle`` consumer must reimplement ``Pickler`` and ``Unpickler``
+  subclasses, with custom code for each type of interest. Essentially,
+  N pickle consumers end up each implementing custom code for M producers.
+  This is difficult (especially for sophisticated types such as Numpy
+  arrays) and poorly scalable.
+
+* Each object encountered by the pickle module (even simple built-in objects
+  such as ints and strings) triggers a call to the user's ``persistent_id()``
+  method, leading to a possible performance drop compared to nominal.
+
+
+Open questions
+==============
+
+Should ``buffer_callback`` take a single buffer or a sequence of buffers?
+
+* Taking a single buffer would allow returning a boolean indicating whether
+  the given buffer should be serialized in-band or out-of-band.
+* Taking a sequence of buffers is potentially more efficient by reducing
+  function call overhead.
+
+Should it be allowed to serialize a ``PickleBuffer`` in protocol 4 and earlier?
+It would simply be serialized as a ``bytes`` object (if read-only) or
+``bytearray`` (if writable).
+
+* It can make implementing ``__reduce__`` simpler.
+* Serializing a ``bytearray`` in protocol 4 makes a supplementary memory
+  copy when ``bytearray.__reduce_ex__`` returns a ``bytes`` object. This
+  is a performance regression that may be overlooked by ``__reduce__``
+  implementors.
+
+
+Implementation
+==============
+
+A first implementation is available in the author's GitHub fork [#pickle5-git]_.
+
+An experimental backport for Python 3.6 and 3.7 is downloadable from PyPI
+[#pickle5-pypi]_.
+
+
+Related work
+============
+
+Dask.distributed implements a custom zero-copy serialization with fallback
+to pickle [#dask-serialization]_.
+
+PyArrow implements zero-copy component-based serialization for a few
+selected types [#pyarrow-serialization]_.
+
+PEP 554 proposes hosting multiple interpreters in a single process, with
+provisions for transferring buffers between interpreters as a communication
+scheme [#pep-554]_.
+
+
+Acknowledgements
+================
+
+Thanks to the following people for early feedback: Nick Coghlan, Olivier
+Grisel, Stefan Krah, MinRK, Matt Rocklin, Eric Snow.
+
+
+References
+==========
+
+.. [#dask] Dask.distributed -- A lightweight library for distributed computing
+   in Python
+   https://distributed.readthedocs.io/
+
+.. [#dask-serialization] Dask.distributed custom serialization
+   https://distributed.readthedocs.io/en/latest/serialization.html
+
+.. [#ipyparallel] IPyParallel -- Using IPython for parallel computing
+   https://ipyparallel.readthedocs.io/
+
+.. [#pyarrow] PyArrow -- A cross-language development platform for in-memory data
+   https://arrow.apache.org/docs/python/
+
+.. [#pyarrow-serialization] PyArrow IPC and component-based serialization
+   https://arrow.apache.org/docs/python/ipc.html#component-based-serialization
+
+.. [#pep-3118] PEP 3118 -- Revising the buffer protocol
+   https://www.python.org/dev/peps/pep-3118/
+
+.. [#pep-554] PEP 554 -- Multiple Interpreters in the Stdlib
+   https://www.python.org/dev/peps/pep-0554/
+
+.. [#pickle5-git] ``pickle5`` branch on GitHub
+   https://github.com/pitrou/cpython/tree/pickle5
+
+.. [#pickle5-pypi] ``pickle5`` project on PyPI
+   https://pypi.org/project/pickle5/
+
+.. [#ogrisel-numpy] Draft use of pickle protocol 5 for Numpy array pickling
+   https://gist.github.com/ogrisel/a2b0e5ae4987a398caa7f9277cb3b90a
+
+
+Copyright
+=========
+
+This document has been placed in the public domain.
+
+..
+   Local Variables:
+   mode: indented-text
+   indent-tabs-mode: nil
+   sentence-end-double-space: t
+   fill-column: 70
+   coding: utf-8
+   End:
diff --git a/pep-0575.rst b/pep-0575.rst
new file mode 100644
index 000000000..1ded295d1
--- /dev/null
+++ b/pep-0575.rst
@@ -0,0 +1,1122 @@
+PEP: 575
+Title: Unifying function/method classes
+Author: Jeroen Demeyer
+Status: Withdrawn
+Type: Standards Track
+Content-Type: text/x-rst
+Created: 27-Mar-2018
+Python-Version: 3.8
+Post-History: 31-Mar-2018, 12-Apr-2018, 27-Apr-2018, 5-May-2018
+
+
+Withdrawal notice
+=================
+
+See PEP 580 for a better solution to allowing fast calling of custom classes.
+ +See PEP 579 for a broader discussion of some of the other issues from this PEP. + + +Abstract +======== + +Reorganize the class hierarchy for functions and methods +with the goal of reducing the difference between +built-in functions (implemented in C) and Python functions. +Mainly, make built-in functions behave more like Python functions +without sacrificing performance. + +A new base class ``base_function`` is introduced and the various function +classes, as well as ``method`` (renamed to ``bound_method``), inherit from it. + +We also allow subclassing the Python ``function`` class. + + +Motivation +========== + +Currently, CPython has two different function classes: +the first is Python functions, which is what you get +when defining a function with ``def`` or ``lambda``. +The second is built-in functions such as ``len``, ``isinstance`` or ``numpy.dot``. +These are implemented in C. + +These two classes are implemented completely independently and have different functionality. +In particular, it is currently not possible to implement a function efficiently in C +(only built-in functions can do that) +while still allowing introspection like ``inspect.signature`` or ``inspect.getsourcefile`` +(only Python functions can do that). +This is a problem for projects like Cython [#cython]_ that want to do exactly that. + +In Cython, this was worked around by inventing a new function class called ``cyfunction``. +Unfortunately, a new function class creates problems: +the ``inspect`` module does not recognize such functions as being functions [#bpo30071]_ +and the performance is worse +(CPython has specific optimizations for calling built-in functions). + +A second motivation is more generally making built-in functions and methods +behave more like Python functions and methods. +For example, Python unbound methods are just functions but +unbound methods of extension types (e.g. ``dict.get``) are a distinct class. +Bound methods of Python classes have a ``__func__`` attribute, +bound methods of extension types do not. + +Third, this PEP allows great customization of functions. +The ``function`` class becomes subclassable and custom function +subclasses are also allowed for functions implemented in C. +In the latter case, this can be done with the same performance +as true built-in functions. +All functions can access the function object +(the ``self`` in ``__call__``), paving the way for PEP 573. + + +New classes +=========== + +This is the new class hierarchy for functions and methods:: + + object + | + | + base_function + / | \ + / | \ + / | defined_function + / | \ + cfunction (*) | \ + | function + | + bound_method (*) + +The two classes marked with (*) do *not* allow subclassing; +the others do. + +There is no difference between functions and unbound methods, +while bound methods are instances of ``bound_method``. + +base_function +------------- + +The class ``base_function`` becomes a new base class for all function types. +It is based on the existing ``builtin_function_or_method`` class, +but with the following differences and new features: + +#. It acts as a descriptor implementing ``__get__`` to turn a function into a method + if ``m_self`` is ``NULL``. + If ``m_self`` is not ``NULL``, + then this is a no-op: the existing function is returned instead. + +#. A new read-only attribute ``__parent__``, represented in the C structure as ``m_parent``. + If this attribute exists, it represents the defining object. 
For methods of extension types, this is the defining class (``__class__`` in plain Python)
+   and for functions of a module, this is the defining module.
+   In general, it can be any Python object.
+   If ``__parent__`` is a class, it carries special semantics:
+   in that case, the function must be called with ``self`` being an instance of that class.
+   Finally, ``__qualname__`` and ``__reduce__`` will use ``__parent__``
+   as namespace (instead of ``__self__`` before).
+
+#. A new attribute ``__objclass__`` which equals ``__parent__`` if ``__parent__``
+   is a class. Otherwise, accessing ``__objclass__`` raises ``AttributeError``.
+   This is meant to be backwards compatible with ``method_descriptor``.
+
+#. The field ``ml_doc`` and the attributes ``__doc__`` and
+   ``__text_signature__`` (see Argument Clinic [#clinic]_)
+   are not supported.
+
+#. A new flag ``METH_PASS_FUNCTION`` for ``ml_flags``.
+   If this flag is set, the C function stored in ``ml_meth`` is called with
+   an additional first argument equal to the function object.
+
+#. A new flag ``METH_BINDING`` for ``ml_flags`` which only applies to
+   functions of a module (not methods of a class).
+   If this flag is set, then ``m_self`` is set to ``NULL`` instead
+   of the module.
+   This allows the function to behave more like a Python function
+   as it enables ``__get__``.
+
+#. A new flag ``METH_CALL_UNBOUND`` to disable `self slicing`_.
+
+#. A new flag ``METH_PYTHON`` for ``ml_flags``.
+   This flag indicates that this function should be treated as a Python function.
+   Ideally, use of this flag should be avoided because it goes
+   against the duck typing philosophy.
+   It is still needed in a few places though, for example `profiling`_.
+
+The goal of ``base_function`` is that it supports all different ways
+of calling functions and methods in just one structure.
+For example, the new flag ``METH_PASS_FUNCTION``
+will be used by the implementation of methods.
+
+It is not possible to directly create instances of ``base_function``
+(``tp_new`` is ``NULL``).
+However, it is legal for C code to manually create instances.
+
+These are the relevant C structures::
+
+    PyTypeObject PyBaseFunction_Type;
+
+    typedef struct {
+        PyObject_HEAD
+        PyCFunctionDef *m_ml;     /* Description of the C function to call */
+        PyObject *m_self;         /* __self__: anything, can be NULL; readonly */
+        PyObject *m_module;       /* __module__: anything (typically str) */
+        PyObject *m_parent;       /* __parent__: anything, can be NULL; readonly */
+        PyObject *m_weakreflist;  /* List of weak references */
+    } PyBaseFunctionObject;
+
+    typedef struct {
+        const char *ml_name;   /* The name of the built-in function/method */
+        PyCFunction ml_meth;   /* The C function that implements it */
+        int ml_flags;          /* Combination of METH_xxx flags, which mostly
+                                  describe the args expected by the C func */
+    } PyCFunctionDef;
+
+Subclasses may extend ``PyCFunctionDef`` with extra fields.
+
+The Python attribute ``__self__`` returns ``m_self``,
+except if ``METH_STATIC`` is set.
+In that case or if ``m_self`` is ``NULL``,
+then there is no ``__self__`` attribute at all.
+For that reason, we write either ``m_self`` or ``__self__`` in this PEP
+with slightly different meanings.
+
+cfunction
+---------
+
+This is the new version of the old ``builtin_function_or_method`` class.
+The name ``cfunction`` was chosen to avoid confusion with "built-in"
+in the sense of "something in the ``builtins`` module".
+It also fits better with the C API, which uses the ``PyCFunction`` prefix.
+
+The class ``cfunction`` is a copy of ``base_function``, with the following differences:
+
+#. ``m_ml`` points to a ``PyMethodDef`` structure,
+   extending ``PyCFunctionDef`` with an additional ``ml_doc``
+   field to implement ``__doc__`` and ``__text_signature__``
+   as read-only attributes::
+
+       typedef struct {
+           const char *ml_name;
+           PyCFunction ml_meth;
+           int ml_flags;
+           const char *ml_doc;
+       } PyMethodDef;
+
+   Note that ``PyMethodDef`` is part of the Python Stable ABI [#ABI]_
+   and it is used by practically all extension modules,
+   so we absolutely cannot change this structure.
+
+#. Argument Clinic [#clinic]_ is supported.
+
+#. ``__self__`` always exists. In the cases where ``base_function.__self__``
+   would raise ``AttributeError``, ``None`` is returned instead.
+
+The type object is ``PyTypeObject PyCFunction_Type``
+and we define ``PyCFunctionObject`` as an alias of ``PyBaseFunctionObject``
+(except for the type of ``m_ml``).
+
+defined_function
+----------------
+
+The class ``defined_function`` is an abstract base class meant
+to indicate that the function has introspection support.
+Instances of ``defined_function`` are required to support all attributes
+that Python functions have, namely
+``__code__``, ``__globals__``, ``__doc__``,
+``__defaults__``, ``__kwdefaults__``, ``__closure__`` and ``__annotations__``.
+There is also a ``__dict__`` to support attributes added by the user.
+
+None of these is required to be meaningful.
+In particular, ``__code__`` may not be a working code object,
+possibly with only a few fields filled in.
+This PEP does not dictate how the various attributes are implemented.
+They may be simple struct members or more complicated descriptors.
+Only read-only support is required; none of the attributes needs to be writable.
+
+The class ``defined_function`` is mainly meant for auto-generated C code,
+for example produced by Cython [#cython]_.
+There is no API to create instances of it.
+
+The C structure is the following::
+
+    PyTypeObject PyDefinedFunction_Type;
+
+    typedef struct {
+        PyBaseFunctionObject base;
+        PyObject *func_dict;  /* __dict__: dict or NULL */
+    } PyDefinedFunctionObject;
+
+**TODO**: maybe find a better name for ``defined_function``.
+Other proposals: ``inspect_function`` (anything that satisfies ``inspect.isfunction``),
+``builtout_function`` (a function that is better built out; pun on builtin),
+``generic_function`` (original proposal but conflicts with ``functools.singledispatch`` generic functions),
+``user_function`` (defined by the user as opposed to CPython).
+
+function
+--------
+
+This is the class meant for functions implemented in Python.
+Unlike the other function types,
+instances of ``function`` can be created from Python code.
+This is not changed, so we do not describe the details in this PEP.
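+
+For illustration, a ``function`` instance can already be created from Python
+code today by passing a code object and globals to ``types.FunctionType``
+(existing behaviour, unchanged by this PEP)::
+
+    >>> import types
+    >>> code = compile("print('hello')", "<example>", "exec")
+    >>> f = types.FunctionType(code, globals(), "f")
+    >>> f()
+    hello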
+
+The layout of the C structure is the following::
+
+    PyTypeObject PyFunction_Type;
+
+    typedef struct {
+        PyBaseFunctionObject base;
+        PyObject *func_dict;        /* __dict__: dict or NULL */
+        PyObject *func_code;        /* __code__: code */
+        PyObject *func_globals;     /* __globals__: dict; readonly */
+        PyObject *func_name;        /* __name__: string */
+        PyObject *func_qualname;    /* __qualname__: string */
+        PyObject *func_doc;         /* __doc__: can be anything or NULL */
+        PyObject *func_defaults;    /* __defaults__: tuple or NULL */
+        PyObject *func_kwdefaults;  /* __kwdefaults__: dict or NULL */
+        PyObject *func_closure;     /* __closure__: tuple of cell objects or NULL; readonly */
+        PyObject *func_annotations; /* __annotations__: dict or NULL */
+        PyCFunctionDef _ml;         /* Storage for base.m_ml */
+    } PyFunctionObject;
+
+The descriptor ``__name__`` returns ``func_name``.
+When setting ``__name__``, ``base.m_ml->ml_name`` is also updated
+with the UTF-8 encoded name.
+
+The ``_ml`` field reserves space to be used by ``base.m_ml``.
+
+A ``base_function`` instance must have the flag ``METH_PYTHON`` set
+if and only if it is an instance of ``function``.
+
+When constructing an instance of ``function`` from ``code`` and ``globals``,
+an instance is created with ``base.m_ml = &_ml``
+and ``base.m_self = NULL``.
+
+To make subclassing easier, we also add a copy constructor:
+if ``f`` is an instance of ``function``, then ``types.FunctionType(f)`` copies ``f``.
+This conveniently allows using a custom function type as a decorator::
+
+    >>> from types import FunctionType
+    >>> class CustomFunction(FunctionType):
+    ...     pass
+    >>> @CustomFunction
+    ... def f(x):
+    ...     return x
+    >>> type(f)
+    <class '__main__.CustomFunction'>
+
+This also removes many use cases of ``functools.wraps``:
+wrappers can be replaced by subclasses of ``function``.
+
+bound_method
+------------
+
+The class ``bound_method`` is used for all bound methods,
+regardless of the class of the underlying function.
+It adds one new attribute on top of ``base_function``:
+``__func__``, which points to that function.
+
+``bound_method`` replaces the old ``method`` class,
+which was used only for Python functions bound as methods.
+
+There is a complication because we want to allow
+constructing a method from an arbitrary callable.
+This may be an already-bound method or simply not an instance of ``base_function``.
+Therefore, in practice there are two kinds of methods:
+
+- For arbitrary callables, we use a single fixed ``PyCFunctionDef``
+  structure with the ``METH_PASS_FUNCTION`` flag set.
+
+- For methods which bind instances of ``base_function``
+  (more precisely, which have the ``Py_TPFLAGS_BASEFUNCTION`` flag set)
+  that have `self slicing`_,
+  we instead use the ``PyCFunctionDef`` from the original function.
+  This way, we don't lose any performance when calling bound methods.
+  In this case, the ``__func__`` attribute is only used to implement
+  various attributes but not for calling the method.
+
+When constructing a new method from a ``base_function``,
+we check that the ``self`` object is an instance of ``__objclass__``
+(if a class was specified as parent) and raise a ``TypeError`` otherwise.
+
+The C structure is::
+
+    PyTypeObject PyMethod_Type;
+
+    typedef struct {
+        PyBaseFunctionObject base;
+        PyObject *im_func;  /* __func__: function implementing the method; readonly */
+    } PyMethodObject;
+
+
+Calling base_function instances
+===============================
+
+We specify the implementation of ``__call__`` for instances of ``base_function``.
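+
+As a rough overview of what the following subsections specify, here is a
+non-normative Python sketch of the C-level call logic (the names ``m_self``,
+``parent``, ``flags`` and ``ml_meth`` are stand-ins for the C struct fields,
+not a real Python API, and the flag values are made up)::
+
+    METH_CALL_UNBOUND = 1 << 16   # illustrative values, not the real ones
+    METH_PASS_FUNCTION = 1 << 17
+
+    def call_base_function(func, args, kwargs):
+        args = list(args)
+        if func.m_self is None:
+            # __objclass__ check for unbound methods of extension types
+            if isinstance(func.parent, type):
+                if not args or not isinstance(args[0], func.parent):
+                    raise TypeError("self must be an instance of %s"
+                                    % func.parent.__name__)
+            # Self slicing: the first positional argument becomes "self"
+            if not func.flags & METH_CALL_UNBOUND:
+                self = args.pop(0) if args else None
+            else:
+                self = None
+        else:
+            self = func.m_self
+        # Dispatch on METH_VARARGS/METH_FASTCALL/... is omitted here;
+        # the function object itself is prepended when requested.
+        if func.flags & METH_PASS_FUNCTION:
+            return func.ml_meth(func, self, args, kwargs)
+        return func.ml_meth(self, args, kwargs)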
+
+Checking __objclass__
+---------------------
+
+First of all, a type check is done if the ``__parent__`` of the function
+is a class
+(recall that ``__objclass__`` then becomes an alias of ``__parent__``):
+if ``m_self`` is ``NULL`` (this is the case for unbound methods of extension types),
+then the function must be called with at least one positional argument
+and the first (typically called ``self``) must be an instance of ``__objclass__``.
+If not, a ``TypeError`` is raised.
+
+Note that bound methods have ``m_self != NULL``, so ``__objclass__``
+is not checked.
+Instead, the ``__objclass__`` check is done when constructing the method.
+
+Flags
+-----
+
+For convenience, we define a new constant:
+``METH_CALLFLAGS`` combines all flags from ``PyCFunctionDef.ml_flags``
+which specify the signature of the C function to be called.
+It is equal to ::
+
+    METH_VARARGS | METH_FASTCALL | METH_NOARGS | METH_O | METH_KEYWORDS | METH_PASS_FUNCTION
+
+Exactly one of the first four flags above must be set
+and only ``METH_VARARGS`` and ``METH_FASTCALL`` may be combined with ``METH_KEYWORDS``.
+Violating these rules is undefined behaviour.
+
+There are two new flags which affect calling functions,
+namely ``METH_PASS_FUNCTION`` and ``METH_CALL_UNBOUND``.
+Some flags are already documented in [#methoddoc]_.
+We explain the others below.
+
+Self slicing
+------------
+
+If the function has ``m_self == NULL`` and the flag ``METH_CALL_UNBOUND``
+is not set, then the first positional argument (if any)
+is removed from ``*args`` and instead passed as first argument to the C function.
+Effectively, the first positional argument is treated as ``__self__``.
+This is meant to support unbound methods
+such that the C function does not see the difference
+between bound and unbound method calls.
+This does not affect keyword arguments in any way.
+
+This process is called *self slicing* and a function is said to
+*have self slicing* if ``m_self == NULL`` and ``METH_CALL_UNBOUND`` is not set.
+
+Note that a ``METH_NOARGS`` function which has self slicing
+effectively has one argument, namely ``self``.
+Analogously, a ``METH_O`` function with self slicing has two arguments.
+
+METH_PASS_FUNCTION
+------------------
+
+If this flag is set, then the C function is called with an
+additional first argument, namely the function itself
+(the ``base_function`` instance).
+As a special case, if the function is a ``bound_method``,
+then the underlying function of the method is passed
+(but not recursively: if a ``bound_method`` wraps a ``bound_method``,
+then ``__func__`` is only applied once).
+
+For example, an ordinary ``METH_VARARGS`` function has signature
+``(PyObject *self, PyObject *args)``.
+With ``METH_VARARGS | METH_PASS_FUNCTION``, this becomes
+``(PyObject *func, PyObject *self, PyObject *args)``.
+
+METH_FASTCALL
+-------------
+
+This is an existing but undocumented flag.
+We propose to officially support and document it.
+
+If the flag ``METH_FASTCALL`` is set without ``METH_KEYWORDS``,
+then the ``ml_meth`` field is of type ``PyCFunctionFast``
+which takes the arguments ``(PyObject *self, PyObject *const *args, Py_ssize_t nargs)``.
+Such a function takes only positional arguments and they are passed as plain C array
+``args`` of length ``nargs``.
+
+If the flags ``METH_FASTCALL | METH_KEYWORDS`` are set,
+then the ``ml_meth`` field is of type ``PyCFunctionFastKeywords``
+which takes the arguments ``(PyObject *self, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames)``.
+The positional arguments are passed as C array ``args`` of length ``nargs``.
+The *values* of the keyword arguments follow in that array,
+starting at position ``nargs``.
+The *keys* (names) of the keyword arguments are passed as a ``tuple`` in ``kwnames``.
+As an example, assume that 3 positional and 2 keyword arguments are given.
+Then ``args`` is an array of length 3 + 2 = 5, ``nargs`` equals 3 and ``kwnames`` is a 2-tuple.
+
+
+Automatic creation of built-in functions
+========================================
+
+Python automatically generates instances of ``cfunction``
+for extension types (using the ``PyTypeObject.tp_methods`` field) and modules
+(using the ``PyModuleDef.m_methods`` field).
+The arrays ``PyTypeObject.tp_methods`` and ``PyModuleDef.m_methods``
+must be arrays of ``PyMethodDef`` structures.
+
+Unbound methods of extension types
+----------------------------------
+
+The type of unbound methods changes from ``method_descriptor``
+to ``cfunction``.
+The object which appears as unbound method is the same object which
+appears in the class ``__dict__``.
+Python automatically sets the ``__parent__`` attribute to the defining class.
+
+Built-in functions of a module
+------------------------------
+
+For the case of functions of a module,
+``__parent__`` will be set to the module.
+Unless the flag ``METH_BINDING`` is given, ``__self__``
+will also be set to the module (for backwards compatibility).
+
+An important consequence is that such functions by default
+do not become methods when used as attribute
+(``base_function.__get__`` only does that if ``m_self`` was ``NULL``).
+One could consider this a bug, but this was done for backwards compatibility reasons:
+in an initial post on python-ideas [#proposal]_ the consensus was to keep this
+misfeature of built-in functions.
+
+However, to allow this anyway for specific or newly implemented
+built-in functions, the ``METH_BINDING`` flag prevents setting ``__self__``.
+
+
+Further changes
+===============
+
+New type flag
+-------------
+
+A new ``PyTypeObject`` flag (for ``tp_flags``) is added:
+``Py_TPFLAGS_BASEFUNCTION`` to indicate that instances of this type are
+functions which can be called and bound as a method like a ``base_function``.
+
+This is different from flags like ``Py_TPFLAGS_LIST_SUBCLASS``
+because it indicates more than just a subclass:
+it also indicates a default implementation of ``__call__`` and ``__get__``.
+In particular, such subclasses of ``base_function``
+must follow the implementation from the section `Calling base_function instances`_.
+
+This flag is automatically set for extension types which
+inherit the ``tp_call`` and ``tp_descr_get`` implementation from ``base_function``.
+Extension types can explicitly specify it if they
+override ``__call__`` or ``__get__`` in a compatible way.
+The flag ``Py_TPFLAGS_BASEFUNCTION`` must never be set for a heap type
+because that would not be safe (heap types can be changed dynamically).
+
+C API functions
+---------------
+
+We list some relevant Python/C API macros and functions.
+Some of these are existing (possibly changed) functions, some are new:
+
+- ``int PyBaseFunction_CheckFast(PyObject *op)``: return true if ``op``
+  is an instance of a class with the ``Py_TPFLAGS_BASEFUNCTION`` flag set.
+  This is the function that you need to use to determine
+  whether it is meaningful to access the ``base_function`` internals.
+
+- ``int PyBaseFunction_Check(PyObject *op)``: return true if ``op``
+  is an instance of ``base_function``.
+
+- ``PyObject *PyBaseFunction_New(PyTypeObject *cls, PyCFunctionDef *ml, PyObject *self, PyObject *module, PyObject *parent)``:
+  create a new instance of ``cls`` (which must be a subclass of ``base_function``)
+  from the given data.
+
+- ``int PyCFunction_Check(PyObject *op)``: return true if ``op``
+  is an instance of ``cfunction``.
+
+- ``PyObject *PyCFunction_NewEx(PyMethodDef *ml, PyObject *self, PyObject *module)``:
+  create a new instance of ``cfunction``.
+  As a special case, if ``self`` is ``NULL``,
+  then set ``self = Py_None`` instead (for backwards compatibility).
+  If ``self`` is a module, then ``__parent__`` is set to ``self``.
+  Otherwise, ``__parent__`` is ``NULL``.
+
+- For many existing ``PyCFunction_...`` and ``PyMethod_...`` functions,
+  we define a new function ``PyBaseFunction_...``
+  acting on ``base_function`` instances.
+  The old functions are kept as aliases of the new functions.
+
+- ``int PyFunction_Check(PyObject *op)``: return true if ``op``
+  is an instance of ``base_function`` with the ``METH_PYTHON`` flag set
+  (this is equivalent to checking whether ``op`` is an instance of ``function``).
+
+- ``int PyFunction_CheckFast(PyObject *op)``: equivalent to
+  ``PyFunction_Check(op) && PyBaseFunction_CheckFast(op)``.
+
+- ``int PyFunction_CheckExact(PyObject *op)``: return true
+  if the type of ``op`` is ``function``.
+
+- ``PyObject *PyFunction_NewPython(PyTypeObject *cls, PyObject *code, PyObject *globals, PyObject *name, PyObject *qualname)``:
+  create a new instance of ``cls`` (which must be a subclass of ``function``)
+  from the given data.
+
+- ``PyObject *PyFunction_New(PyObject *code, PyObject *globals)``:
+  create a new instance of ``function``.
+
+- ``PyObject *PyFunction_NewWithQualName(PyObject *code, PyObject *globals, PyObject *qualname)``:
+  create a new instance of ``function``.
+
+- ``PyObject *PyFunction_Copy(PyTypeObject *cls, PyObject *func)``:
+  create a new instance of ``cls`` (which must be a subclass of ``function``)
+  by copying a given ``function``.
+
+Changes to the types module
+---------------------------
+
+Two types are added: ``types.BaseFunctionType`` corresponding to
+``base_function`` and ``types.DefinedFunctionType`` corresponding to
+``defined_function``.
+
+Apart from that, no changes to the ``types`` module are made.
+In particular, ``types.FunctionType`` refers to ``function``.
+However, the actual types will change:
+in particular, ``types.BuiltinFunctionType`` will no longer be the same
+as ``types.BuiltinMethodType``.
+
+Changes to the inspect module
+-----------------------------
+
+The new function ``inspect.isbasefunction`` checks for an instance of ``base_function``.
+
+``inspect.isfunction`` checks for an instance of ``defined_function``.
+
+``inspect.isbuiltin`` checks for an instance of ``cfunction``.
+
+``inspect.isroutine`` checks ``isbasefunction`` or ``ismethoddescriptor``.
+
+**NOTE**: bpo-33261 [#bpo33261]_ should be fixed first.
+
+Profiling
+---------
+
+Currently, ``sys.setprofile`` supports ``c_call``, ``c_return`` and ``c_exception``
+events for built-in functions.
+These events are generated when calling or returning from a built-in function.
+By contrast, the ``call`` and ``return`` events are generated by the function itself.
+So nothing needs to change for the ``call`` and ``return`` events.
+
+Since we no longer make a difference between C functions and Python functions,
+we need to prevent the ``c_*`` events for Python functions.
+This is done by not generating those events if the
+``METH_PYTHON`` flag in ``ml_flags`` is set.
+
+
+Non-CPython implementations
+===========================
+
+Most of this PEP is only relevant to CPython.
+For other implementations of Python,
+the two changes that are required are the ``base_function`` base class
+and the fact that ``function`` can be subclassed.
+The classes ``cfunction`` and ``defined_function`` are not required.
+
+We require ``base_function`` for consistency but we put no requirements on it:
+it is acceptable if this is just a copy of ``object``.
+Support for the new ``__parent__`` (and ``__objclass__``) attribute is not required.
+If there is no ``defined_function`` class,
+then ``types.DefinedFunctionType`` should be an alias of ``types.FunctionType``.
+
+
+Rationale
+=========
+
+Why not simply change existing classes?
+---------------------------------------
+
+One could try to solve the problem by keeping the existing classes
+without introducing a new ``base_function`` class.
+
+That might look like a simpler solution but it is not:
+it would require introspection support for three distinct classes:
+``function``, ``builtin_function_or_method`` and ``method_descriptor``.
+For the latter two classes, "introspection support" would mean
+at a minimum allowing subclassing.
+But we don't want to lose performance, so we want fast subclass checks.
+This would require two new flags in ``tp_flags``.
+And we want subclasses to allow ``__get__`` for built-in functions,
+so we should implement the ``LOAD_METHOD`` opcode for built-in functions too.
+More generally, a lot of functionality would need to be duplicated
+and the end result would be far more complex code.
+
+It is also not clear how the introspection of built-in function subclasses
+would interact with ``__text_signature__``.
+Having two independent kinds of ``inspect.signature`` support on the same
+class sounds like asking for problems.
+
+And this would not fix some of the other differences between built-in functions
+and Python functions that were mentioned in the `motivation`_.
+
+Why __text_signature__ is not a solution
+----------------------------------------
+
+Built-in functions have an attribute ``__text_signature__``,
+which gives the signature of the function as plain text.
+The default values are evaluated by ``ast.literal_eval``.
+Because of this, it supports only a small number of standard Python classes
+and not arbitrary Python objects.
+
+And even if ``__text_signature__`` could somehow allow arbitrary signatures,
+that is only one piece of introspection:
+it does not help with ``inspect.getsourcefile`` for example.
+
+defined_function versus function
+--------------------------------
+
+In many places, a decision needs to be made whether the old ``function`` class
+should be replaced by ``defined_function`` or the new ``function`` class.
+This is done by thinking of the most likely use case:
+
+1. ``types.FunctionType`` refers to ``function`` because that
+   type might be used to construct instances using ``types.FunctionType(...)``.
+
+2. ``inspect.isfunction()`` refers to ``defined_function``
+   because this is the class where introspection is supported.
+
+3. The C API functions must refer to ``function`` because
+   we do not specify how the various attributes of ``defined_function``
+   are implemented.
+   We expect that this is not a problem since there is typically no
+   reason for introspection to be done by C extensions.
+
+Scope of this PEP: which classes are involved?
+----------------------------------------------
+
+The main motivation of this PEP is fixing function classes,
+so we certainly want to unify the existing classes
+``builtin_function_or_method`` and ``function``.
+
+Since built-in functions and methods have the same class,
+it seems natural to include bound methods too.
+And since there are no "unbound methods" for Python functions,
+it makes sense to get rid of unbound methods for extension types.
+
+For now, no changes are made to the classes ``staticmethod``,
+``classmethod`` and ``classmethod_descriptor``.
+It would certainly make sense to put these in the ``base_function``
+class hierarchy and unify ``classmethod`` and ``classmethod_descriptor``.
+However, this PEP is already big enough
+and this is left as a possible future improvement.
+
+Slot wrappers for extension types like ``__init__`` or ``__eq__``
+are quite different from normal methods.
+They are also typically not called directly because you would normally
+write ``foo[i]`` instead of ``foo.__getitem__(i)``.
+So these are left outside the scope of this PEP.
+
+Python also has an ``instancemethod`` class,
+which seems to be a relic from Python 2,
+where it was used for bound and unbound methods.
+It is not clear whether there is still a use case for it.
+In any case, there is no reason to deal with it in this PEP.
+
+**TODO**: should ``instancemethod`` be deprecated?
+It doesn't seem to be used at all within CPython 3.7,
+but maybe external packages use it?
+
+Not treating METH_STATIC and METH_CLASS
+---------------------------------------
+
+Almost nothing in this PEP refers to the flags ``METH_STATIC`` and ``METH_CLASS``.
+These flags are checked only by the `automatic creation of built-in functions`_.
+When a ``staticmethod``, ``classmethod`` or ``classmethod_descriptor``
+is bound (i.e. ``__get__`` is called),
+a ``base_function`` instance is created with ``m_self != NULL``.
+For a ``classmethod``, this is obvious since ``m_self``
+is the class that the method is bound to.
+For a ``staticmethod``, one can take an arbitrary Python object for ``m_self``.
+For backwards compatibility, we choose ``m_self = __parent__`` for static methods
+of extension types.
+
+__self__ in base_function
+-------------------------
+
+It may look strange at first sight to add the ``__self__`` slot
+in ``base_function`` as opposed to ``bound_method``.
+We took this idea from the existing ``builtin_function_or_method`` class.
+It allows us to have a single general implementation of ``__call__`` and ``__get__``
+for the various function classes discussed in this PEP.
+
+It also makes it easy to support existing built-in functions
+which set ``__self__`` to the module (for example, ``sys.exit.__self__`` is ``sys``).
+
+Two implementations of __doc__
+------------------------------
+
+``base_function`` does not support function docstrings.
+Instead, the classes ``cfunction`` and ``function``
+each have their own way of dealing with docstrings
+(and ``bound_method`` just takes the ``__doc__`` from the wrapped function).
+
+For ``cfunction``, the docstring is stored (together with the text signature)
+as a C string in the read-only ``ml_doc`` field of a ``PyMethodDef``.
+For ``function``, the docstring is stored as a writable Python object
+and it does not actually need to be a string.
+It looks hard to unify these two very different ways of dealing with ``__doc__``.
+For backwards compatibility, we keep the existing implementations.
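+
+The difference is already visible from Python today (the exact error message
+may vary between versions)::
+
+    >>> def f():
+    ...     "docstring"
+    ...
+    >>> f.__doc__ = "can be replaced by any object"   # writable
+    >>> len.__doc__ = "stored as a C string"          # read-only
+    Traceback (most recent call last):
+      ...
+    AttributeError: attribute '__doc__' of 'builtin_function_or_method' objects is not writable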
+
+For ``defined_function``, we require ``__doc__`` to be implemented
+but we do not specify how. A subclass can implement ``__doc__`` the
+same way as ``cfunction`` or using a struct member or some other way.
+
+Subclassing
+-----------
+
+We disallow subclassing of ``cfunction`` and ``bound_method``
+to enable fast type checks for ``PyCFunction_Check`` and ``PyMethod_Check``.
+
+We allow subclassing of the other classes because there is no reason to disallow it.
+For Python modules, the only relevant class to subclass is
+``function`` because the others cannot be instantiated anyway.
+
+Replacing tp_call: METH_PASS_FUNCTION and METH_CALL_UNBOUND
+-----------------------------------------------------------
+
+The new flags ``METH_PASS_FUNCTION`` and ``METH_CALL_UNBOUND``
+are meant to support cases where formerly a custom ``tp_call`` was used.
+It reduces the number of special fast paths in ``Python/ceval.c``
+for calling objects:
+instead of treating Python functions, built-in functions and method descriptors
+separately, there would only be a single check.
+
+The signature of ``tp_call`` is essentially the signature
+of ``PyBaseFunctionObject.m_ml.ml_meth`` with flags
+``METH_VARARGS | METH_KEYWORDS | METH_PASS_FUNCTION | METH_CALL_UNBOUND``
+(the only difference is an added ``self`` argument).
+Therefore, it should be easy to change existing ``tp_call`` slots
+to use the ``base_function`` implementation instead.
+
+It also makes sense to use ``METH_PASS_FUNCTION`` without ``METH_CALL_UNBOUND``
+in cases where the C function simply needs access to additional metadata
+from the function, such as the ``__parent__``.
+This is for example needed to support PEP 573.
+Converting existing methods to use ``METH_PASS_FUNCTION`` is trivial:
+it only requires adding an extra argument to the C function.
+
+
+Backwards compatibility
+=======================
+
+While designing this PEP, great care was taken not to break
+backwards compatibility too much.
+Most of the potentially incompatible changes
+are changes to CPython implementation details
+which are different anyway in other Python interpreters.
+In particular, Python code which correctly runs on PyPy
+will very likely continue to work with this PEP.
+
+The standard classes and functions like
+``staticmethod``, ``functools.partial`` or ``operator.methodcaller``
+do not need to change at all.
+
+Changes to types and inspect
+----------------------------
+
+The proposed changes to ``types`` and ``inspect``
+are meant to minimize changes in behaviour.
+However, it is unavoidable that some things change
+and this can cause code which uses ``types`` or ``inspect`` to break.
+In the Python standard library for example,
+changes are needed in the ``doctest`` module because of this.
+
+Also, tools which take various kinds of functions as input will need to deal
+with the new function hierarchy and the possibility of custom
+function classes.
+
+Python functions
+----------------
+
+For Python functions, essentially nothing changes.
+The attributes that existed before still exist and Python functions
+can be initialized, called and turned into methods as before.
+
+The name ``function`` is kept for backwards compatibility.
+While it might make sense to change the name to something more
+specific like ``python_function``,
+that would require a lot of annoying changes in documentation and testsuites.
+
+Built-in functions of a module
+------------------------------
+
+Also for built-in functions, nothing changes.
+We keep the old behaviour that such functions do not bind as methods.
+This is a consequence of the fact that ``__self__`` is set to the module.
+
+Built-in bound and unbound methods
+----------------------------------
+
+The types of built-in bound and unbound methods will change.
+However, this does not affect calling such methods
+because the protocol in ``base_function.__call__``
+(in particular the handling of ``__objclass__`` and self slicing)
+was specifically designed to be backwards compatible.
+All attributes which existed before (like ``__objclass__`` and ``__self__``)
+still exist.
+
+New attributes
+--------------
+
+Some objects get new special double-underscore attributes.
+For example, the new attribute ``__parent__`` appears on
+all built-in functions and all methods get a ``__func__`` attribute.
+The fact that ``__self__`` is now a special read-only attribute
+for Python functions caused trouble in [#bpo33265]_.
+Generally, we expect that not much will break though.
+
+method_descriptor and PyDescr_NewMethod
+---------------------------------------
+
+The class ``method_descriptor`` and the constructor ``PyDescr_NewMethod``
+should be deprecated.
+They are no longer used by CPython itself but are still supported.
+
+
+Two-phase Implementation
+========================
+
+**TODO**: this section is optional.
+If this PEP is accepted, it should
+be decided whether to apply this two-phase implementation or not.
+
+As mentioned above, the `changes to types and inspect`_ can break some
+existing code.
+In order to further minimize breakage, this PEP could be implemented
+in two phases.
+
+Phase one: keep existing classes but add base classes
+-----------------------------------------------------
+
+Initially, implement the ``base_function`` class
+and use it as common base class but otherwise keep the existing classes
+(but not their implementation).
+
+In this proposal, the class hierarchy would become::
+
+                      object
+                         |
+                         |
+                  base_function
+                 /       |       \
+                /        |        \
+               /         |         \
+         cfunction       |    defined_function
+          |     |        |           \
+          |     |   bound_method      \
+          |     |                      \
+          |  method_descriptor      function
+          |
+    builtin_function_or_method
+
+The leaf classes ``builtin_function_or_method``, ``method_descriptor``,
+``bound_method`` and ``function`` correspond to the existing classes
+(with ``method`` renamed to ``bound_method``).
+
+Functions automatically created in modules become instances
+of ``builtin_function_or_method``.
+Unbound methods of extension types become instances of ``method_descriptor``.
+
+The class ``method_descriptor`` is a copy of ``cfunction`` except
+that ``__get__`` returns a ``builtin_function_or_method`` instead of a
+``bound_method``.
+
+The class ``builtin_function_or_method`` has the same C structure as a
+``bound_method``, but it inherits from ``cfunction``.
+The ``__func__`` attribute is not mandatory:
+it is only defined when binding a ``method_descriptor``.
+
+We keep the implementation of the ``inspect`` functions as they are.
+Because of this and because the existing classes are kept,
+backwards compatibility is ensured for code doing type checks.
+
+Since showing an actual ``DeprecationWarning`` would affect a lot
+of correctly-functioning code,
+any deprecations would only appear in the documentation.
+Another reason is that it is hard to show warnings for calling ``isinstance(x, t)``
+(but it could be done using ``__instancecheck__`` hacking)
+and impossible for ``type(x) is t``.
+
+Phase two
+---------
+
+Phase two is what is actually described in the rest of this PEP.
+In terms of implementation,
+it would be a relatively small change compared to phase one.
+
+
+Reference Implementation
+========================
+
+Most of this PEP has been implemented for CPython at
+https://github.com/jdemeyer/cpython/tree/pep575
+
+There are four steps, corresponding to the commits on that branch.
+After each step, CPython is in a mostly working state.
+
+1. Add the ``base_function`` class and make it a base class of ``cfunction``.
+   This is by far the biggest step as the complete ``__call__`` protocol
+   is implemented in this step.
+
+2. Rename ``method`` to ``bound_method`` and make it a subclass of ``base_function``.
+   Change unbound methods of extension types to be instances of ``cfunction``
+   such that bound methods of extension types are also instances of ``bound_method``.
+
+3. Implement ``defined_function`` and ``function``.
+
+4. Changes to other parts of Python, such as the standard library and testsuite.
+
+
+Appendix: current situation
+===========================
+
+**NOTE**:
+This section is more useful during the draft period of the PEP,
+so feel free to remove this once the PEP has been accepted.
+
+For reference, we describe in detail the relevant existing classes in CPython 3.7.
+
+Each of the classes involved is an "orphan" class
+(no non-trivial subclasses nor superclasses).
+
+builtin_function_or_method: built-in functions and bound methods
+----------------------------------------------------------------
+
+These are of type `PyCFunction_Type `_
+with structure `PyCFunctionObject `_::
+
+    typedef struct {
+        PyObject_HEAD
+        PyMethodDef *m_ml;       /* Description of the C function to call */
+        PyObject *m_self;        /* Passed as 'self' arg to the C func, can be NULL */
+        PyObject *m_module;      /* The __module__ attribute, can be anything */
+        PyObject *m_weakreflist; /* List of weak references */
+    } PyCFunctionObject;
+
+    struct PyMethodDef {
+        const char *ml_name;  /* The name of the built-in function/method */
+        PyCFunction ml_meth;  /* The C function that implements it */
+        int ml_flags;         /* Combination of METH_xxx flags, which mostly
+                                 describe the args expected by the C func */
+        const char *ml_doc;   /* The __doc__ attribute, or NULL */
+    };
+
+where ``PyCFunction`` is a C function pointer (there are various forms of this, the most basic
+takes two arguments for ``self`` and ``*args``).
+
+This class is used both for functions and bound methods:
+for a method, the ``m_self`` slot points to the object::
+
+    >>> dict(foo=42).get
+    <built-in method get of dict object at 0x...>
+    >>> dict(foo=42).get.__self__
+    {'foo': 42}
+
+In some cases, a function is considered a "method" of the module defining it::
+
+    >>> import os
+    >>> os.kill
+    <built-in function kill>
+    >>> os.kill.__self__
+    <module 'os' from '...'>
+
+method_descriptor: built-in unbound methods
+-------------------------------------------
+
+These are of type `PyMethodDescr_Type `_
+with structure `PyMethodDescrObject `_::
+
+    typedef struct {
+        PyDescrObject d_common;
+        PyMethodDef *d_method;
+    } PyMethodDescrObject;
+
+    typedef struct {
+        PyObject_HEAD
+        PyTypeObject *d_type;
+        PyObject *d_name;
+        PyObject *d_qualname;
+    } PyDescrObject;
+
+function: Python functions
+--------------------------
+
+These are of type `PyFunction_Type `_
+with structure `PyFunctionObject `_::
+
+    typedef struct {
+        PyObject_HEAD
+        PyObject *func_code;        /* A code object, the __code__ attribute */
+        PyObject *func_globals;     /* A dictionary (other mappings won't do) */
+        PyObject *func_defaults;    /* NULL or a tuple */
+        PyObject *func_kwdefaults;  /* NULL or a dict */
+        PyObject *func_closure;     /* NULL or a tuple of cell objects */
+        PyObject *func_doc;         /* The __doc__ attribute, can be anything */
+        PyObject *func_name;        /* The __name__ attribute, a string object */
+        PyObject *func_dict;        /* The __dict__ attribute, a dict or NULL */
+        PyObject *func_weakreflist; /* List of weak references */
+        PyObject *func_module;      /* The __module__ attribute, can be anything */
+        PyObject *func_annotations; /* Annotations, a dict or NULL */
+        PyObject *func_qualname;    /* The qualified name */
+
+        /* Invariant:
+         *     func_closure contains the bindings for func_code->co_freevars, so
+         *     PyTuple_Size(func_closure) == PyCode_GetNumFree(func_code)
+         *     (func_closure may be NULL if PyCode_GetNumFree(func_code) == 0).
+         */
+    } PyFunctionObject;
+
+In Python 3, there is no "unbound method" class:
+an unbound method is just a plain function.
+
+method: Python bound methods
+----------------------------
+
+These are of type `PyMethod_Type `_
+with structure `PyMethodObject `_::
+
+    typedef struct {
+        PyObject_HEAD
+        PyObject *im_func;        /* The callable object implementing the method */
+        PyObject *im_self;        /* The instance it is bound to */
+        PyObject *im_weakreflist; /* List of weak references */
+    } PyMethodObject;
+
+
+References
+==========
+
+.. [#cython] Cython (http://cython.org/)
+
+.. [#bpo30071] Python bug 30071, Duck-typing inspect.isfunction() (https://bugs.python.org/issue30071)
+
+.. [#bpo33261] Python bug 33261, inspect.isgeneratorfunction fails on hand-created methods
+   (https://bugs.python.org/issue33261 and https://github.com/python/cpython/pull/6448)
+
+.. [#bpo33265] Python bug 33265, contextlib.ExitStack abuses __self__
+   (https://bugs.python.org/issue33265 and https://github.com/python/cpython/pull/6456)
+
+.. [#ABI] PEP 384, Defining a Stable ABI, Löwis (https://www.python.org/dev/peps/pep-0384)
+
+.. [#clinic] PEP 436, The Argument Clinic DSL, Hastings (https://www.python.org/dev/peps/pep-0436)
+
+.. [#methoddoc] PyMethodDef documentation (https://docs.python.org/3.7/c-api/structures.html#c.PyMethodDef)
+
+.. [#proposal] PEP proposal: unifying function/method classes (https://mail.python.org/pipermail/python-ideas/2018-March/049398.html)
+
+Copyright
+=========
+
+This document has been placed in the public domain.
+
+
+
+..
+   Local Variables:
+   mode: indented-text
+   indent-tabs-mode: nil
+   sentence-end-double-space: t
+   fill-column: 70
+   coding: utf-8
+   End:
diff --git a/pep-0576.rst b/pep-0576.rst
new file mode 100644
index 000000000..01a4090fe
--- /dev/null
+++ b/pep-0576.rst
@@ -0,0 +1,161 @@
+PEP: 576
+Title: Rationalize Built-in function classes
+Author: Mark Shannon
+Status: Draft
+Type: Standards Track
+Content-Type: text/x-rst
+Created: 10-May-2018
+Python-Version: 3.8
+Post-History: 17-May-2018
+              23-June-2018
+              08-July-2018
+
+Abstract
+========
+
+Expose the "FastcallKeywords" convention used internally by CPython to third-party code, and make the ``inspect`` module use duck-typing.
+In combination this will allow third-party C extensions and tools like Cython to create objects that use the same calling conventions as built-in and Python functions, thus gaining performance parity with built-in functions like ``len`` or ``print``.
+
+A small improvement in the performance of existing code is expected.
+
+Motivation
+==========
+
+Currently third-party module authors face a dilemma when implementing
+functions in C. Either they can use one of the pre-existing built-in function
+or method classes or implement their own custom class in C.
+The first choice causes them to lose the ability to access the internals of the callable object.
+The second choice is an additional maintenance burden and, more importantly,
+has a significant negative impact on performance.
+
+This PEP aims to allow authors of third-party C modules, and tools like Cython, to utilize the faster calling convention used internally by CPython for built-in functions and methods, and to do so without a loss of capabilities relative to a function implemented in Python.
+
+Introspection
+-------------
+
+The inspect module will fully support duck-typing when introspecting callables.
+
+The ``inspect.Signature.from_callable()`` function computes the signature of a callable. If an object has a ``__signature__``
+property, then ``inspect.Signature.from_callable()`` simply returns that. To further support duck-typing, if a callable has a ``__text_signature__``
+then the ``__signature__`` will be created from that.
+
+This means that third-party built-in functions can implement ``__text_signature__`` if sufficient,
+and the more expensive ``__signature__`` if necessary.
+
+Efficient calls to third-party callables
+----------------------------------------
+
+Currently the majority of calls are dispatched to ``function``\s and ``method_descriptor``\s in custom code, using the "FastcallKeywords" internal calling convention. This PEP proposes that this calling convention be implemented via a C function pointer. Third-party callables which implement this binary interface will have the potential to be called as fast as a built-in function.
+
+Continued prohibition of callable classes as base classes
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Currently any attempt to use ``function``, ``method`` or ``method_descriptor`` as a base class for a new class will fail with a ``TypeError``. This behaviour is desirable as it prevents errors when a subclass overrides the ``__call__`` method. If callables could be sub-classed then any call to a ``function`` or a ``method_descriptor`` would need an additional check that the ``__call__`` method had not been overridden. By exposing an additional call mechanism, the potential for errors becomes greater.
+As a consequence, any third-party class implementing the additional call interface will not be usable as a base class.
+
+
+New classes and changes to existing classes
+===========================================
+
+Python visible changes
+----------------------
+
+#. A new built-in class, ``builtin_function``, will be added.
+
+#. ``types.BuiltinFunctionType`` will refer to ``builtin_function``, not ``builtin_function_or_method``.
+
+#. Instances of the ``builtin_function`` class will retain the ``__module__`` property of ``builtin_function_or_method`` and gain the ``func_module`` and ``func_globals`` properties. The ``func_module`` allows access to the module to which the function belongs. Note that this is different from the ``__module__`` property which merely returns the name of the module. The ``func_globals`` property is equivalent to ``func_module.__dict__`` and is provided to mimic the Python function property of the same name.
+
+#. When binding a ``method_descriptor`` instance to an instance of its owning class, a ``bound_method`` will be created instead of a ``builtin_function_or_method``. This means that ``method_descriptor`` instances now mimic the behaviour of Python functions more closely. In other words, ``[].append`` becomes a ``bound_method`` instead of a ``builtin_function_or_method``.
+
+
+C API changes
+-------------
+
+#. A new function ``PyBuiltinFunction_New(PyMethodDef *ml, PyObject *module)`` is added to create built-in functions.
+
+#. ``PyCFunction_NewEx()`` and ``PyCFunction_New()`` are deprecated and will return a ``PyBuiltinFunction`` if able, otherwise a ``builtin_function_or_method``.
+
+Retaining backwards compatibility in the C API and ABI
+======================================================
+
+The proposed changes are fully backwards and forwards compatible at both the API and ABI level.
+
+
+Internal C changes
+------------------
+
+Two new flags will be allowed for the ``typeobject.tp_flags`` field.
+These are ``Py_TPFLAGS_EXTENDED_CALL`` and ``Py_TPFLAGS_FUNCTION_DESCRIPTOR``.
+
+Py_TPFLAGS_EXTENDED_CALL
+~~~~~~~~~~~~~~~~~~~~~~~~
+
+For any built-in class that sets ``Py_TPFLAGS_EXTENDED_CALL``,
+the C struct corresponding to this built-in class must begin with the struct ``PyExtendedCallable`` which is defined as follows::
+
+    typedef PyObject *(*extended_call_ptr)(PyObject *callable, PyObject** args,
+                                           int positional_argcount, PyTupleObject* kwnames);
+
+    typedef struct {
+        PyObject_HEAD
+        extended_call_ptr ext_call;
+    } PyExtendedCallable;
+
+Any class that sets the ``Py_TPFLAGS_EXTENDED_CALL`` cannot be used as a base class and a ``TypeError`` will be raised if any Python code tries to use it as a base class.
+
+
+Py_TPFLAGS_FUNCTION_DESCRIPTOR
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+If this flag is set for a built-in class ``F``, then instances of that class are expected to behave the same as a Python function when used as a class attribute.
+Specifically, this means that the value of ``c.m`` where ``C.m`` is an instance of the built-in class ``F`` (and ``c`` is an instance of ``C``) must be a bound-method binding ``C.m`` and ``c``.
+Without this flag, it would be impossible for custom callables to behave like Python functions *and* be as efficient as Python or built-in functions.
+
+
+
+Changes to existing C structs
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+The ``function``, ``method_descriptor`` and ``method`` classes will have their corresponding structs changed to
+start with the ``PyExtendedCallable`` struct.
+
+Third-party built-in classes using the new extended call interface
+------------------------------------------------------------------
+
+To enable call performance on a par with Python functions and built-in functions, third-party callables should set the ``Py_TPFLAGS_EXTENDED_CALL`` bit of ``tp_flags`` and ensure that the corresponding C struct starts with the ``PyExtendedCallable`` struct.
+Any built-in class that has the ``Py_TPFLAGS_EXTENDED_CALL`` bit set must also implement the ``tp_call`` function and make sure its behaviour is consistent with the ``ext_call`` function.
+
+Performance implications of these changes
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Adding a function pointer to each callable, rather than each class of callable, enables the choice of dispatching function (the code to shuffle arguments about and do error checking) to be made when the callable object is created rather than when it is called. This should reduce the number of instructions executed between the call-site in the interpreter and the execution of the callee.
+
+
+Alternative Suggestions
+=======================
+
+PEP 580 is an alternative approach to solving the same problem as this PEP.
+
+
+
+Reference implementation
+========================
+
+A draft implementation can be found at https://github.com/markshannon/cpython/tree/pep-576-minimal
+
+
+Copyright
+=========
+
+This document has been placed in the public domain.
+
+
+
+..
+   Local Variables:
+   mode: indented-text
+   indent-tabs-mode: nil
+   sentence-end-double-space: t
+   fill-column: 70
+   coding: utf-8
+   End:
diff --git a/pep-0577.rst b/pep-0577.rst
new file mode 100644
index 000000000..79fff96e7
--- /dev/null
+++ b/pep-0577.rst
@@ -0,0 +1,826 @@
+PEP: 577
+Title: Augmented Assignment Expressions
+Author: Nick Coghlan
+Status: Withdrawn
+Type: Standards Track
+Content-Type: text/x-rst
+Created: 14-May-2018
+Python-Version: 3.8
+Post-History: 22-May-2018
+
+
+PEP Withdrawal
+==============
+
+While working on this PEP, I realised that it didn't really address what was
+actually bothering me about PEP 572's proposed scoping rules for previously
+unreferenced assignment targets, and also had some significant undesirable
+consequences (most notably, allowing ``>>=`` and ``<<=`` as inline augmented
+assignment operators that meant something entirely different from the ``>=``
+and ``<=`` comparison operators).
+
+I also realised that even without dedicated syntax of their own, PEP 572 allows
+inline augmented assignments to be written using the ``operator`` module::
+
+    from operator import iadd
+    if (target := iadd(target, value)) < limit:
+        ...
+
+(The restriction to simple names as inline assignment targets means that the
+target expression can always be repeated without side effects.)
+
+Accordingly, I'm withdrawing this PEP without submitting it for pronouncement,
+and will instead be writing a replacement PEP that focuses specifically on the
+handling of assignment targets which haven't already been declared as local
+variables in the current scope (for both regular block scopes, and for scoped
+expressions).
+
+
+Abstract
+========
+
+This is a proposal to allow augmented assignments such as ``x += 1`` to be
+used as expressions, not just statements.
+
+As part of this, ``NAME := EXPR`` is proposed as an inline assignment expression
+that uses the new augmented assignment scoping rules, rather than implicitly
+defining a new local variable name the way that existing name binding
+statements do.
+The question of allowing expression level local variable declarations at
+function scope is deliberately separated from the question of allowing
+expression level name bindings, and deferred to a later PEP.
+
+This PEP is a direct competitor to PEP 572 (although it borrows heavily from that
+PEP's motivation, and even shares the proposed syntax for inline assignments).
+See `Relationship with PEP 572`_ for more details on the connections between
+the two PEPs.
+
+To improve the usability of the new expressions, a semantic split is proposed
+between the handling of augmented assignments in regular block scopes (modules,
+classes, and functions), and the handling of augmented assignments in scoped
+expressions (lambda expressions, generator expressions, and comprehensions),
+such that all inline assignments default to targeting the nearest containing
+block scope.
+
+A new compile time ``TargetNameError`` is added as a subclass of ``SyntaxError``
+to handle cases where it is deemed to be currently unclear which target is
+expected to be rebound by an inline assignment, or else the target scope
+for the inline assignment is considered invalid for another reason.
+
+
+Syntax and semantics
+====================
+
+Augmented assignment expressions
+--------------------------------
+
+The language grammar would be adjusted to allow augmented assignments to
+appear as expressions, where the result of the augmented assignment
+expression is the same post-calculation reference as is being bound to the
+given target.
+
+For example::
+
+    >>> n = 0
+    >>> n += 5
+    5
+    >>> n -= 2
+    3
+    >>> n *= 3
+    9
+    >>> n
+    9
+
+For mutable targets, this means the result is always just the original object::
+
+    >>> seq = []
+    >>> seq_id = id(seq)
+    >>> seq += range(3)
+    [0, 1, 2]
+    >>> seq_id == id(seq)
+    True
+
+Augmented assignments to attributes and container subscripts will be permitted,
+with the result being the post-calculation reference bound to the target,
+just as it is for simple name targets::
+
+    def increment(self, step=1):
+        return self._value += step
+
+In these cases, ``__getitem__`` and ``__getattribute__`` will *not* be called
+after the assignment has already taken place (they will only be called as
+needed to evaluate the in-place operation).
+
+
+Adding an inline assignment operator
+------------------------------------
+
+Given only the addition of augmented assignment expressions, it would be
+possible to abuse a symbol like ``|=`` as a general purpose assignment
+operator by defining a ``Target`` wrapper type that worked as follows::
+
+    >>> class Target:
+    ...     def __init__(self, value):
+    ...         self.value = value
+    ...     def __or__(self, other):
+    ...         return Target(other)
+    ...
+    >>> x = Target(10)
+    >>> x.value
+    10
+    >>> x |= 42
+    <__main__.Target object at 0x7f608caa8048>
+    >>> x.value
+    42
+
+This is similar to the way that storing a single reference in a list was long
+used as a workaround for the lack of a ``nonlocal`` keyword, and can still be
+used today (in combination with ``operator.setitem``) to work around the
+lack of expression level assignments.
+
+Rather than requiring such workarounds, this PEP instead proposes that
+PEP 572's "NAME := EXPR" syntax be adopted as a new inline assignment
+expression that uses the augmented assignment scoping rules described below.
+
+This cleanly handles cases where only the new value is of interest, and the
+previously bound value (if any) can just be discarded completely.
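+
+For illustration, a typical use of the inline form under the proposed
+semantics (``stream`` and ``process`` are hypothetical names; the initial
+binding of ``block`` satisfies the function scope rules described below)::
+
+    def copy_all(stream):
+        block = b""
+        while (block := stream.read(8192)):
+            process(block)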
+
+Note that for both simple names and complex assignment targets, the inline
+assignment operator does *not* read the previous reference before assigning
+the new one. However, when used at function scope (either directly or inside
+a scoped expression), it does *not* implicitly define a new local variable,
+and will instead raise ``TargetNameError`` (as described for augmented
+assignments below).
+
+
+Assignment operator precedence
+------------------------------
+
+To preserve the existing semantics of augmented assignment statements,
+inline assignment operators will be defined as being of lower precedence
+than all other operators, including the comma pseudo-operator. This ensures
+that when used as a top level expression the entire right hand side of the
+expression is still interpreted as the value to be processed (even when that
+value is a tuple without parentheses).
+
+The difference this introduces relative to PEP 572 is that where
+``(n := first, second)`` sets ``n = first`` in PEP 572, in this PEP it would set
+``n = (first, second)``, and getting the first meaning would require an extra
+set of parentheses (``((n := first), second)``).
+
+PEP 572 quite reasonably notes that this results in ambiguity when assignment
+expressions are used as function call arguments. This PEP resolves that concern
+a different way by requiring that assignment expressions be parenthesised
+when used as arguments to a function call (unless they're the sole argument).
+
+This is a more relaxed version of the restriction placed on generator
+expressions (which always require parentheses, except when they're the sole
+argument to a function call).
+
+
+Augmented assignment to names in block scopes
+---------------------------------------------
+
+No target name binding changes are proposed for augmented assignments at module
+or class scope (this also includes code executed using "exec" or "eval"). These
+will continue to implicitly declare a new local variable as the binding target
+as they do today, and (if necessary) will be able to resolve the name from an
+outer scope before binding it locally.
+
+At function scope, augmented assignments will be changed to require that there
+be either a preceding name binding or variable declaration to explicitly
+establish the target name as being local to the function, or else an explicit
+``global`` or ``nonlocal`` declaration. ``TargetNameError``, a new
+``SyntaxError`` subclass, will be raised at compile time if no such binding or
+declaration is present.
+
+For example, the following code would compile and run as it does today::
+
+    x = 0
+    x += 1    # Sets global "x" to 1
+
+    class C:
+        x += 1    # Sets local "x" to 2, leaves global "x" alone
+
+    def local_target():
+        x = 0
+        x += 1    # Sets local "x" to 1, leaves global "x" alone
+
+    def global_target():
+        global x
+        x += 1    # Increments global "x" each time this runs
+
+    def nonlocal_target():
+        x = 0
+        def g():
+            nonlocal x
+            x += 1    # Increments "x" in outer scope each time this runs
+            return x
+        return g
+
+The following examples would all still compile and then raise an error at runtime
+as they do today::
+
+    n += 1    # Raises NameError at runtime
+
+    class C:
+        n += 1    # Raises NameError at runtime
+
+    def missing_global():
+        global n
+        n += 1    # Raises NameError at runtime
+
+    def delayed_nonlocal_initialisation():
+        def f():
+            nonlocal n
+            n += 1
+        f()    # Raises NameError at runtime
+        n = 0
+
+    def skipped_conditional_initialisation():
+        if False:
+            n = 0
+        n += 1    # Raises UnboundLocalError at runtime
+
+    def local_declaration_without_initial_assignment():
+        n: typing.Any
+        n += 1    # Raises UnboundLocalError at runtime
+
+Whereas the following would raise a compile time ``DeprecationWarning``
+initially, and eventually change to report a compile time ``TargetNameError``::
+
+    def missing_target():
+        x += 1    # Compile time TargetNameError due to ambiguous target scope
+                  # Is there a missing initialisation of "x" here? Or a missing
+                  # global or nonlocal declaration?
+
+As a conservative implementation approach, the compile time function name
+resolution change would be introduced as a ``DeprecationWarning`` in Python
+3.8, and then converted to ``TargetNameError`` in Python 3.9. This avoids
+potential problems in cases where an unused function would currently raise
+``UnboundLocalError`` if it was ever actually called, but the code is actually
+unused - converting that latent runtime defect to a compile time error qualifies
+as a backwards incompatible change that requires a deprecation period.
+
+When augmented assignments are used as expressions in function scope (rather
+than as standalone statements), there aren't any backwards compatibility
+concerns, so the compile time name binding checks would be enforced immediately
+in Python 3.8.
+
+Similarly, the new inline assignment expressions would always require explicit
+predeclaration of their target scope when used as part of a function, at least
+for Python 3.8. (See the design discussion section for notes on potentially
+revisiting that restriction in the future).
+
+
+Augmented assignment to names in scoped expressions
+---------------------------------------------------
+
+Scoped expressions is a new collective term being proposed for expressions that
+introduce a new nested scope of execution, either as an intrinsic part of their
+operation (lambda expressions, generator expressions), or else as a way of
+hiding name binding operations from the containing scope (container
+comprehensions).
+
+Unlike regular functions, these scoped expressions can't include explicit
+``global`` or ``nonlocal`` declarations to rebind names directly in an outer
+scope.
+
+Instead, their name binding semantics for augmented assignment expressions would
+be defined as follows:
+
+* augmented assignment targets used in scoped expressions are expected to either
+  be already bound in the containing block scope, or else have their scope
+  explicitly declared in the containing block scope.
+  If no suitable name
+  binding or declaration can be found in that scope, then ``TargetNameError``
+  will be raised at compile time (rather than creating a new binding within
+  the scoped expression).
+* if the containing block scope is a function scope, and the target name is
+  explicitly declared as ``global`` or ``nonlocal``, then it will use the
+  same scope declaration in the body of the scoped expression
+* if the containing block scope is a function scope, and the target name is
+  a local variable in that function, then it will be implicitly declared as
+  ``nonlocal`` in the body of the scoped expression
+* if the containing block scope is a class scope, then ``TargetNameError`` will
+  always be raised, with a dedicated message indicating that combining class
+  scopes with augmented assignments in scoped expressions is not currently
+  permitted.
+* if a name is declared as a formal parameter (lambda expressions), or as an
+  iteration variable (generator expressions, comprehensions), then that name
+  is considered local to that scoped expression, and attempting to use it as
+  the target of an augmented assignment operation in that scope, or any nested
+  scoped expression, will raise ``TargetNameError`` (this is a restriction that
+  could potentially be lifted later, but is being proposed for now to simplify
+  the initial set of compile time and runtime semantics that needs to be
+  covered in the language reference and handled by the compiler and interpreter)
+
+For example, the following code would work as shown::
+
+    >>> global_target = 0
+    >>> incr_global_target = lambda: global_target += 1
+    >>> incr_global_target()
+    1
+    >>> incr_global_target()
+    2
+    >>> global_target
+    2
+    >>> def cumulative_sums(data, start=0):
+    ...     total = start
+    ...     yield from (total += value for value in data)
+    ...     return total
+    ...
+    >>> print(list(cumulative_sums(range(5))))
+    [0, 1, 3, 6, 10]
+
+While the following examples would all raise ``TargetNameError``::
+
+    class C:
+        cls_target = 0
+        incr_cls_target = lambda: cls_target += 1    # Error due to class scope
+
+    def missing_target():
+        incr_x = lambda: x += 1    # Error due to missing target "x"
+
+    def late_target():
+        incr_x = lambda: x += 1    # Error due to "x" being declared after use
+        x = 1
+
+    lambda arg: arg += 1    # Error due to attempt to target formal parameter
+
+    [x += 1 for x in data]    # Error due to attempt to target iteration variable
+
+
+As augmented assignments currently can't appear inside scoped expressions, the
+above compile time name resolution exceptions would be included as part of the
+initial implementation rather than needing to be phased in as a potentially
+backwards incompatible change.
+
+
+Design discussion
+=================
+
+Allowing complex assignment targets
+-----------------------------------
+
+The initial drafts of this PEP kept PEP 572's restriction to single name targets
+when augmented assignments were used as expressions, allowing attribute and
+subscript targets solely for the statement form.
+
+However, enforcing that required varying the permitted targets based on whether
+or not the augmented assignment was a top level expression, as well as
+explaining why ``n += 1``, ``(n += 1)``, and ``self.n += 1`` were all legal,
+but ``(self.n += 1)`` was prohibited, so the proposal was simplified to allow
+all existing augmented assignment targets for the expression form as well.
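+
+For example, under the final proposal all of the following spellings would be
+accepted (the names are purely illustrative)::
+
+    n += 1                           # simple name target
+    (n += 1)                         # parenthesised expression form
+    self.n += 1                      # attribute target
+    if (counts[key] += 1) > limit:   # subscript target as an expression
+        ...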
+
+Since this PEP defines ``TARGET := EXPR`` as a variant on augmented assignment,
+that also gained support for attribute and subscript targets.
+
+
+Augmented assignment or name binding only?
+------------------------------------------
+
+PEP 572 makes a reasonable case that the potential use cases for inline
+augmented assignment are notably weaker than those for inline assignment in
+general, so it's acceptable to require that they be spelled as ``x := x + 1``,
+bypassing any in-place augmented assignment methods.
+
+While this is at least arguably true for the builtin types (where potential
+counterexamples would probably need to focus on set manipulation use cases
+that the PEP author doesn't personally have), it would also rule out more
+memory intensive use cases like manipulation of NumPy arrays, where the data
+copying involved in out-of-place operations can make them impractical as
+alternatives to their in-place counterparts.
+
+That said, this PEP mainly exists because the PEP author found the inline
+assignment proposal much easier to grasp as "It's like ``+=``, only skipping
+the addition step", and also liked the way that that framing provides an
+actual semantic difference between ``NAME = EXPR`` and ``NAME := EXPR`` at
+function scope.
+
+That difference in target scoping behaviour means that the ``NAME := EXPR``
+syntax would be expected to have two primary use cases:
+
+* as a way of allowing assignments to be embedded as an expression in an ``if``
+  or ``while`` statement, or as part of a scoped expression
+* as a way of requesting a compile time check that the target name be previously
+  declared or bound in the current function scope
+
+At module or class scope, ``NAME = EXPR`` and ``NAME := EXPR`` would be
+semantically equivalent due to the compiler's lack of visibility into the set
+of names that will be resolvable at runtime, but code linters and static
+type checkers would be encouraged to enforce the same "declaration or assignment
+required before use" behaviour for ``NAME := EXPR`` as the compiler would
+enforce at function scope.
+
+
+Postponing a decision on expression level target declarations
+--------------------------------------------------------------
+
+At least for Python 3.8, usage of inline assignments (whether augmented or not)
+at function scope would always require a preceding name binding or scope
+declaration to avoid getting ``TargetNameError``, even when used outside a
+scoped expression.
+
+The intent behind this requirement is to clearly separate the following two
+language design questions:
+
+1. Can an expression rebind a name in the current scope?
+2. Can an expression declare a new name in the current scope?
+
+For module global scopes, the answer to both of those questions is unequivocally
+"Yes", because it's a language level guarantee that mutating the ``globals()``
+dict will immediately impact the runtime module scope, and ``global NAME``
+declarations inside a function can have the same effect (as can importing the
+currently executing module and modifying its attributes).
+
+For class scopes, the answer to both questions is also "Yes" in practice,
+although less unequivocally so, since the semantics of ``locals()`` are
+currently formally unspecified. However, if the current behaviour of ``locals()``
+at class scope is taken as normative (as PEP 558 proposes), then this is
+essentially the same scenario as manipulating the module globals, just using
+``locals()`` instead.
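+
+By way of illustration only (since, as noted above, the exact semantics
+remain formally unspecified), current CPython does let a class body
+manipulate its namespace through ``locals()``::
+
+    >>> class C:
+    ...     locals()["x"] = 1
+    ...
+    >>> C.x
+    1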
+
+For function scopes, however, the current answers to these two questions are
+respectively "Yes" and "No". Expression level rebinding of function locals is
+already possible thanks to lexically nested scopes and explicit ``nonlocal NAME``
+declarations. While this PEP will likely make expression level rebinding more
+common than it is today, it isn't a fundamentally new concept for the language.
+
+By contrast, declaring a *new* function local variable is currently a statement
+level action, involving one of:
+
+* an assignment statement (``NAME = EXPR``, ``OTHER_TARGET = NAME = EXPR``, etc)
+* a variable declaration (``NAME : EXPR``)
+* a nested function definition
+* a nested class definition
+* a ``for`` loop
+* a ``with`` statement
+* an ``except`` clause (with limited scope of access)
+
+The historical trend for the language has actually been to *remove* support for
+expression level declarations of function local names, first with the
+introduction of "fast locals" semantics (which made the introduction of names
+via ``locals()`` unsupported for function scopes), and again with the hiding
+of comprehension iteration variables in Python 3.0.
+
+Now, it may be that in Python 3.9, we decide to revisit this question based on
+our experience with expression level name binding in Python 3.8, and decide that
+we really do want expression level function local variable declarations as well,
+and that we want ``NAME := EXPR`` to be the way we spell that (rather than,
+for example, spelling inline declarations more explicitly as
+``NAME := EXPR given NAME``, which would permit them to carry type annotations,
+and also permit them to declare new local variables in scoped expressions,
+rather than having to pollute the namespace in their containing scope).
+
+But the proposal in this PEP is that we explicitly give ourselves a full
+release to decide how much we want that feature, and exactly where we find
+its absence irritating. Python has survived happily without expression level
+name bindings *or* declarations for decades, so we can afford to give ourselves
+a couple of years to decide if we really want *both* of those, or if expression
+level bindings are sufficient.
+
+
+Ignoring scoped expressions when determining augmented assignment targets
+-------------------------------------------------------------------------
+
+When discussing possible binding semantics for PEP 572's assignment expressions,
+Tim Peters made a plausible case [1_,2_,3_] for assignment expressions targeting
+the containing block scope, essentially ignoring any intervening scoped
+expressions.
+
+This approach allows use cases like cumulative sums, or extracting the final
+value from a generator expression, to be written in a relatively straightforward
+way::
+
+    total = 0
+    partial_sums = [total := total + value for value in data]
+
+    factor = 1
+    while any(n % (factor := p) == 0 for p in small_primes):
+        n //= factor
+
+Guido also expressed his approval for this general approach [4_].
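+
+For comparison, a rough sketch of how the cumulative sums use case has to be
+approximated in current Python, without inline assignment
+(``itertools.accumulate`` stands in for the comprehension, and ``data`` is a
+placeholder sequence)::
+
+    import itertools
+
+    data = [1, 2, 3, 4]
+    partial_sums = list(itertools.accumulate(data))  # [1, 3, 6, 10]
+    total = partial_sums[-1] if partial_sums else 0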
+
+The proposal in this PEP differs from Tim's original proposal in three main
+areas:
+
+- it applies the proposal to all augmented assignment operators, not just a
+  single new name binding operator
+- as far as is practical, it extends the augmented assignment requirement that
+  the name already be defined to the new name binding operator (raising
+  ``TargetNameError`` rather than implicitly declaring new local variables at
+  function scope)
+- it includes lambda expressions in the set of scopes that get ignored for
+  target name binding purposes, making this transparency to assignments common
+  to all of the scoped expressions rather than being specific to comprehensions
+  and generator expressions
+
+With scoped expressions being ignored when calculating binding targets, it's
+once again difficult to detect the scoping difference between the outermost
+iterable expressions in generator expressions and comprehensions (you have to
+mess about with either class scopes or attempting to rebind iteration variables
+to detect it), so there's also no need to tinker with that.
+
+
+Treating inline assignment as an augmented assignment variant
+-------------------------------------------------------------
+
+One of the challenges with PEP 572 is the fact that ``NAME = EXPR`` and
+``NAME := EXPR`` are entirely semantically equivalent at every scope. This
+makes the two forms hard to teach, since there's no inherent nudge towards
+choosing one over the other at the statement level, so you end up having to
+resort to "``NAME = EXPR`` is preferred because it's been around longer"
+(and PEP 572 proposes to enforce that historical idiosyncrasy at the compiler
+level).
+
+That semantic equivalence is difficult to avoid at module and class scope while
+still having ``if NAME := EXPR:`` and ``while NAME := EXPR:`` work sensibly, but
+at function scope the compiler's comprehensive view of all local names makes
+it possible to require that the name be assigned or declared before use,
+providing a reasonable incentive to continue to default to using the
+``NAME = EXPR`` form when possible, while also enabling the use of
+``NAME := EXPR`` as a kind of simple compile time assertion (i.e. explicitly
+indicating that the targeted name has already been bound or declared and hence
+should already be known to the compiler).
+
+If Guido were to declare that support for inline declarations was a hard
+design requirement, then this PEP would be updated to propose that
+``EXPR given NAME`` also be introduced as a way to support inline name
+declarations after arbitrary expressions (this would allow the inline name
+declarations to be deferred until the end of a complex expression rather than
+needing to be embedded in the middle of it, and PEP 8 would gain a
+recommendation encouraging that style).
+
+
+Disallowing augmented assignments in class level scoped expressions
+-------------------------------------------------------------------
+
+While modern classes do define an implicit closure that's visible to method
+implementations (in order to make ``__class__`` available for use in zero-arg
+``super()`` calls), there's no way for user level code to explicitly add
+additional names to that scope.
+
+Meanwhile, attributes defined in a class body are ignored for the purpose of
+defining a method's lexical closure, which means adding them there wouldn't
+work at an implementation level, as demonstrated below.
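+
+That closure behaviour is easy to demonstrate in current Python, where a
+method body cannot see names bound in the class body::
+
+    >>> class C:
+    ...     attr = 1
+    ...     def method(self):
+    ...         return attr  # Looks up the global "attr", not "C.attr"
+    ...
+    >>> C().method()
+    Traceback (most recent call last):
+      ...
+    NameError: name 'attr' is not defined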
+
+Rather than trying to resolve that inherent ambiguity, this PEP simply
+prohibits such usage, and requires that any affected logic be written somewhere
+other than directly inline in the class body (e.g. in a separate helper
+function).
+
+
+Comparison operators vs assignment operators
+--------------------------------------------
+
+The ``OP=`` construct as an expression currently indicates a comparison
+operation::
+
+    x == y # Equals
+    x >= y # Greater-than-or-equal-to
+    x <= y # Less-than-or-equal-to
+
+Both this PEP and PEP 572 propose adding at least one operator that's somewhat
+similar in appearance, but defines an assignment instead::
+
+    x := y # Becomes
+
+This PEP then goes much further and allows all *13* augmented assignment symbols
+to be used as binary operators::
+
+    x += y  # In-place add
+    x -= y  # In-place minus
+    x *= y  # In-place multiply
+    x @= y  # In-place matrix multiply
+    x /= y  # In-place division
+    x //= y # In-place int division
+    x %= y  # In-place mod
+    x &= y  # In-place bitwise and
+    x |= y  # In-place bitwise or
+    x ^= y  # In-place bitwise xor
+    x <<= y # In-place left shift
+    x >>= y # In-place right shift
+    x **= y # In-place power
+
+Of those additional binary operators, the most questionable would be the
+bitshift assignment operators, since they're each only one doubled character
+away from one of the inclusive ordered comparison operators.
+
+
+Examples
+========
+
+Simplifying retry loops
+-----------------------
+
+There are currently a few different options for writing retry loops, including::
+
+    # Post-decrementing a counter
+    remaining_attempts = MAX_ATTEMPTS
+    while remaining_attempts:
+        remaining_attempts -= 1
+        try:
+            result = attempt_operation()
+        except Exception as exc:
+            continue # Failed, so try again
+        log.debug(f"Succeeded after {MAX_ATTEMPTS - remaining_attempts} attempts")
+        break # Success!
+    else:
+        raise OperationFailed(f"Failed after {MAX_ATTEMPTS} attempts") from exc
+
+    # Loop-and-a-half with a pre-incremented counter
+    attempts = 0
+    while True:
+        attempts += 1
+        if attempts > MAX_ATTEMPTS:
+            raise OperationFailed(f"Failed after {MAX_ATTEMPTS} attempts") from exc
+        try:
+            result = attempt_operation()
+        except Exception as exc:
+            continue # Failed, so try again
+        log.debug(f"Succeeded after {attempts} attempts")
+        break # Success!
+
+Each of the available options hides some aspect of the intended loop structure
+inside the loop body, whether that's the state modification, the exit condition,
+or both.
+
+The proposal in this PEP allows both the state modification and the exit
+condition to be included directly in the loop header::
+
+    attempts = 0
+    while (attempts += 1) <= MAX_ATTEMPTS:
+        try:
+            result = attempt_operation()
+        except Exception as exc:
+            continue # Failed, so try again
+        log.debug(f"Succeeded after {attempts} attempts")
+        break # Success!
+    else:
+        raise OperationFailed(f"Failed after {MAX_ATTEMPTS} attempts") from exc
+
+
+Simplifying if-elif chains
+--------------------------
+
+if-elif chains that need to rebind the checked condition currently need to
+be written using nested if-else statements::
+
+    m = pattern.match(data)
+    if m:
+        ...
+    else:
+        m = other_pattern.match(data)
+        if m:
+            ...
+        else:
+            m = yet_another_pattern.match(data)
+            if m:
+                ...
+            else:
+                ...
+
+As with PEP 572, this PEP allows the else/if portions of that chain to be
+condensed, making their consistent and mutually exclusive structure more
+readily apparent::
+
+    m = pattern.match(data)
+    if m:
+        ...
+    elif m := other_pattern.match(data):
+        ...
+    elif m := yet_another_pattern.match(data):
+        ...
+    else:
+        ...
+
+Unlike PEP 572, this PEP requires that the assignment target be explicitly
+indicated as local before the first use as a ``:=`` target, either by
+binding it to a value (as shown above), or else by including an appropriate
+explicit type declaration::
+
+    m: typing.re.Match
+    if m := pattern.match(data):
+        ...
+    elif m := other_pattern.match(data):
+        ...
+    elif m := yet_another_pattern.match(data):
+        ...
+    else:
+        ...
+
+
+Capturing intermediate values from comprehensions
+-------------------------------------------------
+
+The proposal in this PEP makes it straightforward to capture and reuse
+intermediate values in comprehensions and generator expressions by
+exporting them to the containing block scope::
+
+    factor: int
+    while any(n % (factor := p) == 0 for p in small_primes):
+        n //= factor
+
+    total = 0
+    partial_sums = [total += value for value in data]
+
+
+Allowing lambda expressions to act more like re-usable code thunks
+------------------------------------------------------------------
+
+This PEP allows the classic closure usage example::
+
+    def make_counter(start=0):
+        x = start
+        def counter(step=1):
+            nonlocal x
+            x += step
+            return x
+        return counter
+
+To be abbreviated as::
+
+    def make_counter(start=0):
+        x = start
+        return lambda step=1: x += step
+
+While the latter form is still a conceptually dense piece of code, it can be
+reasonably argued that the lack of boilerplate (where the "def", "nonlocal",
+and "return" keywords and two additional repetitions of the "x" variable name
+have been replaced with the "lambda" keyword) may make it easier to read in
+practice.
+
+
+Relationship with PEP 572
+=========================
+
+The case for allowing inline assignments at all is made in PEP 572. This
+competing PEP was initially going to propose an alternate surface syntax
+(``EXPR given NAME = EXPR``), while retaining the expression semantics from
+PEP 572, but that changed when discussing one of the initial motivating use
+cases for allowing embedded assignments at all: making it possible to easily
+calculate cumulative sums in comprehensions and generator expressions.
+
+As a result of that, and unlike PEP 572, this PEP focuses primarily on use
+cases for inline augmented assignment. It also has the effect of converting
+cases that currently inevitably raise ``UnboundLocalError`` at function call
+time to report a new compile time ``TargetNameError``.
+
+New syntax for a name rebinding expression (``NAME := EXPR``) is then added
+not only to handle the same use cases as are identified in PEP 572, but also
+as a lower level primitive to help illustrate, implement and explain
+the new augmented assignment semantics, rather than being the sole change
+proposed.
+
+The author of this PEP believes that this approach makes the value of the new
+flexibility in name rebinding clearer, while also mitigating many of the
+potential concerns raised with PEP 572 around explaining when to use
+``NAME = EXPR`` over ``NAME := EXPR`` (and vice-versa), without resorting to
+prohibiting the bare statement form of ``NAME := EXPR`` outright (such
+that ``NAME := EXPR`` is a compile error, but ``(NAME := EXPR)`` is permitted).
+ + +Acknowledgements +================ + +The PEP author wishes to thank Chris Angelico for his work on PEP 572, and his +efforts to create a coherent summary of the great many sprawling discussions +that spawned on both python-ideas and python-dev, as well as Tim Peters for +the in-depth discussion of parent local scoping that prompted the above +scoping proposal for augmented assignments inside scoped expressions. + +Eric Snow's feedback on a pre-release version of this PEP helped make it +significantly more readable. + + +References +========== + +.. [1] The beginning of Tim's genexp & comprehension scoping thread + (https://mail.python.org/pipermail/python-ideas/2018-May/050367.html) + +.. [2] Reintroducing the original cumulative sums use case + (https://mail.python.org/pipermail/python-ideas/2018-May/050544.html) + +.. [3] Tim's language reference level explanation of his proposed scoping semantics + (https://mail.python.org/pipermail/python-ideas/2018-May/050729.html) + +.. [4] Guido's endorsement of Tim's proposed genexp & comprehension scoping + (https://mail.python.org/pipermail/python-ideas/2018-May/050411.html) + + +Copyright +========= + +This document has been placed in the public domain. + + + +.. + Local Variables: + mode: indented-text + indent-tabs-mode: nil + sentence-end-double-space: t + fill-column: 70 + coding: utf-8 + End: diff --git a/pep-0578.rst b/pep-0578.rst new file mode 100644 index 000000000..4e2985012 --- /dev/null +++ b/pep-0578.rst @@ -0,0 +1,492 @@ +PEP: 578 +Title: Python Runtime Audit Hooks +Version: $Revision$ +Last-Modified: $Date$ +Author: Steve Dower +Status: Draft +Type: Standards Track +Content-Type: text/x-rst +Created: 16-Jun-2018 +Python-Version: 3.8 +Post-History: + +Abstract +======== + +This PEP describes additions to the Python API and specific behaviors +for the CPython implementation that make actions taken by the Python +runtime visible to auditing tools. Visibility into these actions +provides opportunities for test frameworks, logging frameworks, and +security tools to monitor and optionally limit actions taken by the +runtime. + +This PEP proposes adding two APIs to provide insights into a running +Python application: one for arbitrary events, and another specific to +the module import system. The APIs are intended to be available in all +Python implementations, though the specific messages and values used +are unspecified here to allow implementations the freedom to determine +how best to provide information to their users. Some examples likely +to be used in CPython are provided for explanatory purposes. + +See PEP-551 for discussion and recommendations on enhancing the +security of a Python runtime making use of these auditing APIs. + +Background +========== + +Python provides access to a wide range of low-level functionality on +many common operating systems in a consistent manner. While this is +incredibly useful for "write-once, run-anywhere" scripting, it also +makes monitoring of software written in Python difficult. Because +Python uses native system APIs directly, existing monitoring +tools either suffer from limited context or auditing bypass. + +Limited context occurs when system monitoring can report that an +action occurred, but cannot explain the sequence of events leading to +it. 
For example, network monitoring at the OS level may be able to
+report "listening started on port 5678", but may not be able to
+provide the process ID, command line or parent process, or the local
+state in the program at the point that triggered the action. Firewall
+controls to prevent such an action are similarly limited, typically
+to a process name or some global state such as the current user, and
+in any case rarely provide a useful log file correlated with other
+application messages.
+
+Auditing bypass can occur when the typical system tool used for an
+action would ordinarily report its use, but accessing the APIs via
+Python does not trigger this. For example, invoking "curl" to make HTTP
+requests may be specifically monitored in an audited system, but
+Python's "urlretrieve" function is not.
+
+Within a long-running Python application, particularly one that
+processes user-provided information such as a web app, there is a risk
+of unexpected behavior. This may be due to bugs in the code, or
+deliberately induced by a malicious user. In both cases, normal
+application logging may be bypassed, resulting in no indication that
+anything out of the ordinary has occurred.
+
+Additionally, and somewhat unique to Python, it is very easy to affect
+the code that is run in an application by manipulating either the
+import system's search path or placing files earlier on the path than
+intended. This is often seen when developers create a script with the
+same name as the module they intend to use - for example, a
+``random.py`` file that attempts to import the standard library
+``random`` module.
+
+Overview of Changes
+===================
+
+The aim of these changes is to enable both application developers and
+system administrators to integrate Python into their existing
+monitoring systems without dictating how those systems look or behave.
+
+We propose two API changes to enable this: an Audit Hook and a Verified
+Open Hook. Both are available from Python and native code, allowing
+applications and frameworks written in pure Python code to take
+advantage of the extra messages, while also allowing embedders or
+system administrators to deploy "always-on" builds of Python.
+
+Only CPython is bound to provide the native APIs as described here.
+Other implementations should provide the pure Python APIs, and
+may provide native versions as appropriate for their underlying
+runtimes.
+
+Audit Hook
+----------
+
+In order to observe actions taken by the runtime (on behalf of the
+caller), an API is required to raise messages from within certain
+operations. These operations are typically deep within the Python
+runtime or standard library, such as dynamic code compilation, module
+imports, DNS resolution, or use of certain modules such as ``ctypes``.
+
+The following new C APIs allow embedders and CPython implementors to
+send and receive audit hook messages::
+
+    # Add an auditing hook
+    typedef int (*hook_func)(const char *event, PyObject *args,
+                             void *userData);
+    int PySys_AddAuditHook(hook_func hook, void *userData);
+
+    # Raise an event with all auditing hooks
+    int PySys_Audit(const char *event, PyObject *args);
+
+    # Internal API used during Py_Finalize() - not publicly accessible
+    void _Py_ClearAuditHooks(void);
+
+The new Python APIs for receiving and raising audit hooks are::
+
+    # Add an auditing hook
+    sys.addaudithook(hook: Callable[[str, tuple], None])
+
+    # Raise an event with all auditing hooks
+    sys.audit(str, *args)
+
+
+Hooks are added by calling ``PySys_AddAuditHook()`` from C at any time,
+including before ``Py_Initialize()``, or by calling
+``sys.addaudithook()`` from Python code. Hooks cannot be removed or
+replaced.
+
+When events of interest are occurring, code can either call
+``PySys_Audit()`` from C (while the GIL is held) or ``sys.audit()``. The
+string argument is the name of the event, and the tuple contains
+arguments. A given event name should have a fixed schema for arguments,
+which should be considered a public API (for a given x.y version
+release), and thus should only change between feature releases with
+updated documentation.
+
+For maximum compatibility, events using the same name as an event in
+the reference interpreter CPython should make every attempt to use
+compatible arguments. Including the name or an abbreviation of the
+implementation in implementation-specific event names will also help
+prevent collisions. For example, a ``pypy.jit_invoked`` event is clearly
+distinguished from an ``ipy.jit_invoked`` event.
+
+When an event is audited, each hook is called in the order it was added
+with the event name and tuple. If any hook returns with an exception
+set, later hooks are ignored and *in general* the Python runtime should
+terminate. This is intentional to allow hook implementations to decide
+how to respond to any particular event. The typical responses will be to
+log the event, abort the operation with an exception, or to immediately
+terminate the process with an operating system exit call.
+
+When an event is audited but no hooks have been set, the ``audit()``
+function should include minimal overhead. Ideally, each argument is a
+reference to existing data rather than a value calculated just for the
+auditing call.
+
+As hooks may be Python objects, they need to be freed during
+``Py_Finalize()``. To do this, we add an internal API
+``_Py_ClearAuditHooks()`` that releases any Python hooks and any
+memory held. This is an internal function with no public export, and
+we recommend it should raise its own audit event for all current hooks
+to ensure that unexpected calls are observed.
+
+Below in `Suggested Audit Hook Locations`_, we recommend some important
+operations that should raise audit events. In PEP 551, more audited
+operations are recommended with a view to security transparency.
+
+Python implementations should document which operations will raise
+audit events, along with the event schema. It is intended that
+``sys.addaudithook(print)`` be a trivial way to display all messages.
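+
+As an illustrative sketch only (the final API shape and event names remain
+subject to change, and ``myapp.download`` below is a hypothetical
+application-defined event), a pure Python consumer of the proposed API might
+look like::
+
+    import sys
+
+    def audit_hook(event, args):
+        # Log every event; abort one particular operation outright.
+        if event == "ctypes.dlopen":
+            raise RuntimeError("loading native libraries is not permitted")
+        print(f"audit: {event!r} {args!r}")
+
+    sys.addaudithook(audit_hook)
+
+    # Applications and frameworks can raise their own events as well.
+    sys.audit("myapp.download", "https://example.com/data.bin")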
+
+Verified Open Hook
+------------------
+
+Most operating systems have a mechanism to distinguish between files
+that can be executed and those that can not. For example, this may be an
+execute bit in the permissions field, or a verified hash of the file
+contents to detect potential code tampering. These are an important
+security mechanism for preventing execution of data or code that is not
+approved for a given environment. Currently, Python has no way to
+integrate with these when launching scripts or importing modules.
+
+The new public C API for the verified open hook is::
+
+    # Set the handler
+    typedef PyObject *(*hook_func)(PyObject *path, void *userData);
+    int PyImport_SetOpenForImportHook(hook_func handler, void *userData);
+
+    # Open a file using the handler
+    PyObject *PyImport_OpenForImport(const char *path);
+
+The new public Python API for the verified open hook is::
+
+    # Open a file using the handler
+    importlib.util.open_for_import(path: str) -> io.IOBase
+
+
+The ``importlib.util.open_for_import()`` function is a drop-in
+replacement for ``open(str(pathlike), 'rb')``. Its default behaviour is
+to open a file for raw, binary access. To change the behaviour a new
+handler should be set. Handler functions only accept ``str`` arguments.
+
+A custom handler may be set by calling ``PyImport_SetOpenForImportHook()``
+from C at any time, including before ``Py_Initialize()``. However, if a
+hook has already been set then the call will fail. When
+``open_for_import()`` is called with a hook set, the hook will be passed
+the path and its return value will be returned directly. The returned
+object should be an open file-like object that supports reading raw
+bytes. This is explicitly intended to allow a ``BytesIO`` instance if
+the open handler has already had to read the file into memory in order
+to perform whatever verification is necessary to determine whether the
+content is permitted to be executed.
+
+Note that these hooks can import and call the ``_io.open()`` function on
+CPython without triggering themselves. They can also use ``_io.BytesIO``
+to return a compatible result using an in-memory buffer.
+
+If the hook determines that the file should not be loaded, it should
+raise an exception of its choice, as well as performing any other
+logging.
+
+All import and execution functionality involving code from a file will
+be changed to use ``open_for_import()`` unconditionally. It is important
+to note that calls to ``compile()``, ``exec()`` and ``eval()`` do not go
+through this function - an audit hook that includes the code from these
+calls is the best opportunity to validate code that is read from the
+file. Given the current decoupling between import and execution in
+Python, most imported code will go through both ``open_for_import()``
+and the audit hook for ``compile``, and so care should be taken to avoid
+repeating verification steps.
+
+There is no Python API provided for changing the open hook. To modify
+import behavior from Python code, use the existing functionality
+provided by ``importlib``.
+
+API Availability
+----------------
+
+While all the functions added here are considered public and stable API,
+the behavior of the functions is implementation specific. Most
+descriptions here refer to the CPython implementation, and while other
+implementations should provide the functions, there is no requirement
+that they behave the same.
+
+For example, ``sys.addaudithook()`` and ``sys.audit()`` should exist but
+may do nothing. This allows code to make calls to ``sys.audit()``
+without having to test for existence, but it should not assume that its
+call will have any effect.
(Including existence tests in +security-critical code allows another vector to bypass auditing, so it +is preferable that the function always exist.) + +``importlib.util.open_for_import(path)`` should at a minimum always +return ``_io.open(path, 'rb')``. Code using the function should make no +further assumptions about what may occur, and implementations other than +CPython are not required to let developers override the behavior of this +function with a hook. + +Suggested Audit Hook Locations +============================== + +The locations and parameters in calls to ``sys.audit()`` or +``PySys_Audit()`` are to be determined by individual Python +implementations. This is to allow maximum freedom for implementations +to expose the operations that are most relevant to their platform, +and to avoid or ignore potentially expensive or noisy events. + +Table 1 acts as both suggestions of operations that should trigger +audit events on all implementations, and examples of event schemas. + +Table 2 provides further examples that are not required, but are +likely to be available in CPython. + +Refer to the documentation associated with your version of Python to +see which operations provide audit events. + +.. csv-table:: Table 1: Suggested Audit Hooks + :header: "API Function", "Event Name", "Arguments", "Rationale" + :widths: 2, 2, 3, 6 + + ``PySys_AddAuditHook``, ``sys.addaudithook``, "", "Detect when new + audit hooks are being added. + " + ``PyImport_SetOpenForImportHook``, ``setopenforimporthook``, "", " + Detects any attempt to set the ``open_for_import`` hook. + " + "``compile``, ``exec``, ``eval``, ``PyAst_CompileString``, + ``PyAST_obj2mod``", ``compile``, "``(code, filename_or_none)``", " + Detect dynamic code compilation, where ``code`` could be a string or + AST. Note that this will be called for regular imports of source + code, including those that were opened with ``open_for_import``. + " + "``exec``, ``eval``, ``run_mod``", ``exec``, "``(code_object,)``", " + Detect dynamic execution of code objects. This only occurs for + explicit calls, and is not raised for normal function invocation. + " + ``import``, ``import``, "``(module, filename, sys.path, + sys.meta_path, sys.path_hooks)``", "Detect when modules are + imported. This is raised before the module name is resolved to a + file. All arguments other than the module name may be ``None`` if + they are not used or available. + " + ``PyEval_SetProfile``, ``sys.setprofile``, "", "Detect when code is + injecting trace functions. Because of the implementation, exceptions + raised from the hook will abort the operation, but will not be + raised in Python code. Note that ``threading.setprofile`` eventually + calls this function, so the event will be audited for each thread. + " + ``PyEval_SetTrace``, ``sys.settrace``, "", "Detect when code is + injecting trace functions. Because of the implementation, exceptions + raised from the hook will abort the operation, but will not be + raised in Python code. Note that ``threading.settrace`` eventually + calls this function, so the event will be audited for each thread. + " + "``_PyObject_GenericSetAttr``, ``check_set_special_type_attr``, + ``object_set_class``, ``func_set_code``, ``func_set_[kw]defaults``"," + ``object.__setattr__``","``(object, attr, value)``","Detect monkey + patching of types and objects. This event + is raised for the ``__class__`` attribute and any attribute on + ``type`` objects. 
+   "
+   "``_PyObject_GenericSetAttr``",``object.__delattr__``,"``(object,
+   attr)``","Detect deletion of object attributes. This event is raised
+   for any attribute on ``type`` objects.
+   "
+   "``Unpickler.find_class``",``pickle.find_class``,"``(module_name,
+   global_name)``","Detect imports and global name lookup when
+   unpickling.
+   "
+
+
+.. csv-table:: Table 2: Potential CPython Audit Hooks
+   :header: "API Function", "Event Name", "Arguments", "Rationale"
+   :widths: 2, 2, 3, 6
+
+   ``_PySys_ClearAuditHooks``, ``sys._clearaudithooks``, "", "Notifies
+   hooks they are being cleaned up, mainly in case the event is
+   triggered unexpectedly. This event cannot be aborted.
+   "
+   ``code_new``, ``code.__new__``, "``(bytecode, filename, name)``", "
+   Detect dynamic creation of code objects. This only occurs for
+   direct instantiation, and is not raised for normal compilation.
+   "
+   ``func_new_impl``, ``function.__new__``, "``(code,)``", "Detect
+   dynamic creation of function objects. This only occurs for direct
+   instantiation, and is not raised for normal compilation.
+   "
+   "``_ctypes.dlopen``, ``_ctypes.LoadLibrary``", ``ctypes.dlopen``, "
+   ``(module_or_path,)``", "Detect when native modules are used.
+   "
+   ``_ctypes._FuncPtr``, ``ctypes.dlsym``, "``(lib_object, name)``", "
+   Collect information about specific symbols retrieved from native
+   modules.
+   "
+   ``_ctypes._CData``, ``ctypes.cdata``, "``(ptr_as_int,)``", "Detect
+   when code is accessing arbitrary memory using ``ctypes``.
+   "
+   "``new_mmap_object``",``mmap.__new__``,"``(fileno, map_size, access,
+   offset)``", "Detects creation of mmap objects. On POSIX, access may
+   have been calculated from the ``prot`` and ``flags`` arguments.
+   "
+   ``sys._getframe``, ``sys._getframe``, "``(frame_object,)``", "Detect
+   when code is accessing frames directly.
+   "
+   ``sys._current_frames``, ``sys._current_frames``, "", "Detect when
+   code is accessing frames directly.
+   "
+   "``socket.bind``, ``socket.connect``, ``socket.connect_ex``,
+   ``socket.getaddrinfo``, ``socket.getnameinfo``, ``socket.sendmsg``,
+   ``socket.sendto``", ``socket.address``, "``(address,)``", "Detect
+   access to network resources. The address is unmodified from the
+   original call.
+   "
+   "``member_get``, ``func_get_code``, ``func_get_[kw]defaults``
+   ",``object.__getattr__``,"``(object, attr)``","Detect access to
+   restricted attributes. This event is raised for any built-in
+   members that are marked as restricted, and members that may allow
+   bypassing imports.
+   "
+   "``urllib.urlopen``",``urllib.Request``,"``(url, data, headers,
+   method)``", "Detects URL requests.
+   "
+
+Performance Impact
+==================
+
+The important performance impact is the case where events are being
+raised but there are no hooks attached. This is the unavoidable case -
+once a distributor begins adding audit hooks they have explicitly
+chosen to trade performance for functionality. The performance impact
+with hooks added is not of interest here, since this is considered
+opt-in functionality.
+
+Analysis using the Python Performance Benchmark Suite [1]_ shows no
+significant impact, with the vast majority of benchmarks showing
+between 1.05x faster and 1.05x slower.
+
+In our opinion, the performance impact of the set of auditing points
+described in this PEP is negligible.
+
+Rejected Ideas
+==============
+
+Separate module for audit hooks
+-------------------------------
+
+The proposal is to add a new module for audit hooks, hypothetically
+``audit``.
This would separate the API and implementation from the +``sys`` module, and allow naming the C functions ``PyAudit_AddHook`` and +``PyAudit_Audit`` rather than the current variations. + +Any such module would need to be a built-in module that is guaranteed to +always be present. The nature of these hooks is that they must be +callable without condition, as any conditional imports or calls provide +opportunities to intercept and suppress or modify events. + +Given its nature as one of the most core modules, the ``sys`` module is +somewhat protected against module shadowing attacks. Replacing ``sys`` +with a sufficiently functional module that the application can still run +is a much more complicated task than replacing a module with only one +function of interest. An attacker that has the ability to shadow the +``sys`` module is already capable of running arbitrary code from files, +whereas an ``audit`` module can be replaced with a single line in a +``.pth`` file anywhere on the search path:: + + import sys; sys.modules['audit'] = type('audit', (object,), + {'audit': lambda *a: None, 'addhook': lambda *a: None}) + +Multiple layers of protection already exist for monkey patching attacks +against either ``sys`` or ``audit``, but assignments or insertions to +``sys.modules`` are not audited. + +This idea is rejected because it makes substituting ``audit`` calls +throughout all callers trivial. + +Flag in sys.flags to indicate "audited" mode +-------------------------------------------- + +The proposal is to add a value in ``sys.flags`` to indicate when Python +is running in a "secure" or "audited" mode. This would allow +applications to detect when some features are enabled or when hooks +have been added and modify their behaviour appropriately. + +Currently, we are not aware of any legitimate reasons for a program to +behave differently in the presence of audit hooks. + +Both application-level APIs ``sys.audit`` and +``importlib.util.open_for_import`` are always present and functional, +regardless of whether the regular ``python`` entry point or some +alternative entry point is used. Callers cannot determine whether any +hooks have been added (except by performing side-channel analysis), nor +do they need to. The calls should be fast enough that callers do not +need to avoid them, and the program is responsible for ensuring that +any added hooks are fast enough to not affect application performance. + +The argument that this is "security by obscurity" is valid, but +irrelevant. Security by obscurity is only an issue when there are no +other protective mechanisms; obscurity as the first step in avoiding +attack is strongly recommended (see `this article +`_ for +discussion). + +This idea is rejected because there are no appropriate reasons for an +application to change its behaviour based on whether these APIs are in +use. + +Relationship to PEP 551 +======================= + +This API was originally presented as part of +`PEP 551 `_ Security +Transparency in the Python Runtime. + +For simpler review purposes, and due to the broader applicability of +these APIs beyond security, the API design is now presented separately. + +PEP 551 is an informational PEP discussing how to integrate Python into +a secure or audited environment. + +References +========== + +.. [1] Python Performance Benchmark Suite ``_ + +Copyright +========= + +Copyright (c) 2018 by Microsoft Corporation. 
This material may be
+distributed only subject to the terms and conditions set forth in the
+Open Publication License, v1.0 or later (the latest version is presently
+available at http://www.opencontent.org/openpub/).
diff --git a/pep-0579.rst b/pep-0579.rst
new file mode 100644
index 000000000..86eb2f877
--- /dev/null
+++ b/pep-0579.rst
@@ -0,0 +1,416 @@
+PEP: 579
+Title: Refactoring C functions and methods
+Author: Jeroen Demeyer <J.Demeyer@UGent.be>
+Status: Draft
+Type: Informational
+Content-Type: text/x-rst
+Created: 04-Jun-2018
+Post-History: 20-Jun-2018
+
+
+Abstract
+========
+
+This meta-PEP collects various issues with CPython's existing implementation
+of built-in functions (functions implemented in C) and methods.
+
+Fixing all these issues is too much for one PEP,
+so that will be delegated to other standards track PEPs.
+However, this PEP does give some brief ideas of possible fixes.
+This is mainly meant to coordinate an overall strategy.
+For example, a proposed solution may sound too complicated
+for fixing any one single issue, but it may be the best overall
+solution for multiple issues.
+
+This PEP is purely informational:
+it does not imply that all issues will eventually
+be fixed, nor that they will be fixed using the solution proposed here.
+
+It also serves as a check-list of possible requested features
+to verify that a given fix does not make those
+other features harder to implement.
+
+The major proposed change is replacing ``PyMethodDef``
+by a new structure ``PyCCallDef``
+which collects everything needed for calling the function/method.
+In the ``PyTypeObject`` structure, a new field ``tp_ccalloffset``
+is added giving an offset to a ``PyCCallDef *`` in the object structure.
+
+**NOTE**: This PEP deals only with CPython implementation details;
+it does not affect the Python language or standard library.
+
+
+Issues
+======
+
+This lists various issues with built-in functions and methods,
+together with a plan for a solution and (if applicable)
+pointers to standards track PEPs discussing the details.
+
+
+1. Naming
+---------
+
+The word "built-in" is overused in Python.
+From a quick skim of the Python documentation, it mostly refers
+to things from the ``builtins`` module.
+In other words: things which are available in the global namespace
+without a need for importing them.
+This conflicts with the use of the word "built-in" to mean "implemented in C".
+
+**Solution**: since the C structure for built-in functions and methods is already
+called ``PyCFunctionObject``,
+let's use the names "cfunction" and "cmethod" instead of "built-in function"
+and "built-in method".
+
+
+2. Not extendable
+-----------------
+
+The various classes involved (such as ``builtin_function_or_method``)
+cannot be subclassed::
+
+    >>> from types import BuiltinFunctionType
+    >>> class X(BuiltinFunctionType):
+    ...     pass
+    Traceback (most recent call last):
+      File "<stdin>", line 1, in <module>
+    TypeError: type 'builtin_function_or_method' is not an acceptable base type
+
+This is a problem because it makes it impossible to add features
+such as introspection support to these classes.
+
+If one wants to implement a function in C with additional functionality,
+an entirely new class must be implemented from scratch.
+The problem with this is that the existing classes like
+``builtin_function_or_method`` are special-cased in the Python interpreter
+to allow faster calling (for example, by using ``METH_FASTCALL``).
+It is currently impossible to have a custom class with the same optimizations.
+
+**Solution**: make the existing optimizations available to arbitrary classes.
+This is done by adding a new ``PyTypeObject`` field ``tp_ccalloffset``
+(or can we re-use ``tp_print`` for that?)
+specifying the offset of a ``PyCCallDef`` pointer.
+This is a new structure holding all information needed to call
+a cfunction and it would be used instead of ``PyMethodDef``.
+This implements the new "C call" protocol.
+
+For constructing cfunctions and cmethods, ``PyMethodDef`` arrays
+will still be used (for example, in ``tp_methods``) but that will
+be the *only* remaining purpose of the ``PyMethodDef`` structure.
+
+Additionally, we can also make some function classes subclassable.
+However, this seems less important once we have ``tp_ccalloffset``.
+
+**Reference**: PEP 580
+
+
+3. cfunctions do not become methods
+-----------------------------------
+
+A cfunction like ``repr`` does not implement ``__get__`` to bind
+as a method::
+
+    >>> class X:
+    ...     meth = repr
+    >>> x = X()
+    >>> x.meth()
+    Traceback (most recent call last):
+      File "<stdin>", line 1, in <module>
+    TypeError: repr() takes exactly one argument (0 given)
+
+In this example, one would have expected that ``x.meth()`` returns
+``repr(x)`` by applying the normal rules of methods.
+
+This is surprising and a needless difference
+between cfunctions and Python functions.
+For the standard built-in functions, this is not really a problem
+since those are not meant to be used as methods.
+But it does become a problem when one wants to implement a
+new cfunction with the goal of being usable as a method.
+
+Again, a solution could be to create a new class behaving just
+like cfunctions but which binds as a method.
+However, that would lose some existing optimizations for methods,
+such as the ``LOAD_METHOD``/``CALL_METHOD`` opcodes.
+
+**Solution**: the same as the previous issue.
+It just shows that handling ``self`` and ``__get__``
+should be part of the new C call protocol.
+
+For backwards compatibility, we would keep the existing non-binding
+behavior of cfunctions. We would just allow it in custom classes.
+
+**Reference**: PEP 580
+
+
+4. Semantics of inspect.isfunction
+----------------------------------
+
+Currently, ``inspect.isfunction`` returns ``True`` only for instances
+of ``types.FunctionType``.
+That is, true Python functions.
+
+A common use case for ``inspect.isfunction`` is checking for introspection:
+it guarantees for example that ``inspect.getfile()`` will work.
+Ideally, it should be possible for other classes to be treated as
+functions too.
+
+**Solution**: introduce a new ``InspectFunction`` abstract base class
+and use that to implement ``inspect.isfunction``.
+Alternatively, use duck typing for ``inspect.isfunction``
+(as proposed in [#bpo30071]_)::
+
+    def isfunction(obj):
+        return hasattr(type(obj), "__code__")
+
+
+5. C functions should have access to the function object
+--------------------------------------------------------
+
+The underlying C function of a cfunction currently
+takes a ``self`` argument (for bound methods)
+and then possibly a number of arguments.
+There is no way for the C function to actually access the Python
+cfunction object (the ``self`` in ``__call__`` or ``tp_call``).
+This would for example allow implementing the
+C call protocol for Python functions (``types.FunctionType``):
+the C function which implements calling Python functions
+needs access to the ``__code__`` attribute of the function.
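+
+As a purely illustrative Python-level analogy (the class below is
+hypothetical), ``__call__`` always receives the callable itself, which is
+exactly the access that the underlying C function is currently missing::
+
+    class FunctionAnalogy:
+        def __init__(self, code):
+            self.__code__ = code
+
+        def __call__(self, *args):
+            # "self" here is the function object, so metadata such as
+            # __code__ is reachable from inside the call.
+            return (self.__code__, args)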
+ +This is also needed for PEP 573 +where all cfunctions require access to their "parent" +(the module for functions of a module or the defining class +for methods). + +**Solution**: add a new ``PyMethodDef`` flag to specify +that the C function takes an additional argument (as first argument), +namely the function object. + +**References**: PEP 580, PEP 573 + + +6. METH_FASTCALL is private and undocumented +-------------------------------------------- + +The ``METH_FASTCALL`` mechanism allows calling cfunctions and cmethods +using a C array of Python objects instead of a ``tuple``. +This was introduced in Python 3.6 for positional arguments only +and extended in Python 3.7 with support for keyword arguments. + +However, given that it is undocumented, +it is presumably only supposed to be used by CPython itself. + +**Solution**: since this is an important optimization, +everybody should be encouraged to use it. +Now that the implementation of ``METH_FASTCALL`` is stable, document it! + +As part of the C call protocol, we should also add a C API function :: + + PyObject *PyCCall_FastCall(PyObject *func, PyObject *const *args, Py_ssize_t nargs, PyObject *keywords) + +**Reference**: PEP 580 + + +7. Allowing native C arguments +------------------------------ + +A cfunction always takes its arguments as Python objects +(say, an array of ``PyObject`` pointers). +In cases where the cfunction is really wrapping a native C function +(for example, coming from ``ctypes`` or some compiler like Cython), +this is inefficient: calls from C code to C code are forced to use +Python objects to pass arguments. + +Analogous to the buffer protocol which allows access to C data, +we should also allow access to the underlying C callable. + +**Solution**: when wrapping a C function with native arguments +(for example, a C ``long``) inside a cfunction, +we should also store a function pointer to the underlying C function, +together with its C signature. + +Argument Clinic could automatically do this by storing +a pointer to the "impl" function. + + +8. Complexity +------------- + +There are a huge number of classes involved to implement +all variations of methods. +This is not a problem by itself, but a compounding issue. + +For ordinary Python classes, the table below gives the classes +for various kinds of methods. 
+
+The columns refer to the class in the class ``__dict__``,
+the class for unbound methods (bound to the class)
+and the class for bound methods (bound to the instance):
+
+============= ================ ============ ============
+kind          __dict__         unbound      bound
+============= ================ ============ ============
+Normal method ``function``     ``function`` ``method``
+Static method ``staticmethod`` ``function`` ``function``
+Class method  ``classmethod``  ``method``   ``method``
+Slot method   ``function``     ``function`` ``method``
+============= ================ ============ ============
+
+This is the analogous table for extension types (C classes):
+
+============= ========================== ============================== ==============================
+kind          __dict__                   unbound                        bound
+============= ========================== ============================== ==============================
+Normal method ``method_descriptor``      ``method_descriptor``          ``builtin_function_or_method``
+Static method ``staticmethod``           ``builtin_function_or_method`` ``builtin_function_or_method``
+Class method  ``classmethod_descriptor`` ``builtin_function_or_method`` ``builtin_function_or_method``
+Slot method   ``wrapper_descriptor``     ``wrapper_descriptor``         ``method-wrapper``
+============= ========================== ============================== ==============================
+
+There are a lot of classes involved
+and these two tables look very different.
+There is no good reason why Python methods should be
+treated fundamentally differently from C methods.
+Also, the features are slightly different:
+for example, ``method`` supports ``__func__``
+but ``builtin_function_or_method`` does not.
+
+Since CPython has optimizations for calls to most of these objects,
+the code for dealing with them can also become complex.
+A good example of this is the ``call_function`` function in ``Python/ceval.c``.
+
+**Solution**: all these classes should implement the C call protocol.
+Then the complexity in the code can mostly be fixed by
+checking for the C call protocol (``tp_ccalloffset != 0``)
+instead of doing type checks.
+
+Furthermore, it should be investigated whether some of these classes can be merged
+and whether ``method`` can be re-used also for bound methods of extension types
+(see PEP 576 for the latter,
+keeping in mind that this may have some minor backwards compatibility issues).
+This is not a goal by itself but just something to keep in mind
+when working on these classes.
+
+
+9. PyMethodDef is too limited
+-----------------------------
+
+The typical way to create a cfunction or cmethod in an extension module
+is by using a ``PyMethodDef`` to define it.
+These are then stored in an array ``PyModuleDef.m_methods``
+(for cfunctions) or ``PyTypeObject.tp_methods`` (for cmethods).
+However, because of the stable ABI (PEP 384),
+we cannot change the ``PyMethodDef`` structure.
+
+So, this means that we cannot add new fields for creating cfunctions/cmethods
+this way.
+This is probably the reason for the hack that
+``__doc__`` and ``__text_signature__`` are stored in the same C string
+(with the ``__doc__`` and ``__text_signature__`` descriptors extracting
+the relevant part).
+
+**Solution**: stop assuming that a single ``PyMethodDef`` entry
+is sufficient to describe a cfunction/cmethod.
+Instead, we could add some flag which means that one of the ``PyMethodDef``
+fields is instead a pointer to an additional structure.
+Or, we could add a flag to use two or more consecutive ``PyMethodDef``
+entries in the array to store more data.
+Then the ``PyMethodDef`` array would be used only to construct
+cfunctions/cmethods but it would no longer be used after that.
+
+
+10. Slot wrappers have no custom documentation
+----------------------------------------------
+
+Right now, slot wrappers like ``__init__`` or ``__lt__`` only have very
+generic documentation, not at all specific to the class::
+
+    >>> list.__init__.__doc__
+    'Initialize self.  See help(type(self)) for accurate signature.'
+    >>> list.__lt__.__doc__
+    'Return self<value.'
+    >>> list.__init__.__text_signature__
+    '($self, /, *args, **kwargs)'
+
+As you can see, slot wrappers do support ``__doc__``
+and ``__text_signature__``.
+The problem is that these are stored in ``struct wrapperbase``,
+which is common for all wrappers of a specific slot
+(for example, the same ``wrapperbase`` is used for ``str.__eq__`` and ``int.__eq__``).
+
+**Solution**: rethink the slot wrapper class to allow docstrings
+(and text signatures) for each instance separately.
+
+This still leaves the question of how extension modules
+should specify the documentation.
+The ``PyTypeObject`` entries like ``tp_init`` are just function pointers;
+we cannot do anything with those.
+One solution would be to add entries to the ``tp_methods`` array
+just for adding docstrings.
+Such an entry could look like ::
+
+    {"__init__", NULL, METH_SLOTDOC, "pointer to __init__ doc goes here"}
+
+
+11. Static methods and class methods should be callable
+--------------------------------------------------------
+
+Instances of ``staticmethod`` and ``classmethod`` should be callable.
+Admittedly, there is no strong use case for this,
+but it has occasionally been requested (see for example [#bpo20309]_).
+
+Making static/class methods callable would increase consistency.
+First of all, function decorators typically add functionality or modify
+a function, but the result remains callable. This is not true for
+``@staticmethod`` and ``@classmethod``.
+
+Second, class methods of extension types are already callable::
+
+    >>> fromhex = float.__dict__["fromhex"]
+    >>> type(fromhex)
+    <class 'classmethod_descriptor'>
+    >>> fromhex(float, "0xff")
+    255.0
+
+Third, one can see ``function``, ``staticmethod`` and ``classmethod``
+as different kinds of unbound methods:
+they all become ``method`` when bound, but the implementation of ``__get__``
+is slightly different.
+From this point of view, it looks strange that ``function`` is callable
+but the others are not.
+
+**Solution**:
+when changing the implementation of ``staticmethod`` and ``classmethod``,
+we should consider making instances callable.
+Even if this is not a goal by itself, it may happen naturally
+because of the implementation.
+
+
+References
+==========
+
+.. [#bpo20309] Not all method descriptors are callable
+   (https://bugs.python.org/issue20309)
+
+.. [#bpo30071] Duck-typing inspect.isfunction()
+   (https://bugs.python.org/issue30071)
+
+
+Copyright
+=========
+
+This document has been placed in the public domain.
+
+
+
+..
+  Local Variables:
+  mode: indented-text
+  indent-tabs-mode: nil
+  sentence-end-double-space: t
+  fill-column: 70
+  coding: utf-8
+  End:
diff --git a/pep-0580.rst b/pep-0580.rst
new file mode 100644
index 000000000..125243618
--- /dev/null
+++ b/pep-0580.rst
@@ -0,0 +1,611 @@
+PEP: 580
+Title: The C call protocol
+Author: Jeroen Demeyer <J.Demeyer@UGent.be>
+Status: Draft
+Type: Standards Track
+Content-Type: text/x-rst
+Created: 14-Jun-2018
+Python-Version: 3.8
+Post-History: 20-Jun-2018, 22-Jun-2018, 16-Jul-2018
+
+
+Abstract
+========
+
+A new "C call" protocol is proposed.
+It is meant for classes representing functions or methods
+which need to implement fast calling.
+The goal is to generalize existing optimizations for built-in functions
+to arbitrary extension types.
+
+In the reference implementation,
+this new protocol is used for the existing classes
+``builtin_function_or_method`` and ``method_descriptor``.
+However, in the future, more classes may implement it.
+
+**NOTE**: This PEP deals only with CPython implementation details;
+it does not affect the Python language or standard library.
+
+
+Motivation
+==========
+
+Currently, the Python bytecode interpreter has various optimizations
+for calling instances of ``builtin_function_or_method``,
+``method_descriptor``, ``method`` and ``function``.
+However, none of these classes is subclassable.
+Therefore, these optimizations are not available to
+user-defined extension types.
+
+If this PEP is implemented, then the checks
+for ``builtin_function_or_method`` and ``method_descriptor``
+could be replaced by simply checking for and using the C call protocol.
+This simplifies existing code.
+
+We also design the C call protocol such that it can easily
+be extended with new features in the future.
+
+For more background and motivation, see PEP 579.
+
+
+Basic idea
+==========
+
+Currently, CPython has multiple optimizations for fast calling
+for a few specific function classes.
+Calling instances of these classes using a plain ``tp_call`` is slower
+than using the optimizations.
+The basic idea of this PEP is to allow user-defined extension types
+(not Python classes) to use these optimizations as well,
+both as caller and as callee.
+
+The existing class ``builtin_function_or_method`` and a few others
+use a ``PyMethodDef`` structure for describing the underlying C function and its signature.
+The first concrete change is that this is replaced by a new structure ``PyCCallDef``.
+This stores some of the same information as a ``PyMethodDef``,
+but with one important addition:
+the "parent" of the function (the class or module where it is defined).
+Note that ``PyMethodDef`` arrays are still used to construct
+functions/methods but no longer for calling them.
+
+Second, we want every class to be able to use such a ``PyCCallDef`` for
+optimizing calls, so the ``PyTypeObject`` structure gains a ``tp_ccalloffset`` field
+giving an offset to a ``PyCCallDef *`` in the object structure
+and a flag ``Py_TPFLAGS_HAVE_CCALL`` indicating that ``tp_ccalloffset`` is valid.
+
+Third, since we want to deal efficiently with unbound and bound methods
+(as opposed to only plain functions), we also need to handle ``__self__``:
+after the ``PyCCallDef *`` in the object structure,
+there is a ``PyObject *self`` field.
+These two fields together are referred to as a ``PyCCallRoot`` structure.
+
+The new protocol for efficiently calling objects using these new structures
+is called the "C call protocol".
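+
+For reference, the two classes that the reference implementation migrates to
+this protocol can be inspected from Python today::
+
+    >>> type(len)
+    <class 'builtin_function_or_method'>
+    >>> type(list.append)
+    <class 'method_descriptor'>
+    >>> type([].append)
+    <class 'builtin_function_or_method'>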
+
+
+New data structures
+===================
+
+The ``PyTypeObject`` structure gains a new field ``Py_ssize_t tp_ccalloffset``
+and a new flag ``Py_TPFLAGS_HAVE_CCALL``.
+If this flag is set, then ``tp_ccalloffset`` is assumed to be a valid
+offset inside the object structure (similar to ``tp_weaklistoffset``).
+It must be a strictly positive integer.
+At that offset, a ``PyCCallRoot`` structure appears::
+
+    typedef struct {
+        PyCCallDef *cr_ccall;
+        PyObject   *cr_self;   /* __self__ argument for methods */
+    } PyCCallRoot;
+
+The ``PyCCallDef`` structure contains everything needed to describe how
+the function can be called::
+
+    typedef struct {
+        uint32_t  cc_flags;
+        PyCFunc   cc_func;    /* C function to call */
+        PyObject *cc_parent;  /* class or module */
+    } PyCCallDef;
+
+The reason for putting ``__self__`` outside of ``PyCCallDef``
+is that ``PyCCallDef`` is not meant to be changed after creating the function.
+A single ``PyCCallDef`` can be shared
+by an unbound method and multiple bound methods.
+This wouldn't work if we put ``__self__`` inside that structure.
+
+**NOTE**: unlike ``tp_dictoffset``, we do not allow negative numbers
+for ``tp_ccalloffset`` to mean counting from the end.
+There does not seem to be a use case for it and it would only complicate
+the implementation.
+
+Parent
+------
+
+The ``cc_parent`` field (accessed for example by a ``__parent__``
+or ``__objclass__`` descriptor from Python code) can be any Python object.
+For methods of extension types, this is set to the class.
+For functions of modules, this is set to the module.
+
+The parent serves multiple purposes: for methods of extension types,
+it is used for type checks like the following::
+
+    >>> list.append({}, "x")
+    Traceback (most recent call last):
+      File "<stdin>", line 1, in <module>
+    TypeError: descriptor 'append' requires a 'list' object but received a 'dict'
+
+PEP 573 specifies that every function should have access to the
+module in which it is defined.
+For functions of a module, this is given by the parent.
+For methods, this works indirectly through the class,
+assuming that the class has a pointer to the module.
+
+The parent would also typically be used to implement ``__qualname__``.
+The new C API function ``PyCCall_GenericGetQualname()`` does exactly that.
+
+Custom classes are free to set ``cc_parent`` to whatever they want.
+It is only used by the C call protocol if the ``CCALL_OBJCLASS`` flag is set.
+
+Using tp_print
+--------------
+
+We propose to replace the existing unused field ``tp_print``
+by ``tp_ccalloffset``.
+Since ``Py_TPFLAGS_HAVE_CCALL`` would *not* be added to
+``Py_TPFLAGS_DEFAULT``, this ensures full backwards compatibility for
+existing extension modules setting ``tp_print``.
+It also means that we can require that ``tp_ccalloffset`` is a valid
+offset when ``Py_TPFLAGS_HAVE_CCALL`` is specified:
+we do not need to check ``tp_ccalloffset != 0``.
+In future Python versions, we may decide that ``tp_print``
+becomes ``tp_ccalloffset`` unconditionally,
+drop the ``Py_TPFLAGS_HAVE_CCALL`` flag and instead check for
+``tp_ccalloffset != 0``.
+
+
+The C call protocol
+===================
+
+We say that a class implements the C call protocol
+if it has the ``Py_TPFLAGS_HAVE_CCALL`` flag set
+(as explained above, it must then set ``tp_ccalloffset > 0``).
+Such a class must implement ``__call__`` as described in this section
+(in practice, this just means setting ``tp_call`` to ``PyCCall_Call``).
+
+The ``cc_func`` field is a C function pointer.
+Its precise signature depends on flags.
+Below are the possible values for ``cc_flags & CCALL_SIGNATURE``
+together with the arguments that the C function takes.
+The return value is always ``PyObject *``.
+The following are completely analogous to the existing ``PyMethodDef``
+signature flags:
+
+- ``CCALL_VARARGS``: ``cc_func(PyObject *self, PyObject *args)``
+
+- ``CCALL_VARARGS | CCALL_KEYWORDS``: ``cc_func(PyObject *self, PyObject *args, PyObject *kwds)``
+
+- ``CCALL_FASTCALL``: ``cc_func(PyObject *self, PyObject *const *args, Py_ssize_t nargs)``
+
+- ``CCALL_FASTCALL | CCALL_KEYWORDS``: ``cc_func(PyObject *self, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames)``
+
+- ``CCALL_NULLARG``: ``cc_func(PyObject *self, PyObject *null)``
+  (the function takes no arguments but a ``NULL`` is passed to the C function)
+
+- ``CCALL_O``: ``cc_func(PyObject *self, PyObject *arg)``
+
+The flag ``CCALL_FUNCARG`` may be combined with any of these.
+If so, the C function takes an additional argument as first argument
+which is the function object (the ``self`` in ``__call__``).
+For example, we have the following signature:
+
+- ``CCALL_FUNCARG | CCALL_VARARGS``: ``cc_func(PyObject *func, PyObject *self, PyObject *args)``
+
+**NOTE**: in the case of bound methods, it is currently unspecified
+whether the "function object" in the paragraph above refers
+to the bound method or the original function (which is wrapped by the bound method).
+In the reference implementation, the bound method is passed.
+In the future, this may change to the wrapped function.
+Despite this ambiguity, the implementation of bound methods
+guarantees that ``PyCCall_CCALLDEF(func)``
+points to the ``PyCCallDef`` of the original function.
+
+**NOTE**: unlike the existing ``METH_...`` flags,
+the ``CCALL_...`` constants do not necessarily represent single bits.
+So checking ``(cc_flags & CCALL_VARARGS) == 0`` is not a valid way
+to check the signature.
+There are also no guarantees of binary compatibility
+between Python versions for these flags.
+
+Checking __objclass__
+---------------------
+
+If the ``CCALL_OBJCLASS`` flag is set and if ``cr_self`` is NULL
+(this is the case for unbound methods of extension types),
+then a type check is done:
+the function must be called with at least one positional argument
+and the first (typically called ``self``) must be an instance of
+``cc_parent`` (which must be a class).
+If not, a ``TypeError`` is raised.
+
+Self slicing
+------------
+
+If ``cr_self`` is not NULL or if the flag ``CCALL_SLICE_SELF``
+is not set in ``cc_flags``, then the argument passed as ``self``
+is simply ``cr_self``.
+
+If ``cr_self`` is NULL and the flag ``CCALL_SLICE_SELF`` is set,
+then the first positional argument is removed from
+``args`` and instead passed as first argument to the C function.
+Effectively, the first positional argument is treated as ``__self__``.
+If there are no positional arguments, ``TypeError`` is raised.
+
+This process is called *self slicing*, and a function is said to have self
+slicing if ``cr_self`` is NULL and ``CCALL_SLICE_SELF`` is set.
+
+Note that a ``CCALL_NULLARG`` function with self slicing effectively has
+one argument, namely ``self``.
+Analogously, a ``CCALL_O`` function with self slicing has two arguments.
+
+Descriptor behavior
+-------------------
+
+Classes supporting the C call protocol
+must implement the descriptor protocol in a specific way.
+This is required for an efficient implementation of bound methods:
+it allows sharing the ``PyCCallDef`` structure between bound and unbound methods.
+It is also needed for a correct implementation of ``_PyObject_GetMethod``
+which is used by the ``LOAD_METHOD``/``CALL_METHOD`` optimization.
+First of all, if ``func`` supports the C call protocol,
+then ``func.__set__`` must not be implemented.
+
+Second, ``func.__get__`` must behave as follows:
+
+- If ``cr_self`` is not NULL, then ``__get__`` must be a no-op
+  in the sense that ``func.__get__(obj, cls)(*args, **kwds)``
+  behaves exactly the same as ``func(*args, **kwds)``.
+  It is also allowed for ``__get__`` to be left unimplemented.
+
+- If ``cr_self`` is NULL, then ``func.__get__(obj, cls)(*args, **kwds)``
+  (with ``obj`` not None)
+  must be equivalent to ``func(obj, *args, **kwds)``.
+  In particular, ``__get__`` must be implemented in this case.
+  Note that this is unrelated to self slicing: ``obj`` may be passed
+  as ``self`` argument to the C function or it may be the first positional argument.
+
+- If ``cr_self`` is NULL, then ``func.__get__(None, cls)(*args, **kwds)``
+  must be equivalent to ``func(*args, **kwds)``.
+
+There are no restrictions on the object ``func.__get__(obj, cls)``.
+For example, the latter is not required to implement the C call protocol.
+It only specifies what ``func.__get__(obj, cls).__call__`` does.
+
+For classes that do not care about ``__self__`` and ``__get__`` at all,
+the easiest solution is to assign ``cr_self = Py_None``
+(or any other non-NULL value).
+
+__name__ attribute
+------------------
+
+The C call protocol requires that the function has a ``__name__``
+attribute which is of type ``str`` (not a subclass).
+
+Furthermore, this must be idempotent in the sense
+that getting the ``__name__`` attribute twice in a row must return
+exactly the same Python object.
+This implies that it cannot be a temporary object;
+it must be stored somewhere.
+This is required because ``PyEval_GetFuncName`` and ``PyEval_GetFuncDesc``
+use borrowed references to the ``__name__`` attribute.
+
+Generic API functions
+---------------------
+
+This section lists the new public API functions dealing with the C call protocol.
+
+- ``int PyCCall_Check(PyObject *op)``:
+  return true if ``op`` implements the C call protocol.
+
+All the functions and macros below
+apply to any instance supporting the C call protocol.
+In other words, ``PyCCall_Check(func)`` must be true.
+
+- ``PyObject * PyCCall_Call(PyObject *func, PyObject *args, PyObject *kwds)``:
+  call ``func`` with positional arguments ``args``
+  and keyword arguments ``kwds`` (``kwds`` may be NULL).
+  This function is meant to be put in the ``tp_call`` slot.
+
+- ``PyObject * PyCCall_FASTCALL(PyObject *func, PyObject *const *args, Py_ssize_t nargs, PyObject *kwds)``:
+  call ``func`` with ``nargs`` positional arguments given by ``args[0]``, …, ``args[nargs-1]``.
+  The parameter ``kwds`` can be NULL (no keyword arguments),
+  a dict with ``name:value`` items, or a tuple with keyword names.
+  In the latter case, the keyword values are stored in the ``args``
+  array, starting at ``args[nargs]``.
+
+Macros to access the ``PyCCallRoot`` and ``PyCCallDef`` structures:
+
+- ``PyCCallRoot * PyCCall_CCALLROOT(PyObject *func)``:
+  pointer to the ``PyCCallRoot`` structure inside ``func``.
+
+- ``PyCCallDef * PyCCall_CCALLDEF(PyObject *func)``:
+  shorthand for ``PyCCall_CCALLROOT(func)->cr_ccall``.
+
+- ``uint32_t PyCCall_FLAGS(PyObject *func)``:
+  shorthand for ``PyCCall_CCALLROOT(func)->cr_ccall->cc_flags``.
+
+- ``PyObject * PyCCall_SELF(PyObject *func)``:
+  shorthand for ``PyCCall_CCALLROOT(func)->cr_self``.
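+
+For illustration, a caller could combine these functions as in the
+following hedged sketch (``call_maybe_ccall`` is hypothetical and not
+part of the proposal)::
+
+    /* Call 'func' through the C call protocol when available,
+       falling back to the ordinary tp_call mechanism otherwise. */
+    static PyObject *
+    call_maybe_ccall(PyObject *func, PyObject *args, PyObject *kwds)
+    {
+        if (PyCCall_Check(func)) {
+            return PyCCall_Call(func, args, kwds);
+        }
+        return PyObject_Call(func, args, kwds);
+    }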
+
+Generic getters, meant to be put into the ``tp_getset`` array:
+
+- ``PyObject * PyCCall_GenericGetParent(PyObject *func, void *closure)``:
+  return ``cc_parent``.
+  Raise ``AttributeError`` if ``cc_parent`` is NULL.
+
+- ``PyObject * PyCCall_GenericGetQualname(PyObject *func, void *closure)``:
+  return a string suitable for using as ``__qualname__``.
+  This uses the ``__qualname__`` of ``cc_parent`` if possible.
+  It also uses the ``__name__`` attribute.
+
+- ``PyObject * PyCCall_GenericGetSelf(PyObject *func, void *closure)``:
+  return ``cr_self``.
+  Raise ``AttributeError`` if ``cr_self`` is NULL.
+
+Profiling
+---------
+
+The profiling events
+``c_call``, ``c_return`` and ``c_exception`` are only generated
+when calling actual instances of ``builtin_function_or_method`` or ``method_descriptor``.
+This is done for simplicity and also for backwards compatibility
+(so that the profile function does not receive objects that it does not recognize).
+In a future PEP, we may extend C-level profiling to arbitrary classes
+implementing the C call protocol.
+
+
+Changes to built-in functions and methods
+=========================================
+
+The reference implementation of this PEP changes
+the existing classes ``builtin_function_or_method`` and ``method_descriptor``
+to use the C call protocol.
+In fact, those two classes are almost merged:
+the implementation becomes very similar, but they remain separate classes
+(mostly for backwards compatibility).
+The ``PyCCallDef`` structure is simply stored
+as part of the object structure.
+Both classes use ``PyCFunctionObject`` as object structure.
+This is the new layout::
+
+    typedef struct {
+        PyObject_HEAD
+        PyCCallDef  *m_ccall;
+        PyObject    *m_self;         /* Passed as 'self' arg to the C function */
+        PyCCallDef   _ccalldef;      /* Storage for m_ccall */
+        PyObject    *m_name;         /* __name__; str object (not NULL) */
+        PyObject    *m_module;       /* __module__; can be anything */
+        const char  *m_doc;          /* __text_signature__ and __doc__ */
+        PyObject    *m_weakreflist;  /* List of weak references */
+    } PyCFunctionObject;
+
+For functions of a module and for unbound methods of extension types,
+``m_ccall`` points to the ``_ccalldef`` field.
+For bound methods, ``m_ccall`` points to the ``PyCCallDef``
+of the unbound method.
+
+**NOTE**: the new layout of ``method_descriptor`` means that
+it no longer starts with ``PyDescr_COMMON``.
+This is purely an implementation detail and it should cause few (if any)
+compatibility problems.
+
+C API functions
+---------------
+
+The following function is added (also to the stable ABI [#pep384]_):
+
+- ``PyObject * PyCFunction_ClsNew(PyTypeObject *cls, PyMethodDef *ml, PyObject *self, PyObject *module, PyObject *parent)``:
+  create a new object with object structure ``PyCFunctionObject`` and class ``cls``.
+  This is called in turn by ``PyCFunction_NewEx`` and ``PyDescr_NewMethod``.
+
+The undocumented functions ``PyCFunction_GetFlags``
+and ``PyCFunction_GET_FLAGS``
+are removed because it would be non-trivial to support them
+in a backwards-compatible way.
+
+
+Inheritance
+===========
+
+Extension types inherit the type flag ``Py_TPFLAGS_HAVE_CCALL``
+and the value ``tp_ccalloffset`` from the base class,
+provided that they implement ``tp_call`` and ``tp_descr_get``
+the same way as the base class.
+Heap types never inherit the C call protocol because
+that would not be safe (heap types can be changed dynamically).
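+
+To also illustrate the callee side, here is a hedged, hypothetical
+sketch (``my_identity`` is not part of the proposal, and ``cc_parent``
+would be filled in when the class is created) of a class whose
+instances all wrap the same C function and therefore share a single
+static ``PyCCallDef``::
+
+    /* CCALL_O signature: 'self' plus exactly one positional argument */
+    static PyObject *
+    my_identity(PyObject *self, PyObject *arg)
+    {
+        Py_INCREF(arg);
+        return arg;
+    }
+
+    /* Shared by all instances: each instance's PyCCallRoot.cr_ccall
+       simply points here (see "Why store the function pointer in the
+       instance?" below). */
+    static PyCCallDef my_ccalldef = {
+        CCALL_O,               /* cc_flags */
+        (PyCFunc)my_identity,  /* cc_func */
+        NULL,                  /* cc_parent */
+    };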
+
+
+Performance
+===========
+
+This PEP should not impact the performance of existing code
+(neither positively nor negatively).
+It is meant to allow efficient new code to be written,
+not to make existing code faster.
+
+
+Stable ABI
+==========
+
+None of the functions, structures or constants dealing with the C call protocol
+are added to the stable ABI [#pep384]_.
+
+There are two reasons for this:
+first of all, the most useful feature of the C call protocol is probably the
+``METH_FASTCALL`` calling convention.
+Given that this is not even part of the public API (see also PEP 579, issue 6),
+it would be strange to add anything else from the C call protocol
+to the stable ABI.
+
+Second, we want the C call protocol to be extensible in the future.
+By not adding anything to the stable ABI,
+we are free to do that without restrictions.
+
+
+Backwards compatibility
+=======================
+
+There should be no difference at all for the Python interface,
+and none for the documented C API
+(in the sense that all functions remain supported with the same functionality).
+
+The removed function ``PyCFunction_GetFlags``
+is officially part of the stable ABI [#pep384]_.
+However, this is probably an oversight:
+first of all, it is not even documented.
+Second, the flag ``METH_FASTCALL``
+is not part of the stable ABI but it is very common
+(because of Argument Clinic).
+So, if one cannot support ``METH_FASTCALL``,
+it is hard to imagine a use case for ``PyCFunction_GetFlags``.
+The fact that ``PyCFunction_GET_FLAGS`` and ``PyCFunction_GetFlags``
+are not used at all by CPython outside of ``Objects/call.c``
+further shows that these functions are not particularly useful.
+
+In conclusion, the only potential breakage is with C code
+which accesses the internals of ``PyCFunctionObject`` and ``PyMethodDescrObject``.
+We expect very few problems because of this.
+
+
+Rationale
+=========
+
+Why is this better than PEP 575?
+--------------------------------
+
+One of the major complaints about PEP 575 was that it was coupling
+functionality (the calling and introspection protocol)
+with the class hierarchy:
+a class could only benefit from the new features
+if it was a subclass of ``base_function``.
+It may be difficult for existing classes to do that
+because they may have other constraints on the layout of the C object structure,
+coming from an existing base class or implementation details.
+For example, ``functools.lru_cache`` cannot implement PEP 575 as-is.
+
+It also complicated the implementation precisely because changes
+were needed both in the implementation details and in the class hierarchy.
+
+The current PEP does not have these problems.
+
+Why store the function pointer in the instance?
+-----------------------------------------------
+
+The actual information needed for calling an object
+is stored in the instance (in the ``PyCCallDef`` structure)
+instead of the class.
+This is different from the ``tp_call`` slot or earlier attempts
+at implementing a ``tp_fastcall`` slot [#bpo29259]_.
+
+The main use case is built-in functions and methods.
+For those, the C function to be called does depend on the instance.
+
+Note that the current protocol makes it easy to support the case
+where the same C function is called for all instances:
+just use a single static ``PyCCallDef`` structure for every instance.
+
+Why CCALL_OBJCLASS?
+-------------------
+
+The flag ``CCALL_OBJCLASS`` is meant to support various cases
+where the class of a ``self`` argument must be checked, such as::
+
+    >>> list.append({}, None)
+    Traceback (most recent call last):
+      File "<stdin>", line 1, in <module>
+    TypeError: append() requires a 'list' object but received a 'dict'
+
+    >>> list.__len__({})
+    Traceback (most recent call last):
+      File "<stdin>", line 1, in <module>
+    TypeError: descriptor '__len__' requires a 'list' object but received a 'dict'
+
+    >>> float.__dict__["fromhex"](list, "0xff")
+    Traceback (most recent call last):
+      File "<stdin>", line 1, in <module>
+    TypeError: descriptor 'fromhex' for type 'float' doesn't apply to type 'list'
+
+In the reference implementation, only the first of these uses the new code.
+The other examples show that these kinds of checks appear
+in multiple places, so it makes sense to add generic support for them.
+
+Why CCALL_SLICE_SELF?
+---------------------
+
+The flag ``CCALL_SLICE_SELF`` and the concept of self slicing
+are needed to support methods:
+the C function should not care
+whether it is called as an unbound method or as a bound method.
+In both cases, there should be a ``self`` argument
+and this is simply the first positional argument of an unbound method call.
+
+For example, ``list.append`` is a ``METH_O`` method.
+Both the calls ``list.append([], 42)`` and ``[].append(42)`` should
+translate to the C call ``list_append([], 42)``.
+
+Thanks to the proposed C call protocol, we can support this in such a way
+that both the unbound and the bound method share a ``PyCCallDef``
+structure (with the ``CCALL_SLICE_SELF`` flag set).
+
+In conclusion, ``CCALL_SLICE_SELF`` has two advantages:
+there is no extra layer of indirection for calling,
+and constructing bound methods does not require setting up a ``PyCCallDef`` structure.
+
+Replacing tp_print
+------------------
+
+We repurpose ``tp_print`` as ``tp_ccalloffset`` because this makes
+it easier for external projects to backport the C call protocol
+to earlier Python versions.
+In particular, the Cython project has shown interest in doing that
+(see https://mail.python.org/pipermail/python-dev/2018-June/153927.html).
+
+
+Alternative suggestions
+=======================
+
+PEP 576 is an alternative approach to solving the same problem as this PEP.
+See https://mail.python.org/pipermail/python-dev/2018-July/154238.html
+for comments on the difference between PEP 576 and PEP 580.
+
+
+Reference implementation
+========================
+
+The reference implementation can be found at
+https://github.com/jdemeyer/cpython/tree/pep580
+
+
+References
+==========
+
+.. [#pep384] Löwis, PEP 384 – Defining a Stable ABI,
+   https://www.python.org/dev/peps/pep-0384/
+
+.. [#bpo29259] Add tp_fastcall to PyTypeObject: support FASTCALL calling convention for all callable objects,
+   https://bugs.python.org/issue29259
+
+
+Copyright
+=========
+
+This document has been placed in the public domain.
+
+
+
+..
+   Local Variables:
+   mode: indented-text
+   indent-tabs-mode: nil
+   sentence-end-double-space: t
+   fill-column: 70
+   coding: utf-8
+   End:
diff --git a/pep-0801.rst b/pep-0801.rst
new file mode 100644
index 000000000..cd4e6bf0b
--- /dev/null
+++ b/pep-0801.rst
@@ -0,0 +1,30 @@
+PEP: 801
+Title: Reserved
+Author: Barry Warsaw <barry@python.org>
+Status: Draft
+Type: Informational
+Content-Type: text/x-rst
+Created: 21-Jun-2018
+
+
+Abstract
+========
+
+This PEP is reserved for future use. Contact the author or
+`the PEP editors `_ for details.
+
+
+Copyright
+=========
+
+This document has been placed in the public domain.
+ + +.. + Local Variables: + mode: indented-text + indent-tabs-mode: nil + sentence-end-double-space: t + fill-column: 70 + coding: utf-8 + End: diff --git a/pep-3107.txt b/pep-3107.txt index 2d75e9f10..477c8491a 100644 --- a/pep-3107.txt +++ b/pep-3107.txt @@ -151,7 +151,7 @@ parentheses around the parameter list. However it was decided [#lambda]_ not to make this change because: 1. It would be an incompatible change. -2. Lambda's are neutered anyway. +2. Lambdas are neutered anyway. 3. The lambda can always be changed to a function. @@ -159,11 +159,11 @@ Accessing Function Annotations ============================== Once compiled, a function's annotations are available via the -function's ``func_annotations`` attribute. This attribute is +function's ``__annotations__`` attribute. This attribute is a mutable dictionary, mapping parameter names to an object representing the evaluated annotation expression -There is a special key in the ``func_annotations`` mapping, +There is a special key in the ``__annotations__`` mapping, ``"return"``. This key is present only if an annotation was supplied for the function's return value. @@ -172,7 +172,7 @@ For example, the following annotation:: def foo(a: 'x', b: 5 + 6, c: list) -> max(2, 9): ... -would result in a ``func_annotation`` mapping of :: +would result in an ``__annotations__`` mapping of :: {'a': 'x', 'b': 11, @@ -183,7 +183,7 @@ The ``return`` key was chosen because it cannot conflict with the name of a parameter; any attempt to use ``return`` as a parameter name would result in a ``SyntaxError``. -``func_annotations`` is an empty, mutable dictionary if there are no +``__annotations__`` is an empty, mutable dictionary if there are no annotations on the function or if the functions was created from a ``lambda`` expression. diff --git a/pep-3156.txt b/pep-3156.txt index befb92bfd..c0624a91f 100644 --- a/pep-3156.txt +++ b/pep-3156.txt @@ -10,6 +10,7 @@ Type: Standards Track Content-Type: text/x-rst Created: 12-Dec-2012 Post-History: 21-Dec-2012 +Replaces: 3153 Resolution: https://mail.python.org/pipermail/python-dev/2013-November/130419.html Abstract diff --git a/pep0/constants.py b/pep0/constants.py index 023d665d3..e40293f44 100644 --- a/pep0/constants.py +++ b/pep0/constants.py @@ -1,44 +1,43 @@ # -*- coding: utf-8 -*- -import sys - -if sys.version_info[0] > 2: - text_type = str -else: - text_type = unicode - +text_type = str title_length = 55 -column_format = (u' %(type)1s%(status)1s %(number)4s %(title)-' + - text_type(title_length) + u's %(authors)-s') +author_length = 40 +table_separator = "== ==== " + "="*title_length + " " + "="*author_length +column_format = ( + '%(type)1s%(status)1s %(number)4s %(title)-{title_length}s %(authors)-s' +).format(title_length=title_length) -header = u"""PEP: 0 +header = """\ +PEP: 0 Title: Index of Python Enhancement Proposals (PEPs) Version: N/A Last-Modified: %s -Author: David Goodger , - Barry Warsaw +Author: python-dev Status: Active Type: Informational +Content-Type: text/x-rst Created: 13-Jul-2000 """ -intro = u""" - This PEP contains the index of all Python Enhancement Proposals, - known as PEPs. PEP numbers are assigned by the PEP editors, and - once assigned are never changed[1]. The Mercurial history[2] of - the PEP texts represent their historical record. +intro = """\ +This PEP contains the index of all Python Enhancement Proposals, +known as PEPs. PEP numbers are assigned by the PEP editors, and +once assigned are never changed [1_]. 
The version control history [2_] of +the PEP texts represent their historical record. """ -references = u""" - [1] PEP 1: PEP Purpose and Guidelines - [2] View PEP history online - https://hg.python.org/peps/ +references = """\ +.. [1] PEP 1: PEP Purpose and Guidelines +.. [2] View PEP history online: https://github.com/python/peps """ -footer = u""" -Local Variables: -mode: indented-text -indent-tabs-mode: nil -sentence-end-double-space: t -fill-column: 70 -coding: utf-8 -End:""" +footer = """ \ +.. + Local Variables: + mode: indented-text + indent-tabs-mode: nil + sentence-end-double-space: t + fill-column: 70 + coding: utf-8 + End:\ +""" diff --git a/pep0/output.py b/pep0/output.py index 3f02abc51..10024c221 100644 --- a/pep0/output.py +++ b/pep0/output.py @@ -26,15 +26,13 @@ RESERVED = [ indent = u' ' -def write_column_headers(output): +def emit_column_headers(output): """Output the column headers for the PEP indices.""" - column_headers = {'status': u'', 'type': u'', 'number': u'num', - 'title': u'title', 'authors': u'owner'} + column_headers = {'status': '.', 'type': '.', 'number': 'PEP', + 'title': 'PEP Title', 'authors': 'PEP Author(s)'} + print(constants.table_separator, file=output) print(constants.column_format % column_headers, file=output) - underline_headers = {} - for key, value in column_headers.items(): - underline_headers[key] = constants.text_type(len(value) * '-') - print(constants.column_format % underline_headers, file=output) + print(constants.table_separator, file=output) def sort_peps(peps): @@ -42,6 +40,7 @@ def sort_peps(peps): and essentially dead.""" meta = [] info = [] + provisional = [] accepted = [] open_ = [] finished = [] @@ -74,6 +73,8 @@ def sort_peps(peps): info.append(pep) else: historical.append(pep) + elif pep.status == 'Provisional': + provisional.append(pep) elif pep.status in ('Accepted', 'Active'): accepted.append(pep) elif pep.status == 'Final': @@ -82,14 +83,15 @@ def sort_peps(peps): raise PEPError("unsorted (%s/%s)" % (pep.type_, pep.status), pep.filename, pep.number) - return meta, info, accepted, open_, finished, historical, deferred, dead + return (meta, info, provisional, accepted, open_, + finished, historical, deferred, dead) def verify_email_addresses(peps): authors_dict = {} for pep in peps: for author in pep.authors: - # If this is the first time we have come across an author, add him. + # If this is the first time we have come across an author, add them. if author not in authors_dict: authors_dict[author] = [author.email] else: @@ -129,112 +131,160 @@ def sort_authors(authors_dict): def normalized_last_first(name): return len(unicodedata.normalize('NFC', name.last_first)) +def emit_title(text, anchor, output, *, symbol="="): + print(".. 
_{anchor}:\n".format(anchor=anchor), file=output) + print(text, file=output) + print(symbol*len(text), file=output) + print(file=output) + +def emit_subtitle(text, anchor, output): + emit_title(text, anchor, output, symbol="-") + +def emit_pep_category(output, category, anchor, peps): + emit_subtitle(category, anchor, output) + emit_column_headers(output) + for pep in peps: + print(pep, file=output) + print(constants.table_separator, file=output) + print(file=output) def write_pep0(peps, output=sys.stdout): + # PEP metadata today = datetime.date.today().strftime("%Y-%m-%d") print(constants.header % today, file=output) print(file=output) - print(u"Introduction", file=output) + # Introduction + emit_title("Introduction", "intro", output) print(constants.intro, file=output) print(file=output) - print(u"Index by Category", file=output) + # PEPs by category + (meta, info, provisional, accepted, open_, + finished, historical, deferred, dead) = sort_peps(peps) + emit_title("Index by Category", "by-category", output) + emit_pep_category( + category="Meta-PEPs (PEPs about PEPs or Processes)", + anchor="by-category-meta", + peps=meta, + output=output, + ) + emit_pep_category( + category="Other Informational PEPs", + anchor="by-category-other-info", + peps=info, + output=output, + ) + emit_pep_category( + category="Provisional PEPs (provisionally accepted; interface may still change)", + anchor="by-category-provisional", + peps=provisional, + output=output, + ) + emit_pep_category( + category="Accepted PEPs (accepted; may not be implemented yet)", + anchor="by-category-accepted", + peps=accepted, + output=output, + ) + emit_pep_category( + category="Open PEPs (under consideration)", + anchor="by-category-open", + peps=open_, + output=output, + ) + emit_pep_category( + category="Finished PEPs (done, with a stable interface)", + anchor="by-category-finished", + peps=finished, + output=output, + ) + emit_pep_category( + category="Historical Meta-PEPs and Informational PEPs", + anchor="by-category-historical", + peps=historical, + output=output, + ) + emit_pep_category( + category="Deferred PEPs (postponed pending further research or updates)", + anchor="by-category-deferred", + peps=deferred, + output=output, + ) + emit_pep_category( + category="Abandoned, Withdrawn, and Rejected PEPs", + anchor="by-category-abandoned", + peps=dead, + output=output, + ) print(file=output) - write_column_headers(output) - (meta, info, accepted, open_, finished, - historical, deferred, dead) = sort_peps(peps) - print(file=output) - print(u" Meta-PEPs (PEPs about PEPs or Processes)", file=output) - print(file=output) - for pep in meta: - print(constants.text_type(pep), file=output) - print(file=output) - print(u" Other Informational PEPs", file=output) - print(file=output) - for pep in info: - print(constants.text_type(pep), file=output) - print(file=output) - print(u" Accepted PEPs (accepted; may not be implemented yet)", file=output) - print(file=output) - for pep in accepted: - print(constants.text_type(pep), file=output) - print(file=output) - print(u" Open PEPs (under consideration)", file=output) - print(file=output) - for pep in open_: - print(constants.text_type(pep), file=output) - print(file=output) - print(u" Finished PEPs (done, implemented in code repository)", file=output) - print(file=output) - for pep in finished: - print(constants.text_type(pep), file=output) - print(file=output) - print(u" Historical Meta-PEPs and Informational PEPs", file=output) - print(file=output) - for pep in historical: - 
print(constants.text_type(pep), file=output) - print(file=output) - print(u" Deferred PEPs", file=output) - print(file=output) - for pep in deferred: - print(constants.text_type(pep), file=output) - print(file=output) - print(u" Abandoned, Withdrawn, and Rejected PEPs", file=output) - print(file=output) - for pep in dead: - print(constants.text_type(pep), file=output) - print(file=output) - print(file=output) - print(u"Numerical Index", file=output) - print(file=output) - write_column_headers(output) + # PEPs by number + emit_title("Numerical Index", "by-pep-number", output) + emit_column_headers(output) prev_pep = 0 for pep in peps: if pep.number - prev_pep > 1: print(file=output) print(constants.text_type(pep), file=output) prev_pep = pep.number + print(constants.table_separator, file=output) print(file=output) - print(file=output) - print(u'Reserved PEP Numbers', file=output) - print(file=output) - write_column_headers(output) + # Reserved PEP numbers + emit_title('Reserved PEP Numbers', "reserved", output) + emit_column_headers(output) for number, claimants in sorted(RESERVED): print(constants.column_format % { - 'type': '', - 'status': '', + 'type': '.', + 'status': '.', 'number': number, 'title': 'RESERVED', 'authors': claimants, }, file=output) + print(constants.table_separator, file=output) print(file=output) - print(file=output) - print(u"Key", file=output) - print(file=output) - for type_ in PEP.type_values: + # PEP types key + emit_title("PEP Types Key", "type-key", output) + for type_ in sorted(PEP.type_values): print(u" %s - %s PEP" % (type_[0], type_), file=output) + print(file=output) print(file=output) - for status in PEP.status_values: - print(u" %s - %s proposal" % (status[0], status), file=output) + # PEP status key + emit_title("PEP Status Key", "status-key", output) + for status in sorted(PEP.status_values): + # Draft PEPs have no status displayed, Active shares a key with Accepted + if status in ("Active", "Draft"): + continue + if status == "Accepted": + msg = " A - Accepted (Standards Track only) or Active proposal" + else: + msg = " {status[0]} - {status} proposal".format(status=status) + print(msg, file=output) + print(file=output) print(file=output) - print(file=output) - print(u"Owners", file=output) - print(file=output) + # PEP owners + emit_title("Authors/Owners", "authors", output) authors_dict = verify_email_addresses(peps) max_name = max(authors_dict.keys(), key=normalized_last_first) max_name_len = len(max_name.last_first) - print(u" %s %s" % ('name'.ljust(max_name_len), 'email address'), file=output) - print(u" %s %s" % ((len('name')*'-').ljust(max_name_len), - len('email address')*'-'), file=output) + author_table_separator = "="*max_name_len + " " + "="*len("email address") + print(author_table_separator, file=output) + _author_header_fmt = "{name:{max_name_len}} Email Address" + print(_author_header_fmt.format(name="Name", max_name_len=max_name_len), file=output) + print(author_table_separator, file=output) sorted_authors = sort_authors(authors_dict) + _author_fmt = "{author.last_first:{max_name_len}} {author_email}" for author in sorted_authors: # Use the email from authors_dict instead of the one from 'author' as # the author instance may have an empty email. 
-        print((u" %s %s" %
-               (author.last_first.ljust(max_name_len), authors_dict[author])), file=output)
+        _entry = _author_fmt.format(
+            author=author,
+            author_email=authors_dict[author],
+            max_name_len=max_name_len,
+        )
+        print(_entry, file=output)
+    print(author_table_separator, file=output)
     print(file=output)
     print(file=output)
-    print(u"References", file=output)
-    print(file=output)
+    # References for introduction footnotes
+    emit_title("References", "references", output)
     print(constants.references, file=output)
     print(constants.footer, file=output)
diff --git a/pep0/pep.py b/pep0/pep.py
index 1b84dd35a..b3194afc2 100644
--- a/pep0/pep.py
+++ b/pep0/pep.py
@@ -99,11 +99,11 @@ class Author(object):
         name_parts = self.last.split()
         for index, part in enumerate(name_parts):
             if part[0].isupper():
+                base = u' '.join(name_parts[index:]).lower()
                 break
         else:
-            raise ValueError("last name missing a capital letter: %r"
-                             % name_parts)
-        base = u' '.join(name_parts[index:]).lower()
+            # If no capitals, use the whole string
+            base = self.last.lower()
         return unicodedata.normalize('NFKD', base).encode('ASCII', 'ignore')
 
     def _last_name(self, full_name):
@@ -169,7 +169,8 @@ class PEP(object):
     type_values = (u"Standards Track", u"Informational", u"Process")
     # Valid values for the Status header.
     # Active PEPs can only be for Informational or Process PEPs.
-    status_values = (u"Accepted", u"Rejected", u"Withdrawn", u"Deferred",
+    status_values = (u"Accepted", u"Provisional",
+                     u"Rejected", u"Withdrawn", u"Deferred",
                      u"Final", u"Active", u"Draft", u"Superseded")
 
     def __init__(self, pep_file):
@@ -229,6 +230,11 @@ class PEP(object):
                 raise PEPError("Only Process and Informational PEPs may "
                                "have an Active status", pep_file.name,
                                self.number)
+            # Special case for Provisional PEPs.
+            if (status == u"Provisional" and self.type_ != "Standards Track"):
+                raise PEPError("Only Standards Track PEPs may "
+                               "have a Provisional status", pep_file.name,
+                               self.number)
         self.status = status
         # 'Author'.
         authors_and_emails = self._parse_author(metadata['Author'])
diff --git a/pep2html.py b/pep2html.py
index a7799713e..86fbef425 100755
--- a/pep2html.py
+++ b/pep2html.py
@@ -235,7 +235,7 @@ def fixfile(inpath, input_lines, outfile):
             else:
                 mailtos.append(part)
             v = COMMASPACE.join(mailtos)
-        elif k.lower() in ('replaces', 'replaced-by', 'requires'):
+        elif k.lower() in ('replaces', 'superseded-by', 'requires'):
             otherpeps = ''
             for otherpep in re.split(',?\s+', v):
                 otherpep = int(otherpep)
@@ -296,7 +296,7 @@ def fixfile(inpath, input_lines, outfile):
                     print(re.sub(
                         parts[1],
                         '<a href="%s">%s</a>' % (url, parts[1]),
-                        line, 1), end=' ', file=outfile)
+                        line, 1), end='', file=outfile)
                     continue
                 elif parts and '@' in parts[-1]:
                     # This is a pep email address line, so filter it.
                     url = fixemail(parts[-1], pep)
                     if need_pre:
                         print('<pre>', file=outfile)
                         need_pre = 0
                     print(re.sub(
-                        parts[-1], url, line, 1), end=' ', file=outfile)
+                        parts[-1], url, line, 1), end='', file=outfile)
                     continue
             line = fixpat.sub(lambda x, c=inpath: fixanchor(c, x), line)
             if need_pre:
@@ -409,7 +409,7 @@ class PEPHeaders(Transform):
                 for node in para:
                     if isinstance(node, nodes.reference):
                         node.replace_self(peps.mask_email(node, pep))
-            elif name in ('replaces', 'replaced-by', 'requires'):
+            elif name in ('replaces', 'superseded-by', 'requires'):
                 newbody = []
                 space = nodes.Text(' ')
                 for refpep in re.split(r',?\s+', body.astext()):
diff --git a/pep2pyramid.py b/pep2pyramid.py
index 71f827951..e41891da0 100755
--- a/pep2pyramid.py
+++ b/pep2pyramid.py
@@ -224,7 +224,7 @@ def fixfile(inpath, input_lines, outfile):
                 else:
                     mailtos.append(part)
             v = COMMASPACE.join(mailtos)
-        elif k.lower() in ('replaces', 'replaced-by', 'requires'):
+        elif k.lower() in ('replaces', 'superseded-by', 'requires'):
             otherpeps = ''
             for otherpep in re.split(',?\s+', v):
                 otherpep = int(otherpep)
diff --git a/pep2rss.py b/pep2rss.py
index e0e5c86b0..71e2c413c 100755
--- a/pep2rss.py
+++ b/pep2rss.py
@@ -1,22 +1,23 @@
-#!/usr/bin/env python
+#!/usr/bin/env python3
 
 # usage: pep-hook.py $REPOS $REV
 # (standard post-commit args)
 
 import os, glob, time, datetime, stat, re, sys
-import codecs
 import PyRSS2Gen as rssgen
 
 RSS_PATH = os.path.join(sys.argv[1], 'peps.rss')
 
 def firstline_startingwith(full_path, text):
-    for line in codecs.open(full_path, encoding="utf-8"):
+    for line in open(full_path, encoding="utf-8"):
         if line.startswith(text):
             return line[len(text):].strip()
     return None
 
-# get list of peps with creation time (from "Created:" string in pep .txt)
+# get list of peps with creation time
+# (from "Created:" string in pep .rst or .txt)
 peps = glob.glob('pep-*.txt')
+peps.extend(glob.glob('pep-*.rst'))
 def pep_creation_dt(full_path):
     created_str = firstline_startingwith(full_path, 'Created:')
     # bleh, I was hoping to avoid re but some PEPs editorialize
@@ -69,5 +70,5 @@ rss = rssgen.RSS2(
     lastBuildDate = datetime.datetime.now(),
     items = items)
 
-with open(RSS_PATH, 'w') as fp:
-    fp.write(rss.to_xml())
+with open(RSS_PATH, 'w', encoding="utf-8") as fp:
+    fp.write(rss.to_xml(encoding="utf-8"))