diff --git a/peps/pep-0723.rst b/peps/pep-0723.rst index 90170af41..a360d1845 100644 --- a/peps/pep-0723.rst +++ b/peps/pep-0723.rst @@ -4,7 +4,7 @@ Author: Ofek Lev Sponsor: Adam Turner PEP-Delegate: Brett Cannon Discussions-To: https://discuss.python.org/t/31151 -Status: Draft +Status: Provisional Type: Standards Track Topic: Packaging Content-Type: text/x-rst @@ -13,6 +13,7 @@ Post-History: `04-Aug-2023 `__, `06-Aug-2023 `__, `23-Aug-2023 `__, Replaces: 722 +Resolution: https://discuss.python.org/t/36763 Abstract @@ -101,14 +102,39 @@ This PEP defines a metadata comment block format loosely inspired [2]_ by __ https://docutils.sourceforge.io/docs/ref/rst/directives.html -Any Python script may have top-level comment blocks that start with the line -``# /// TYPE`` where ``TYPE`` determines how to process the content, and ends -with the line ``# ///``. Every line between these two lines MUST be a comment -starting with ``#``. If there are characters after the ``#`` then the first -character MUST be a space. The embedded content is formed by taking away the -first two characters of each line if the second character is a space, otherwise -just the first character (which means the line consists of only a single -``#``). +Any Python script may have top-level comment blocks that MUST start with the +line ``# /// TYPE`` where ``TYPE`` determines how to process the content. That +is: a single ``#``, followed by a single space, followed by three forward +slashes, followed by a single space, followed by the type of metadata. The +block MUST end with the line ``# ///``. That is: a single ``#``, followed by a single +space, followed by three forward slashes. The ``TYPE`` MUST only consist of +ASCII letters, numbers and hyphens. + +Every line between these two lines (``# /// TYPE`` and ``# ///``) MUST be a +comment starting with ``#``. If there are characters after the ``#`` then the +first character MUST be a space. 
The embedded content is formed by taking away +the first two characters of each line if the second character is a space, +otherwise just the first character (which means the line consists of only a +single ``#``). + +Precedence for an ending line ``# ///`` is given when the next line is not +a valid embedded content line as described above. For example, the following +is a single fully valid block: + +.. code:: python + + # /// some-toml + # embedded-csharp = """ + # /// + # /// text + # /// + # /// + # public class MyClass { } + # """ + # /// + +A starting line MUST NOT be placed between another starting line and its ending +line. In such cases tools MAY produce an error. Unclosed blocks MUST be ignored. When there are multiple comment blocks of the same ``TYPE`` defined, tools MUST produce an error. @@ -219,7 +245,11 @@ higher. if len(matches) > 1: raise ValueError(f'Multiple {name} blocks found') elif len(matches) == 1: - return tomllib.loads(matches[0]) + content = ''.join( + line[2:] if line.startswith('# ') else line[1:] + for line in matches[0].group('content').splitlines(keepends=True) + ) + return tomllib.loads(content) else: return None @@ -258,6 +288,23 @@ Note that this example used a library that preserves TOML formatting. This is not a requirement for editing by any means but rather is a "nice to have" feature. +The following is an example of how to read a stream of arbitrary metadata +blocks. + +.. code:: python + + import re + from typing import Iterator + + REGEX = r'(?m)^# /// (?P<type>[a-zA-Z0-9-]+)$\s(?P<content>(^#(| .*)$\s)+)^# ///$' + + def stream(script: str) -> Iterator[tuple[str, str]]: + for match in re.finditer(REGEX, script): + yield match.group('type'), ''.join( + line[2:] if line.startswith('# ') else line[1:] + for line in match.group('content').splitlines(keepends=True) + ) + Backwards Compatibility =======================