<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="utf-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<meta name="color-scheme" content="light dark">
<title>PEP 701 – Syntactic formalization of f-strings | peps.python.org</title>
<link rel="shortcut icon" href="../_static/py.png">
<link rel="canonical" href="https://peps.python.org/pep-0701/">
<link rel="stylesheet" href="../_static/style.css" type="text/css">
<link rel="stylesheet" href="../_static/mq.css" type="text/css">
<link rel="stylesheet" href="../_static/pygments.css" type="text/css" media="(prefers-color-scheme: light)" id="pyg-light">
<link rel="stylesheet" href="../_static/pygments_dark.css" type="text/css" media="(prefers-color-scheme: dark)" id="pyg-dark">
<link rel="alternate" type="application/rss+xml" title="Latest PEPs" href="https://peps.python.org/peps.rss">
<meta property="og:title" content='PEP 701 – Syntactic formalization of f-strings | peps.python.org'>
<meta property="og:description" content="This document proposes to lift some of the restrictions originally formulated in PEP 498 and to provide a formalized grammar for f-strings that can be integrated into the parser directly. The proposed syntactic formalization of f-strings will have some ...">
<meta property="og:type" content="website">
<meta property="og:url" content="https://peps.python.org/pep-0701/">
<meta property="og:site_name" content="Python Enhancement Proposals (PEPs)">
<meta property="og:image" content="https://peps.python.org/_static/og-image.png">
<meta property="og:image:alt" content="Python PEPs">
<meta property="og:image:width" content="200">
<meta property="og:image:height" content="200">
<meta name="description" content="This document proposes to lift some of the restrictions originally formulated in PEP 498 and to provide a formalized grammar for f-strings that can be integrated into the parser directly. The proposed syntactic formalization of f-strings will have some ...">
<meta name="theme-color" content="#3776ab">
</head>
<body>

<svg xmlns="http://www.w3.org/2000/svg" style="display: none;">
<symbol id="svg-sun-half" viewBox="0 0 24 24" pointer-events="all">
<title>Following system colour scheme</title>
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" fill="none"
stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round">
<circle cx="12" cy="12" r="9"></circle>
<path d="M12 3v18m0-12l4.65-4.65M12 14.3l7.37-7.37M12 19.6l8.85-8.85"></path>
</svg>
</symbol>
<symbol id="svg-moon" viewBox="0 0 24 24" pointer-events="all">
<title>Selected dark colour scheme</title>
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" fill="none"
stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round">
<path stroke="none" d="M0 0h24v24H0z" fill="none"></path>
<path d="M12 3c.132 0 .263 0 .393 0a7.5 7.5 0 0 0 7.92 12.446a9 9 0 1 1 -8.313 -12.454z"></path>
</svg>
</symbol>
<symbol id="svg-sun" viewBox="0 0 24 24" pointer-events="all">
<title>Selected light colour scheme</title>
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" fill="none"
stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round">
<circle cx="12" cy="12" r="5"></circle>
<line x1="12" y1="1" x2="12" y2="3"></line>
<line x1="12" y1="21" x2="12" y2="23"></line>
<line x1="4.22" y1="4.22" x2="5.64" y2="5.64"></line>
<line x1="18.36" y1="18.36" x2="19.78" y2="19.78"></line>
<line x1="1" y1="12" x2="3" y2="12"></line>
<line x1="21" y1="12" x2="23" y2="12"></line>
<line x1="4.22" y1="19.78" x2="5.64" y2="18.36"></line>
<line x1="18.36" y1="5.64" x2="19.78" y2="4.22"></line>
</svg>
</symbol>
</svg>
<script>

document.documentElement.dataset.colour_scheme = localStorage.getItem("colour_scheme") || "auto"
</script>
<section id="pep-page-section">
<header>
<h1>Python Enhancement Proposals</h1>
<ul class="breadcrumbs">
<li><a href="https://www.python.org/" title="The Python Programming Language">Python</a> » </li>
<li><a href="../pep-0000/">PEP Index</a> » </li>
<li>PEP 701</li>
</ul>
<button id="colour-scheme-cycler" onClick="setColourScheme(nextColourScheme())">
<svg aria-hidden="true" class="colour-scheme-icon-when-auto"><use href="#svg-sun-half"></use></svg>
<svg aria-hidden="true" class="colour-scheme-icon-when-dark"><use href="#svg-moon"></use></svg>
<svg aria-hidden="true" class="colour-scheme-icon-when-light"><use href="#svg-sun"></use></svg>
<span class="visually-hidden">Toggle light / dark / auto colour theme</span>
</button>
</header>
<article>
<section id="pep-content">
<h1 class="page-title">PEP 701 – Syntactic formalization of f-strings</h1>
<dl class="rfc2822 field-list simple">
<dt class="field-odd">Author<span class="colon">:</span></dt>
<dd class="field-odd">Pablo Galindo &lt;pablogsal at python.org&gt;,
Batuhan Taskaya &lt;batuhan at python.org&gt;,
Lysandros Nikolaou &lt;lisandrosnik at gmail.com&gt;,
Marta Gómez Macías &lt;cyberwitch at google.com&gt;</dd>
<dt class="field-even">Discussions-To<span class="colon">:</span></dt>
<dd class="field-even"><a class="reference external" href="https://discuss.python.org/t/pep-701-syntactic-formalization-of-f-strings/22046">Discourse thread</a></dd>
<dt class="field-odd">Status<span class="colon">:</span></dt>
<dd class="field-odd"><abbr title="Normative proposal accepted for implementation">Accepted</abbr></dd>
<dt class="field-even">Type<span class="colon">:</span></dt>
<dd class="field-even"><abbr title="Normative PEP with a new feature for Python, implementation change for CPython or interoperability standard for the ecosystem">Standards Track</abbr></dd>
<dt class="field-odd">Created<span class="colon">:</span></dt>
<dd class="field-odd">15-Nov-2022</dd>
<dt class="field-even">Python-Version<span class="colon">:</span></dt>
<dd class="field-even">3.12</dd>
<dt class="field-odd">Post-History<span class="colon">:</span></dt>
<dd class="field-odd"><a class="reference external" href="https://discuss.python.org/t/pep-701-syntactic-formalization-of-f-strings/22046" title="Discourse thread">19-Dec-2022</a></dd>
<dt class="field-even">Resolution<span class="colon">:</span></dt>
<dd class="field-even"><a class="reference external" href="https://discuss.python.org/t/pep-701-syntactic-formalization-of-f-strings/22046/119">14-Mar-2023</a></dd>
</dl>
<hr class="docutils" />
<section id="contents">
<details><summary>Table of Contents</summary><ul class="simple">
<li><a class="reference internal" href="#abstract">Abstract</a></li>
<li><a class="reference internal" href="#motivation">Motivation</a></li>
<li><a class="reference internal" href="#rationale">Rationale</a></li>
<li><a class="reference internal" href="#specification">Specification</a><ul>
<li><a class="reference internal" href="#handling-of-f-string-debug-expressions">Handling of f-string debug expressions</a></li>
<li><a class="reference internal" href="#new-tokens">New tokens</a></li>
<li><a class="reference internal" href="#changes-to-the-tokenize-module">Changes to the tokenize module</a></li>
<li><a class="reference internal" href="#how-to-produce-these-new-tokens">How to produce these new tokens</a></li>
<li><a class="reference internal" href="#consequences-of-the-new-grammar">Consequences of the new grammar</a></li>
<li><a class="reference internal" href="#considerations-regarding-quote-reuse">Considerations regarding quote reuse</a></li>
</ul>
</li>
<li><a class="reference internal" href="#backwards-compatibility">Backwards Compatibility</a></li>
<li><a class="reference internal" href="#how-to-teach-this">How to Teach This</a></li>
<li><a class="reference internal" href="#reference-implementation">Reference Implementation</a></li>
<li><a class="reference internal" href="#rejected-ideas">Rejected Ideas</a></li>
<li><a class="reference internal" href="#open-issues">Open Issues</a></li>
<li><a class="reference internal" href="#copyright">Copyright</a></li>
</ul>
</details></section>
|
||
<section id="abstract">
|
||
<h2><a class="toc-backref" href="#abstract" role="doc-backlink">Abstract</a></h2>
|
||
<p>This document proposes to lift some of the restrictions originally formulated in
|
||
<a class="pep reference internal" href="../pep-0498/" title="PEP 498 – Literal String Interpolation">PEP 498</a> and to provide a formalized grammar for f-strings that can be
|
||
integrated into the parser directly. The proposed syntactic formalization of
|
||
f-strings will have some small side-effects on how f-strings are parsed and
|
||
interpreted, allowing for a considerable number of advantages for end users and
|
||
library developers, while also dramatically reducing the maintenance cost of
|
||
the code dedicated to parsing f-strings.</p>
|
||
</section>
|
||
<section id="motivation">
|
||
<h2><a class="toc-backref" href="#motivation" role="doc-backlink">Motivation</a></h2>
|
||
<p>When f-strings were originally introduced in <a class="pep reference internal" href="../pep-0498/" title="PEP 498 – Literal String Interpolation">PEP 498</a>, the specification was
|
||
provided without providing a formal grammar for f-strings. Additionally, the
|
||
specification contains several restrictions that are imposed so the parsing of
|
||
f-strings could be implemented into CPython without modifying the existing
|
||
lexer. These limitations have been recognized previously and previous attempts
|
||
have been made to lift them in <a class="pep reference internal" href="../pep-0536/" title="PEP 536 – Final Grammar for Literal String Interpolation">PEP 536</a>, but <a class="reference external" href="https://mail.python.org/archives/list/python-dev@python.org/thread/N43O4KNLZW4U7YZC4NVPCETZIVRDUVU2/#NM2A37THVIXXEYR4J5ZPTNLXGGUNFRLZ">none of this work was ever implemented</a>.
|
||
Some of these limitations (collected originally by <a class="pep reference internal" href="../pep-0536/" title="PEP 536 – Final Grammar for Literal String Interpolation">PEP 536</a>) are:</p>
|
||
<ol class="arabic">
|
||
<li>It is impossible to use the quote character delimiting the f-string
|
||
within the expression portion:<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="gp">>>> </span><span class="sa">f</span><span class="s1">'Magic wand: </span><span class="si">{</span><span class="w"> </span><span class="n">bag</span><span class="p">[</span><span class="s1">'wand'</span><span class="p">]</span><span class="w"> </span><span class="si">}</span><span class="s1">'</span>
|
||
<span class="go"> ^</span>
|
||
<span class="go">SyntaxError: invalid syntax</span>
|
||
</pre></div>
|
||
</div>
|
||
</li>
|
||
<li>A previously considered way around it would lead to escape sequences
|
||
in executed code and is prohibited in f-strings:<div class="highlight-default notranslate"><div class="highlight"><pre><span></span>>>> f'Magic wand { bag[\'wand\'] } string'
|
||
SyntaxError: f-string expression portion cannot include a backslash
|
||
</pre></div>
|
||
</div>
|
||
</li>
|
||
<li>Comments are forbidden even in multi-line f-strings:<div class="highlight-default notranslate"><div class="highlight"><pre><span></span>>>> f'''A complex trick: {
|
||
... bag['bag'] # recursive bags!
|
||
... }'''
|
||
SyntaxError: f-string expression part cannot include '#'
|
||
</pre></div>
|
||
</div>
|
||
</li>
|
||
<li>Arbitrary nesting of expressions without expansion of escape sequences is
|
||
available in many other languages that employ a string interpolation
|
||
method that uses expressions instead of just variable names. Some examples:<div class="highlight-text notranslate"><div class="highlight"><pre><span></span># Ruby
|
||
"#{ "#{1+2}" }"
|
||
|
||
# JavaScript
|
||
`${`${1+2}`}`
|
||
|
||
# Swift
|
||
"\("\(1+2)")"
|
||
|
||
# C#
|
||
$"{$"{1+2}"}"
|
||
</pre></div>
|
||
</div>
|
||
</li>
|
||
</ol>
|
||
<p>These limitations serve no purpose from a language user perspective and
|
||
can be lifted by giving f-string literals a regular grammar without exceptions
|
||
and implementing it using dedicated parse code.</p>
|
||
<p>The other issue that f-strings have is that the current implementation in
|
||
CPython relies on tokenising f-strings as <code class="docutils literal notranslate"><span class="pre">STRING</span></code> tokens and a post processing of
|
||
these tokens. This has the following problems:</p>
|
||
<ol class="arabic simple">
|
||
<li>It adds a considerable maintenance cost to the CPython parser. This is because
|
||
the parsing code needs to be written by hand, which has historically led to a
|
||
considerable number of inconsistencies and bugs. Writing and maintaining parsing
|
||
code by hand in C has always been considered error prone and dangerous as it needs
|
||
to deal with a lot of manual memory management over the original lexer buffers.</li>
|
||
<li>The f-string parsing code is not able to use the new improved error message mechanisms
|
||
that the new PEG parser, originally introduced in <a class="pep reference internal" href="../pep-0617/" title="PEP 617 – New PEG parser for CPython">PEP 617</a>, has allowed. The
|
||
improvements that these error messages brought has been greatly celebrated but
|
||
unfortunately f-strings cannot benefit from them because they are parsed in a
|
||
separate piece of the parsing machinery. This is especially unfortunate, since
|
||
there are several syntactical features of f-strings that can be confusing due
|
||
to the different implicit tokenization that happens inside the expression
|
||
part (for instance <code class="docutils literal notranslate"><span class="pre">f"{y:=3}"</span></code> is not an assignment expression).</li>
|
||
<li>Other Python implementations have no way to know if they have implemented
|
||
f-strings correctly because contrary to other language features, they are not
|
||
part of the <a class="reference external" href="https://docs.python.org/3/reference/lexical_analysis.html#f-strings" title="(in Python v3.13)"><span class="xref std std-ref">official Python grammar</span></a>.
|
||
This is important because several prominent
|
||
alternative implementations are using CPython’s PEG parser, <a class="reference external" href="https://foss.heptapod.net/pypy/pypy/-/commit/fe120f89bf07e64a41de62b224e4a3d80e0fe0d4/pipelines?ref=branch%2Fpy3.9">such as PyPy</a>,
|
||
and/or are basing their grammars on the official PEG grammar. The
|
||
fact that f-strings use a separate parser prevents these alternative implementations
|
||
from leveraging the official grammar and benefiting from improvements in error messages derived
|
||
from the grammar.</li>
|
||
</ol>
|
||
<p>A version of this proposal was originally <a class="reference external" href="https://mail.python.org/archives/list/python-dev@python.org/thread/54N3MOYVBDSJQZTU6MTCPLUPIFSDN5IS/#SAYU6SMP4KT7G7AQ6WVQYUDOSZPKHJMS">discussed on Python-Dev</a> and
|
||
<a class="reference external" href="https://pyfound.blogspot.com/2022/05/the-2022-python-language-summit-f.html">presented at the Python Language Summit 2022</a> where it was enthusiastically
|
||
received.</p>
|
||
</section>
|
||
<section id="rationale">
|
||
<h2><a class="toc-backref" href="#rationale" role="doc-backlink">Rationale</a></h2>
|
||
<p>By building on top of the new Python PEG Parser (<a class="pep reference internal" href="../pep-0617/" title="PEP 617 – New PEG parser for CPython">PEP 617</a>), this PEP proposes
|
||
to redefine “f-strings”, especially emphasizing the clear separation of the
|
||
string component and the expression (or replacement, <code class="docutils literal notranslate"><span class="pre">{...}</span></code>) component. <a class="pep reference internal" href="../pep-0498/" title="PEP 498 – Literal String Interpolation">PEP 498</a>
|
||
summarizes the syntactical part of “f-strings” as the following:</p>
|
||
<blockquote>
|
||
<div>In Python source code, an f-string is a literal string, prefixed with ‘f’, which
|
||
contains expressions inside braces. The expressions are replaced with their values.</div></blockquote>
|
||
<p>However, <a class="pep reference internal" href="../pep-0498/" title="PEP 498 – Literal String Interpolation">PEP 498</a> also contained a formal list of exclusions on what
|
||
can or cannot be contained inside the expression component (primarily due to the
|
||
limitations of the existing parser). By clearly establishing the formal grammar, we
|
||
now also have the ability to define the expression component of an f-string as truly “any
|
||
applicable Python expression” (in that particular context) without being bound
|
||
by the limitations imposed by the details of our implementation.</p>
|
||
<p>The formalization effort and the premise above also has a significant benefit for
|
||
Python programmers due to its ability to simplify and eliminate the obscure
|
||
limitations. This reduces the mental burden and the cognitive complexity of
|
||
f-string literals (as well as the Python language in general).</p>
|
||
<ol class="arabic">
|
||
<li>The expression component can include any string literal that a normal Python expression
|
||
can include. This opens up the possibility of nesting string literals (formatted or
|
||
not) inside the expression component of an f-string with the same quote type (and length):<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="gp">>>> </span><span class="sa">f</span><span class="s2">"These are the things: </span><span class="si">{</span><span class="s2">", "</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="n">things</span><span class="p">)</span><span class="si">}</span><span class="s2">"</span>
|
||
|
||
<span class="gp">>>> </span><span class="sa">f</span><span class="s2">"</span><span class="si">{</span><span class="n">source</span><span class="o">.</span><span class="n">removesuffix</span><span class="p">(</span><span class="s2">".py"</span><span class="p">)</span><span class="si">}</span><span class="s2">.c: $(srcdir)/</span><span class="si">{</span><span class="n">source</span><span class="si">}</span><span class="s2">"</span>
|
||
|
||
<span class="gp">>>> </span><span class="sa">f</span><span class="s2">"</span><span class="si">{</span><span class="sa">f</span><span class="s2">"</span><span class="si">{</span><span class="sa">f</span><span class="s2">"infinite"</span><span class="si">}</span><span class="s2">"</span><span class="si">}</span><span class="s2">"</span> <span class="o">+</span> <span class="s2">" "</span> <span class="o">+</span> <span class="sa">f</span><span class="s2">"</span><span class="si">{</span><span class="sa">f</span><span class="s2">"nesting!!!"</span><span class="si">}</span><span class="s2">"</span>
|
||
</pre></div>
|
||
</div>
|
||
<p>This “feature” is not universally agreed to be desirable, and some users find this unreadable.
|
||
For a discussion on the different views on this, see the <a class="reference internal" href="#considerations-regarding-quote-reuse">considerations regarding quote reuse</a> section.</p>
|
||
</li>
|
||
<li>Another issue that has felt unintuitive to most is the lack of support for backslashes
|
||
within the expression component of an f-string. One example that keeps coming up is including
|
||
a newline character in the expression part for joining containers. For example:<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="gp">>>> </span><span class="n">a</span> <span class="o">=</span> <span class="p">[</span><span class="s2">"hello"</span><span class="p">,</span> <span class="s2">"world"</span><span class="p">]</span>
|
||
<span class="gp">>>> </span><span class="sa">f</span><span class="s2">"</span><span class="si">{</span><span class="s1">'</span><span class="se">\n</span><span class="s1">'</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="n">a</span><span class="p">)</span><span class="si">}</span><span class="s2">"</span>
|
||
<span class="go">File "&lt;stdin&gt;", line 1</span>
|
||
<span class="go"> f"{'\n'.join(a)}"</span>
|
||
<span class="go"> ^</span>
|
||
<span class="go">SyntaxError: f-string expression part cannot include a backslash</span>
|
||
</pre></div>
|
||
</div>
|
||
<p>A common work-around for this was to either assign the newline to an intermediate variable or
|
||
pre-create the whole string prior to creating the f-string:</p>
|
||
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="gp">>>> </span><span class="n">a</span> <span class="o">=</span> <span class="p">[</span><span class="s2">"hello"</span><span class="p">,</span> <span class="s2">"world"</span><span class="p">]</span>
|
||
<span class="gp">>>> </span><span class="n">joined</span> <span class="o">=</span> <span class="s1">'</span><span class="se">\n</span><span class="s1">'</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="n">a</span><span class="p">)</span>
|
||
<span class="gp">>>> </span><span class="sa">f</span><span class="s2">"</span><span class="si">{</span><span class="n">joined</span><span class="si">}</span><span class="s2">"</span>
|
||
<span class="go">'hello\nworld'</span>
|
||
</pre></div>
|
||
</div>
|
||
<p>It only feels natural to allow backslashes in the expression part now that the new PEG parser
|
||
can easily support it.</p>
|
||
<div class="doctest highlight-default notranslate"><div class="highlight"><pre><span></span><span class="gp">>>> </span><span class="n">a</span> <span class="o">=</span> <span class="p">[</span><span class="s2">"hello"</span><span class="p">,</span> <span class="s2">"world"</span><span class="p">]</span>
|
||
<span class="gp">>>> </span><span class="sa">f</span><span class="s2">"</span><span class="si">{</span><span class="s1">'</span><span class="se">\n</span><span class="s1">'</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="n">a</span><span class="p">)</span><span class="si">}</span><span class="s2">"</span>
|
||
<span class="go">'hello\nworld'</span>
|
||
</pre></div>
|
||
</div>
|
||
</li>
|
||
<li>Before the changes proposed in this document, there was no explicit limit in
|
||
how f-strings can be nested, but the fact that string quotes cannot be reused
|
||
inside the expression component of f-strings made it impossible to nest
|
||
f-strings arbitrarily. In fact, this is the most nested-fstring that can be
|
||
written:<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="gp">>>> </span><span class="sa">f</span><span class="s2">"""</span><span class="si">{</span><span class="sa">f</span><span class="s1">'''</span><span class="si">{</span><span class="sa">f</span><span class="s1">'</span><span class="si">{</span><span class="sa">f</span><span class="s2">"</span><span class="si">{</span><span class="mi">1</span><span class="o">+</span><span class="mi">1</span><span class="si">}</span><span class="s2">"</span><span class="si">}</span><span class="s1">'</span><span class="si">}</span><span class="s1">'''</span><span class="si">}</span><span class="s2">"""</span>
|
||
<span class="go">'2'</span>
|
||
</pre></div>
|
||
</div>
|
||
<p>As this PEP allows placing <strong>any</strong> valid Python expression inside the
|
||
expression component of the f-strings, it is now possible to reuse quotes and
|
||
therefore is possible to nest f-strings arbitrarily:</p>
|
||
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="gp">>>> </span><span class="sa">f</span><span class="s2">"</span><span class="si">{</span><span class="sa">f</span><span class="s2">"</span><span class="si">{</span><span class="sa">f</span><span class="s2">"</span><span class="si">{</span><span class="sa">f</span><span class="s2">"</span><span class="si">{</span><span class="sa">f</span><span class="s2">"</span><span class="si">{</span><span class="sa">f</span><span class="s2">"</span><span class="si">{</span><span class="mi">1</span><span class="o">+</span><span class="mi">1</span><span class="si">}</span><span class="s2">"</span><span class="si">}</span><span class="s2">"</span><span class="si">}</span><span class="s2">"</span><span class="si">}</span><span class="s2">"</span><span class="si">}</span><span class="s2">"</span><span class="si">}</span><span class="s2">"</span>
|
||
<span class="go">'2'</span>
|
||
</pre></div>
|
||
</div>
|
||
<p>Although this is just a consequence of allowing arbitrary expressions, the
|
||
authors of this PEP do not believe that this is a fundamental benefit and we
|
||
have decided that the language specification will not explicitly mandate that
|
||
this nesting can be arbitrary. This is because allowing arbitrarily-deep
|
||
nesting imposes a lot of extra complexity to the lexer implementation
|
||
(particularly as lexer/parser pipelines need to allow “untokenizing” to
|
||
support the ‘f-string debugging expressions’ and this is especially taxing when
|
||
arbitrary nesting is allowed). Implementations are therefore free to impose a
|
||
limit on the nesting depth if they need to. Note that this is not an uncommon
|
||
situation, as the CPython implementation already imposes several limits all
|
||
over the place, including a limit on the nesting depth of parentheses and
|
||
brackets, a limit on the nesting of the blocks, a limit in the number of
|
||
branches in <code class="docutils literal notranslate"><span class="pre">if</span></code> statements, a limit on the number of expressions in
|
||
star-unpacking, etc.</p>
|
||
</li>
|
||
</ol>
|
||
</section>
|
||
<section id="specification">
|
||
<h2><a class="toc-backref" href="#specification" role="doc-backlink">Specification</a></h2>
|
||
<p>The formal proposed PEG grammar specification for f-strings is (see <a class="pep reference internal" href="../pep-0617/" title="PEP 617 – New PEG parser for CPython">PEP 617</a>
|
||
for details on the syntax):</p>
|
||
<div class="highlight-peg notranslate"><div class="highlight"><pre><span></span><span class="nc">fstring</span>
|
||
<span class="o">|</span> <span class="nc">FSTRING_START</span> <span class="nc">fstring_middle</span><span class="o">*</span> <span class="nc">FSTRING_END</span>
|
||
<span class="nc">fstring_middle</span>
|
||
<span class="o">|</span> <span class="nc">fstring_replacement_field</span>
|
||
<span class="o">|</span> <span class="nc">FSTRING_MIDDLE</span>
|
||
<span class="nc">fstring_replacement_field</span>
|
||
<span class="o">|</span> <span class="s1">'{'</span> <span class="p">(</span><span class="nc">yield_expr</span> <span class="o">|</span> <span class="nc">star_expressions</span><span class="p">)</span> <span class="s2">"="</span><span class="o">?</span> <span class="p">[</span><span class="s"> "!" NAME </span><span class="p">]</span> <span class="p">[</span><span class="s"> ':' fstring_format_spec* </span><span class="p">]</span> <span class="s1">'}'</span>
|
||
<span class="nc">fstring_format_spec</span><span class="o">:</span>
|
||
<span class="o">|</span> <span class="nc">FSTRING_MIDDLE</span>
|
||
<span class="o">|</span> <span class="nc">fstring_replacement_field</span>
|
||
</pre></div>
|
||
</div>
|
||
<p>The new tokens (<code class="docutils literal notranslate"><span class="pre">FSTRING_START</span></code>, <code class="docutils literal notranslate"><span class="pre">FSTRING_MIDDLE</span></code>, <code class="docutils literal notranslate"><span class="pre">FSTRING_END</span></code>) are defined
|
||
<a class="reference internal" href="#new-tokens">later in this document</a>.</p>
|
||
<p>This PEP leaves up to the implementation the level of f-string nesting allowed
|
||
(f-strings within the expression parts of other f-strings) but <strong>specifies a
|
||
lower bound of 5 levels of nesting</strong>. This is to ensure that users can have a
|
||
reasonable expectation of being able to nest f-strings with “reasonable” depth.
|
||
This PEP implies that limiting nesting is <strong>not part of the language
|
||
specification</strong> but also the language specification <strong>doesn’t mandate arbitrary
|
||
nesting</strong>.</p>
|
||
<p>Similarly, this PEP leaves up to the implementation the level of expression nesting
|
||
in format specifiers but <strong>specifies a lower bound of 2 levels of nesting</strong>. This means
|
||
that the following should always be valid:</p>
|
||
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="sa">f</span><span class="s2">"</span><span class="si">{</span><span class="s1">''</span><span class="si">:</span><span class="s2">*^</span><span class="si">{</span><span class="mi">1</span><span class="si">:{</span><span class="mi">1</span><span class="si">}</span><span class="se">}}</span><span class="s2">"</span>
|
||
</pre></div>
|
||
</div>
|
||
<p>but the following can be valid or not depending on the implementation:</p>
|
||
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="sa">f</span><span class="s2">"</span><span class="si">{</span><span class="s1">''</span><span class="si">:</span><span class="s2">*^</span><span class="si">{</span><span class="mi">1</span><span class="si">:{</span><span class="mi">1</span><span class="si">:{</span><span class="mi">1</span><span class="si">}</span><span class="se">}}</span><span class="si">}</span><span class="s2">"</span>
|
||
</pre></div>
|
||
</div>
|
||
<p>The new grammar will preserve the Abstract Syntax Tree (AST) of the current
|
||
implementation. This means that no semantic changes will be introduced by this
|
||
PEP on existing code that uses f-strings.</p>
|
||
<section id="handling-of-f-string-debug-expressions">
|
||
<h3><a class="toc-backref" href="#handling-of-f-string-debug-expressions" role="doc-backlink">Handling of f-string debug expressions</a></h3>
|
||
<p>Since Python 3.8, f-strings can be used to debug expressions by using the
|
||
<code class="docutils literal notranslate"><span class="pre">=</span></code> operator. For example:</p>
|
||
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="gp">>>> </span><span class="n">a</span> <span class="o">=</span> <span class="mi">1</span>
|
||
<span class="gp">>>> </span><span class="sa">f</span><span class="s2">"</span><span class="si">{</span><span class="mi">1</span><span class="o">+</span><span class="mi">1</span><span class="si">=}</span><span class="s2">"</span>
|
||
<span class="go">'1+1=2'</span>
|
||
</pre></div>
|
||
</div>
|
||
<p>This semantics were not introduced formally in a PEP and they were implemented
|
||
in the current string parser as a special case in <a class="reference external" href="https://bugs.python.org/issue?@action=redirect&bpo=36817">bpo-36817</a> and documented in
|
||
<a class="reference external" href="https://docs.python.org/3/reference/lexical_analysis.html#f-strings">the f-string lexical analysis section</a>.</p>
|
||
<p>This feature is not affected by the changes proposed in this PEP but is
|
||
important to specify that the formal handling of this feature requires the lexer
|
||
to be able to “untokenize” the expression part of the f-string. This is not a
|
||
problem for the current string parser as it can operate directly on the string
|
||
token contents. However, incorporating this feature into a given parser
|
||
implementation requires the lexer to keep track of the raw string contents of
|
||
the expression part of the f-string and make them available to the parser when
|
||
the parse tree is constructed for f-string nodes. A pure “untokenization” is not
|
||
enough because as specified currently, f-string debug expressions preserve whitespace in the expression,
|
||
including spaces after the <code class="docutils literal notranslate"><span class="pre">{</span></code> and the <code class="docutils literal notranslate"><span class="pre">=</span></code> characters. This means that the
|
||
raw string contents of the expression part of the f-string must be kept intact
|
||
and not just the associated tokens.</p>
|
||
<p>How parser/lexer implementations deal with this problem is of course up to the
|
||
implementation.</p>
|
||
</section>
|
||
<section id="new-tokens">
|
||
<h3><a class="toc-backref" href="#new-tokens" role="doc-backlink">New tokens</a></h3>
|
||
<p>Three new tokens are introduced: <code class="docutils literal notranslate"><span class="pre">FSTRING_START</span></code>, <code class="docutils literal notranslate"><span class="pre">FSTRING_MIDDLE</span></code> and
|
||
<code class="docutils literal notranslate"><span class="pre">FSTRING_END</span></code>. Different lexers may have different implementations that may be
|
||
more efficient than the ones proposed here given the context of the particular
|
||
implementation. However, the following definitions will be used as part of the
|
||
public APIs of CPython (such as the <code class="docutils literal notranslate"><span class="pre">tokenize</span></code> module) and are also provided
|
||
as a reference so that the reader can have a better understanding of the
|
||
proposed grammar changes and how the tokens are used:</p>
|
||
<ul class="simple">
|
||
<li><code class="docutils literal notranslate"><span class="pre">FSTRING_START</span></code>: This token includes the f-string prefix (<code class="docutils literal notranslate"><span class="pre">f</span></code>/<code class="docutils literal notranslate"><span class="pre">F</span></code>/<code class="docutils literal notranslate"><span class="pre">fr</span></code>) and the opening quote(s).</li>
|
||
<li><code class="docutils literal notranslate"><span class="pre">FSTRING_MIDDLE</span></code>: This token includes a portion of text inside the string that’s not part of the
|
||
expression part and isn’t an opening or closing brace. This can include the text between the opening quote
|
||
and the first expression brace (<code class="docutils literal notranslate"><span class="pre">{</span></code>), the text between two expression braces (<code class="docutils literal notranslate"><span class="pre">}</span></code> and <code class="docutils literal notranslate"><span class="pre">{</span></code>) and the text
|
||
between the last expression brace (<code class="docutils literal notranslate"><span class="pre">}</span></code>) and the closing quote.</li>
|
||
<li><code class="docutils literal notranslate"><span class="pre">FSTRING_END</span></code>: This token includes the closing quote.</li>
|
||
</ul>
|
||
<p>These tokens are always string parts and they are semantically equivalent to the
|
||
<code class="docutils literal notranslate"><span class="pre">STRING</span></code> token with the restrictions specified. These tokens must be produced by the lexer
|
||
when lexing f-strings. This means that <strong>the tokenizer cannot produce a single token for f-strings anymore</strong>.
|
||
How the lexer emits this token is <strong>not specified</strong> as this will heavily depend on every
|
||
implementation (even the Python version of the lexer in the standard library is implemented
|
||
differently to the one used by the PEG parser).</p>
|
||
<p>As an example:</p>
|
||
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="sa">f</span><span class="s1">'some words </span><span class="si">{</span><span class="n">a</span><span class="o">+</span><span class="n">b</span><span class="si">:</span><span class="s1">.3f</span><span class="si">}</span><span class="s1"> more words </span><span class="si">{</span><span class="n">c</span><span class="o">+</span><span class="n">d</span><span class="si">=}</span><span class="s1"> final words'</span>
|
||
</pre></div>
|
||
</div>
|
||
<p>will be tokenized as:</p>
|
||
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="n">FSTRING_START</span> <span class="o">-</span> <span class="s2">"f'"</span>
|
||
<span class="n">FSTRING_MIDDLE</span> <span class="o">-</span> <span class="s1">'some words '</span>
|
||
<span class="n">LBRACE</span> <span class="o">-</span> <span class="s1">'{'</span>
|
||
<span class="n">NAME</span> <span class="o">-</span> <span class="s1">'a'</span>
|
||
<span class="n">PLUS</span> <span class="o">-</span> <span class="s1">'+'</span>
|
||
<span class="n">NAME</span> <span class="o">-</span> <span class="s1">'b'</span>
|
||
<span class="n">OP</span> <span class="o">-</span> <span class="s1">':'</span>
|
||
<span class="n">FSTRING_MIDDLE</span> <span class="o">-</span> <span class="s1">'.3f'</span>
|
||
<span class="n">RBRACE</span> <span class="o">-</span> <span class="s1">'}'</span>
|
||
<span class="n">FSTRING_MIDDLE</span> <span class="o">-</span> <span class="s1">' more words '</span>
|
||
<span class="n">LBRACE</span> <span class="o">-</span> <span class="s1">'{'</span>
|
||
<span class="n">NAME</span> <span class="o">-</span> <span class="s1">'c'</span>
|
||
<span class="n">PLUS</span> <span class="o">-</span> <span class="s1">'+'</span>
|
||
<span class="n">NAME</span> <span class="o">-</span> <span class="s1">'d'</span>
|
||
<span class="n">OP</span> <span class="o">-</span> <span class="s1">'='</span>
|
||
<span class="n">RBRACE</span> <span class="o">-</span> <span class="s1">'}'</span>
|
||
<span class="n">FSTRING_MIDDLE</span> <span class="o">-</span> <span class="s1">' final words'</span>
|
||
<span class="n">FSTRING_END</span> <span class="o">-</span> <span class="s2">"'"</span>
|
||
</pre></div>
|
||
</div>
|
||
<p>while <code class="docutils literal notranslate"><span class="pre">f"""some</span> <span class="pre">words"""</span></code> will be tokenized simply as:</p>
|
||
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="n">FSTRING_START</span> <span class="o">-</span> <span class="s1">'f"""'</span>
|
||
<span class="n">FSTRING_MIDDLE</span> <span class="o">-</span> <span class="s1">'some words'</span>
|
||
<span class="n">FSTRING_END</span> <span class="o">-</span> <span class="s1">'"""'</span>
|
||
</pre></div>
|
||
</div>
|
||
</section>
|
||
<section id="changes-to-the-tokenize-module">
|
||
<h3><a class="toc-backref" href="#changes-to-the-tokenize-module" role="doc-backlink">Changes to the tokenize module</a></h3>
|
||
<p>The <a class="reference external" href="https://docs.python.org/3/library/tokenize.html#module-tokenize" title="(in Python v3.13)"><code class="xref py py-mod docutils literal notranslate"><span class="pre">tokenize</span></code></a> module will be adapted to emit these tokens as described in the previous section
|
||
when parsing f-strings so tools can take advantage of this new tokenization schema and avoid having
|
||
to implement their own f-string tokenizer and parser.</p>
|
||
</section>
|
||
<section id="how-to-produce-these-new-tokens">
|
||
<h3><a class="toc-backref" href="#how-to-produce-these-new-tokens" role="doc-backlink">How to produce these new tokens</a></h3>
|
||
<p>One way existing lexers can be adapted to emit these tokens is to incorporate a
|
||
stack of “lexer modes” or to use a stack of different lexers. This is because
|
||
the lexer needs to switch from “regular Python lexing” to “f-string lexing” when
|
||
it encounters an f-string start token and as f-strings can be nested, the
|
||
context needs to be preserved until the f-string closes. Also, the “lexer mode”
|
||
inside an f-string expression part needs to behave as a “super-set” of the
|
||
regular Python lexer (as it needs to be able to switch back to f-string lexing
|
||
when it encounters the <code class="docutils literal notranslate"><span class="pre">}</span></code> terminator for the expression part as well as
|
||
handling f-string formatting and debug expressions). For reference, here is a
|
||
draft of the algorithm to modify a CPython-like tokenizer to emit these new
|
||
tokens:</p>
|
||
<ol class="arabic">
|
||
<li>If the lexer detects that an f-string is starting (by detecting the letter
|
||
‘f/F’ and one of the possible quotes) keep advancing until a valid quote is
|
||
detected (one of <code class="docutils literal notranslate"><span class="pre">"</span></code>, <code class="docutils literal notranslate"><span class="pre">"""</span></code>, <code class="docutils literal notranslate"><span class="pre">'</span></code> or <code class="docutils literal notranslate"><span class="pre">'''</span></code>) and emit a
|
||
<code class="docutils literal notranslate"><span class="pre">FSTRING_START</span></code> token with the contents captured (the ‘f/F’ and the
|
||
starting quote). Push a new tokenizer mode to the tokenizer mode stack for
|
||
“F-string tokenization”. Go to step 2.</li>
|
||
<li>Keep consuming tokens until one of the following is encountered:<ul class="simple">
|
||
<li>A closing quote equal to the opening quote.</li>
|
||
<li>If in “format specifier mode” (see step 3), an opening brace (<code class="docutils literal notranslate"><span class="pre">{</span></code>), a
|
||
closing brace (<code class="docutils literal notranslate"><span class="pre">}</span></code>), or a newline token (<code class="docutils literal notranslate"><span class="pre">\n</span></code>).</li>
|
||
<li>If not in “format specifier mode” (see step 3), an opening brace (<code class="docutils literal notranslate"><span class="pre">{</span></code>) or
|
||
a closing brace (<code class="docutils literal notranslate"><span class="pre">}</span></code>) that is not immediately followed by another opening/closing
|
||
brace.</li>
|
||
</ul>
|
||
<p>In all cases, if the character buffer is not empty, emit a <code class="docutils literal notranslate"><span class="pre">FSTRING_MIDDLE</span></code>
|
||
token with the contents captured so far but transform any double
|
||
opening/closing braces into single opening/closing braces. Now, proceed as
|
||
follows depending on the character encountered:</p>
|
||
<ul class="simple">
|
||
<li>If a closing quote matching the opening quote is encountered, go to step 4.</li>
|
||
<li>If an opening bracket (not immediately followed by another opening bracket)
|
||
is encountered, go to step 3.</li>
|
||
<li>If a closing bracket (not immediately followed by another closing bracket)
|
||
is encountered, emit a token for the closing bracket and go to step 2.</li>
|
||
</ul>
|
||
</li>
|
||
<li>Push a new tokenizer mode to the tokenizer mode stack for “Regular Python
|
||
tokenization within f-string” and proceed to tokenize with it. This mode
|
||
tokenizes as the “Regular Python tokenization” until a <code class="docutils literal notranslate"><span class="pre">:</span></code> or a <code class="docutils literal notranslate"><span class="pre">}</span></code>
|
||
character is encountered with the same level of nesting as the opening
|
||
bracket token that was pushed when we enter the f-string part. Using this mode,
|
||
emit tokens until one of the stop points is reached. When this happens, emit
|
||
the corresponding token for the stopping character encountered and pop the
|
||
current tokenizer mode from the tokenizer mode stack and go to step 2. If the
|
||
stopping point is a <code class="docutils literal notranslate"><span class="pre">:</span></code> character, enter step 2 in “format specifier” mode.</li>
|
||
<li>Emit a <code class="docutils literal notranslate"><span class="pre">FSTRING_END</span></code> token with the contents captured and pop the current
|
||
tokenizer mode (corresponding to “F-string tokenization”) and go back to
|
||
“Regular Python mode”.</li>
|
||
</ol>
|
||
<p>Of course, as mentioned before, it is not possible to provide a precise
|
||
specification of how this should be done for an arbitrary tokenizer as it will
|
||
depend on the specific implementation and nature of the lexer to be changed.</p>
|
||
</section>
|
||
<section id="consequences-of-the-new-grammar">
|
||
<h3><a class="toc-backref" href="#consequences-of-the-new-grammar" role="doc-backlink">Consequences of the new grammar</a></h3>
|
||
<p>All restrictions mentioned in the PEP are lifted from f-string literals, as explained below:</p>
|
||
<ul>
|
||
<li>Expression portions may now contain strings delimited with the same kind of
|
||
quote that is used to delimit the f-string literal.</li>
|
||
<li>Backslashes may now appear within expressions just like anywhere else in
|
||
Python code. In case of strings nested within f-string literals, escape sequences are
|
||
expanded when the innermost string is evaluated.</li>
|
||
<li>New lines are now allowed within expression brackets. This means that these are now allowed:<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="gp">>>> </span><span class="n">x</span> <span class="o">=</span> <span class="mi">1</span>
|
||
<span class="gp">>>> </span><span class="sa">f</span><span class="s2">"___</span><span class="si">{</span>
|
||
<span class="gp">... </span><span class="w"> </span><span class="n">x</span>
|
||
<span class="gp">... </span><span class="si">}</span><span class="s2">___"</span>
|
||
<span class="go">'___1___'</span>
|
||
|
||
<span class="gp">>>> </span><span class="sa">f</span><span class="s2">"___</span><span class="si">{</span><span class="p">(</span>
|
||
<span class="gp">... </span><span class="w"> </span><span class="n">x</span>
|
||
<span class="gp">... </span><span class="p">)</span><span class="si">}</span><span class="s2">___"</span>
|
||
<span class="go">'___1___'</span>
|
||
</pre></div>
|
||
</div>
|
||
</li>
|
||
<li>Comments, using the <code class="docutils literal notranslate"><span class="pre">#</span></code> character, are allowed within the expression part of an f-string.
|
||
Note that comments require the closing bracket (<code class="docutils literal notranslate"><span class="pre">}</span></code>) of the expression part to be present on
a different line from the one the comment is in, or otherwise it will be ignored as part of the comment.</li>
|
||
</ul>
|
||
</section>
|
||
<section id="considerations-regarding-quote-reuse">
|
||
<span id="considerations-of-quote-reuse"></span><h3><a class="toc-backref" href="#considerations-regarding-quote-reuse" role="doc-backlink">Considerations regarding quote reuse</a></h3>
|
||
<p>One of the consequences of the grammar proposed here is that, as mentioned above,
|
||
f-string expressions can now contain strings delimited with the same kind of quote
|
||
that is used to delimit the external f-string literal. For example:</p>
|
||
<div class="doctest highlight-default notranslate"><div class="highlight"><pre><span></span><span class="gp">>>> </span><span class="sa">f</span><span class="s2">" something </span><span class="si">{</span><span class="w"> </span><span class="n">my_dict</span><span class="p">[</span><span class="s2">"key"</span><span class="p">]</span><span class="w"> </span><span class="si">}</span><span class="s2"> something else "</span>
|
||
</pre></div>
|
||
</div>
|
||
<p>In the <a class="reference external" href="https://discuss.python.org/t/pep-701-syntactic-formalization-of-f-strings/22046">discussion thread for this PEP</a>,
|
||
several concerns have been raised regarding this aspect and we want to collect them here,
|
||
as these should be taken into consideration when accepting or rejecting this PEP.</p>
|
||
<p>Some of these objections include:</p>
|
||
<ul class="simple">
|
||
<li>Many people find quote reuse within the same string confusing and hard to read. This is because
|
||
allowing quote reuse will violate a current property of Python as it stands today: the fact that
|
||
strings are fully delimited by two consecutive pairs of the same kind of quote, which by itself is a very simple rule.
|
||
One of the reasons quote reuse may be harder for humans to parse, leading to less readable
|
||
code, is that the quote character is the same for both start and
|
||
end (as opposed to other delimiters).</li>
|
||
<li>Some users have raised concerns that quote reuse may break some lexer and syntax highlighting tools that rely
|
||
on simple mechanisms to detect strings and f-strings, such as regular expressions or simple delimiter
|
||
matching tools. Introducing quote reuse in f-strings will either make it trickier to keep these tools
|
||
working or will break the tools altogether (as, for instance, regular expressions cannot parse arbitrary nested
|
||
structures with delimiters). The IDLE editor, included in the standard library, is an example of a
|
||
tool which may need some work to correctly apply syntax highlighting to f-strings.</li>
|
||
</ul>
|
||
<p>Here are some of the arguments in favour:</p>
|
||
<ul>
|
||
<li>Many languages that allow similar syntactic constructs (normally called “string interpolation”) allow quote
|
||
reuse and arbitrary nesting. These languages include JavaScript, Ruby, C#, Bash, Swift and many others.
|
||
The fact that many languages allow quote reuse can be a compelling argument in favour of allowing it in Python. This
|
||
is because it will make the language more familiar to users coming from other languages.</li>
|
||
<li>As many other popular languages allow quote reuse in string interpolation constructs, this means that editors
|
||
that support syntax highlighting for these languages will already have the necessary tools to support syntax
|
||
highlighting for f-strings with quote reuse in Python. This means that although the files that handle syntax
|
||
highlighting for Python will need to be updated to support this new feature, it is not expected to be impossible
|
||
or very hard to do.</li>
|
||
<li>One advantage of allowing quote reuse is that it composes cleanly with other syntax. Sometimes this is referred to
|
||
as “referential transparency”. An example of this is that if we have <code class="docutils literal notranslate"><span class="pre">f(x+1)</span></code>, assuming <code class="docutils literal notranslate"><span class="pre">a</span></code> is a brand new variable, it
|
||
should behave the same as <code class="docutils literal notranslate"><span class="pre">a</span> <span class="pre">=</span> <span class="pre">x+1;</span> <span class="pre">f(a)</span></code>. And vice versa. So if we have:<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="k">def</span> <span class="nf">py2c</span><span class="p">(</span><span class="n">source</span><span class="p">):</span>
|
||
<span class="n">prefix</span> <span class="o">=</span> <span class="n">source</span><span class="o">.</span><span class="n">removesuffix</span><span class="p">(</span><span class="s2">".py"</span><span class="p">)</span>
|
||
<span class="k">return</span> <span class="sa">f</span><span class="s2">"</span><span class="si">{</span><span class="n">prefix</span><span class="si">}</span><span class="s2">.c"</span>
|
||
</pre></div>
|
||
</div>
|
||
<p>It should be expected that if we replace the variable <code class="docutils literal notranslate"><span class="pre">prefix</span></code> with its definition, the answer should be the same:</p>
|
||
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="k">def</span> <span class="nf">py2c</span><span class="p">(</span><span class="n">source</span><span class="p">):</span>
|
||
<span class="k">return</span> <span class="sa">f</span><span class="s2">"</span><span class="si">{</span><span class="n">source</span><span class="o">.</span><span class="n">removesuffix</span><span class="p">(</span><span class="s2">".py"</span><span class="p">)</span><span class="si">}</span><span class="s2">.c"</span>
|
||
</pre></div>
|
||
</div>
|
||
</li>
|
||
<li>Code generators (like <a class="reference external" href="https://docs.python.org/3/library/ast.html#ast.unparse">ast.unparse</a> from standard library) in their
|
||
current form rely on complicated algorithms to ensure expressions within an f-string are properly suited for the context in
|
||
which they are being used. These non-trivial algorithms come with challenges such as finding an unused quote type (by tracking
|
||
the outer quotes), and generating string representations which would not include backslashes if possible. Allowing quote reuse
|
||
and backslashes would simplify the code generators which deal with f-strings considerably, as the regular Python expression logic
|
||
can be used inside and outside of f-strings without any special treatment.</li>
|
||
<li>Limiting quote reuse will considerably increase the complexity of the implementation of the proposed changes. This is because
|
||
it will force the parser to have the context that is parsing an expression part of an f-string with a given quote in order
|
||
to know if it needs to reject an expression that reuses the quote. Carrying this context around is not trivial in parsers that
|
||
can backtrack arbitrarily (such as the PEG parser). The issue becomes even more complex if we consider that f-strings can be
|
||
arbitrarily nested and therefore several quote types may need to be rejected.<p>To gather feedback from the community,
|
||
<a class="reference external" href="https://discuss.python.org/t/pep-701-syntactic-formalization-of-f-strings/22046/24">a poll</a>
|
||
has been initiated to get a sense of how the community feels about this aspect of the PEP.</p>
|
||
</li>
|
||
</ul>
|
||
</section>
|
||
</section>
|
||
<section id="backwards-compatibility">
|
||
<h2><a class="toc-backref" href="#backwards-compatibility" role="doc-backlink">Backwards Compatibility</a></h2>
|
||
<p>This PEP does not introduce any backwards incompatible syntactic or semantic changes
|
||
to the Python language. However, the <a class="reference external" href="https://docs.python.org/3/library/tokenize.html#module-tokenize" title="(in Python v3.13)"><code class="xref py py-mod docutils literal notranslate"><span class="pre">tokenize</span></code></a> module (a quasi-public part of the standard
|
||
library) will need to be updated to support the new f-string tokens (to allow tool authors
|
||
to correctly tokenize f-strings). See <a class="reference internal" href="#changes-to-the-tokenize-module">changes to the tokenize module</a> for more details regarding
|
||
how the public API of <code class="docutils literal notranslate"><span class="pre">tokenize</span></code> will be affected.</p>
|
||
</section>
|
||
<section id="how-to-teach-this">
|
||
<h2><a class="toc-backref" href="#how-to-teach-this" role="doc-backlink">How to Teach This</a></h2>
|
||
<p>As the concept of f-strings is already ubiquitous in the Python community, there is
|
||
no fundamental need for users to learn anything new. However, as the formalized grammar
|
||
allows some new possibilities, it is important that the formal grammar is added to the
|
||
documentation and explained in detail, explicitly mentioning what constructs are possible
|
||
since this PEP is aiming to avoid confusion.</p>
|
||
<p>It is also beneficial to provide users with a simple framework for understanding what can
|
||
be placed inside an f-string expression. In this case the authors think that this work will
|
||
make it even simpler to explain this aspect of the language, since it can be summarized as:</p>
|
||
<blockquote>
|
||
<div>You can place any valid Python expression inside an f-string expression.</div></blockquote>
|
||
<p>With the changes in this PEP, there is no need to clarify that string quotes are
|
||
limited to be different from the quotes of the enclosing string, because this is
|
||
now allowed: as an arbitrary Python string can contain any possible choice of
|
||
quotes, so can any f-string expression. Additionally there is no need to clarify
|
||
that certain things are not allowed in the expression part because of
|
||
implementation restrictions such as comments, new line characters or
|
||
backslashes.</p>
|
||
<p>The only “surprising” difference is that as f-strings allow specifying a
|
||
format, expressions that allow a <code class="docutils literal notranslate"><span class="pre">:</span></code> character at the top level still need to be
|
||
enclosed in parenthesis. This is not new to this work, but it is important to
|
||
emphasize that this restriction is still in place. This allows for an easier
|
||
modification of the summary:</p>
|
||
<blockquote>
|
||
<div>You can place any valid Python expression inside
|
||
an f-string expression, and everything after a <code class="docutils literal notranslate"><span class="pre">:</span></code> character at the top level will
|
||
be identified as a format specification.</div></blockquote>
|
||
</section>
|
||
<section id="reference-implementation">
|
||
<h2><a class="toc-backref" href="#reference-implementation" role="doc-backlink">Reference Implementation</a></h2>
|
||
<p>A reference implementation can be found in the <a class="reference external" href="https://github.com/we-like-parsers/cpython/tree/fstring-grammar">implementation</a> fork.</p>
|
||
</section>
|
||
<section id="rejected-ideas">
|
||
<h2><a class="toc-backref" href="#rejected-ideas" role="doc-backlink">Rejected Ideas</a></h2>
|
||
<ol class="arabic">
|
||
<li>Although we think the readability arguments that have been raised against
|
||
allowing quote reuse in f-string expressions are valid and very important,
|
||
we have decided to propose not rejecting quote reuse in f-strings at the parser
|
||
level. The reason is that one of the cornerstones of this PEP is to reduce the
|
||
complexity and maintenance of parsing f-strings in CPython and this will not
|
||
only work against that goal, but it may even make the implementation even more
|
||
complex than the current one. We believe that forbidding quote reuse should be
|
||
done in linters and code style tools and not in the parser, the same way other
|
||
confusing or hard-to-read constructs in the language are handled today.</li>
|
||
<li>We have decided not to lift the restriction that some expression portions
|
||
need to wrap <code class="docutils literal notranslate"><span class="pre">':'</span></code> and <code class="docutils literal notranslate"><span class="pre">'!'</span></code> in parentheses at the top level, e.g.:<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="gp">>>> </span><span class="sa">f</span><span class="s1">'Useless use of lambdas: </span><span class="si">{</span><span class="w"> </span><span class="k">lambda</span><span class="w"> </span><span class="n">x</span><span class="si">:</span><span class="s1"> x*2 </span><span class="si">}</span><span class="s1">'</span>
|
||
<span class="go">SyntaxError: unexpected EOF while parsing</span>
|
||
</pre></div>
|
||
</div>
|
||
<p>The reason is that this will introduce a considerable amount of
|
||
complexity for no real benefit. This is due to the fact that the <code class="docutils literal notranslate"><span class="pre">:</span></code> character
|
||
normally separates the f-string format specification. This format specification
|
||
is currently tokenized as a string. As the tokenizer MUST tokenize what’s on the
|
||
right of the <code class="docutils literal notranslate"><span class="pre">:</span></code> as either a string or a stream of tokens, this won’t allow the
|
||
parser to differentiate between the different semantics as that would require the
|
||
tokenizer to backtrack and produce a different set of tokens (this is, first try
|
||
as a stream of tokens, and if it fails, try as a string for a format specifier).</p>
|
||
<p>As there is no fundamental advantage in being able to allow lambdas and similar
|
||
expressions at the top level, we have decided to keep the restriction that these must
|
||
be parenthesized if needed:</p>
|
||
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="gp">>>> </span><span class="sa">f</span><span class="s1">'Useless use of lambdas: </span><span class="si">{</span><span class="w"> </span><span class="p">(</span><span class="k">lambda</span><span class="w"> </span><span class="n">x</span><span class="p">:</span><span class="w"> </span><span class="n">x</span><span class="o">*</span><span class="mi">2</span><span class="p">)</span><span class="w"> </span><span class="si">}</span><span class="s1">'</span>
|
||
</pre></div>
|
||
</div>
|
||
</li>
|
||
<li>We have decided to disallow (for the time being) using escaped braces (<code class="docutils literal notranslate"><span class="pre">\{</span></code> and <code class="docutils literal notranslate"><span class="pre">\}</span></code>)
|
||
in addition to the <code class="docutils literal notranslate"><span class="pre">{{</span></code> and <code class="docutils literal notranslate"><span class="pre">}}</span></code> syntax. Although the authors of the PEP believe that
|
||
allowing escaped braces is a good idea, we have decided to not include it in this PEP, as it is not strictly
|
||
necessary for the formalization of f-strings proposed here, and it can be
|
||
added independently in a regular CPython issue.</li>
|
||
</ol>
|
||
</section>
|
||
<section id="open-issues">
|
||
<h2><a class="toc-backref" href="#open-issues" role="doc-backlink">Open Issues</a></h2>
|
||
<p>None yet</p>
|
||
</section>
|
||
<section id="copyright">
|
||
<h2><a class="toc-backref" href="#copyright" role="doc-backlink">Copyright</a></h2>
|
||
<p>This document is placed in the public domain or under the
|
||
CC0-1.0-Universal license, whichever is more permissive.</p>
|
||
</section>
|
||
</section>
|
||
<hr class="docutils" />
|
||
<p>Source: <a class="reference external" href="https://github.com/python/peps/blob/main/peps/pep-0701.rst">https://github.com/python/peps/blob/main/peps/pep-0701.rst</a></p>
|
||
<p>Last modified: <a class="reference external" href="https://github.com/python/peps/commits/main/peps/pep-0701.rst">2024-10-17 12:49:39 GMT</a></p>
|
||
|
||
</article>
|
||
<nav id="pep-sidebar">
|
||
<h2>Contents</h2>
|
||
<ul>
|
||
<li><a class="reference internal" href="#abstract">Abstract</a></li>
|
||
<li><a class="reference internal" href="#motivation">Motivation</a></li>
|
||
<li><a class="reference internal" href="#rationale">Rationale</a></li>
|
||
<li><a class="reference internal" href="#specification">Specification</a><ul>
|
||
<li><a class="reference internal" href="#handling-of-f-string-debug-expressions">Handling of f-string debug expressions</a></li>
|
||
<li><a class="reference internal" href="#new-tokens">New tokens</a></li>
|
||
<li><a class="reference internal" href="#changes-to-the-tokenize-module">Changes to the tokenize module</a></li>
|
||
<li><a class="reference internal" href="#how-to-produce-these-new-tokens">How to produce these new tokens</a></li>
|
||
<li><a class="reference internal" href="#consequences-of-the-new-grammar">Consequences of the new grammar</a></li>
|
||
<li><a class="reference internal" href="#considerations-regarding-quote-reuse">Considerations regarding quote reuse</a></li>
|
||
</ul>
|
||
</li>
|
||
<li><a class="reference internal" href="#backwards-compatibility">Backwards Compatibility</a></li>
|
||
<li><a class="reference internal" href="#how-to-teach-this">How to Teach This</a></li>
|
||
<li><a class="reference internal" href="#reference-implementation">Reference Implementation</a></li>
|
||
<li><a class="reference internal" href="#rejected-ideas">Rejected Ideas</a></li>
|
||
<li><a class="reference internal" href="#open-issues">Open Issues</a></li>
|
||
<li><a class="reference internal" href="#copyright">Copyright</a></li>
|
||
</ul>
|
||
|
||
<br>
|
||
<a id="source" href="https://github.com/python/peps/blob/main/peps/pep-0701.rst">Page Source (GitHub)</a>
|
||
</nav>
|
||
</section>
|
||
<script src="../_static/colour_scheme.js"></script>
|
||
<script src="../_static/wrap_tables.js"></script>
|
||
<script src="../_static/sticky_banner.js"></script>
|
||
</body>
|
||
</html> |