1307 lines
146 KiB
HTML
1307 lines
146 KiB
HTML
|
||
<!DOCTYPE html>
|
||
<html lang="en">
|
||
<head>
|
||
<meta charset="utf-8">
|
||
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||
<meta name="color-scheme" content="light dark">
|
||
<title>PEP 675 – Arbitrary Literal String Type | peps.python.org</title>
|
||
<link rel="shortcut icon" href="../_static/py.png">
|
||
<link rel="canonical" href="https://peps.python.org/pep-0675/">
|
||
<link rel="stylesheet" href="../_static/style.css" type="text/css">
|
||
<link rel="stylesheet" href="../_static/mq.css" type="text/css">
|
||
<link rel="stylesheet" href="../_static/pygments.css" type="text/css" media="(prefers-color-scheme: light)" id="pyg-light">
|
||
<link rel="stylesheet" href="../_static/pygments_dark.css" type="text/css" media="(prefers-color-scheme: dark)" id="pyg-dark">
|
||
<link rel="alternate" type="application/rss+xml" title="Latest PEPs" href="https://peps.python.org/peps.rss">
|
||
<meta property="og:title" content='PEP 675 – Arbitrary Literal String Type | peps.python.org'>
|
||
<meta property="og:description" content="There is currently no way to specify, using type annotations, that a function parameter can be of any literal string type. We have to specify a precise literal string type, such as Literal[&quot;foo&quot;]. This PEP introduces a supertype of literal string types:...">
|
||
<meta property="og:type" content="website">
|
||
<meta property="og:url" content="https://peps.python.org/pep-0675/">
|
||
<meta property="og:site_name" content="Python Enhancement Proposals (PEPs)">
|
||
<meta property="og:image" content="https://peps.python.org/_static/og-image.png">
|
||
<meta property="og:image:alt" content="Python PEPs">
|
||
<meta property="og:image:width" content="200">
|
||
<meta property="og:image:height" content="200">
|
||
<meta name="description" content="There is currently no way to specify, using type annotations, that a function parameter can be of any literal string type. We have to specify a precise literal string type, such as Literal[&quot;foo&quot;]. This PEP introduces a supertype of literal string types:...">
|
||
<meta name="theme-color" content="#3776ab">
|
||
</head>
|
||
<body>
|
||
|
||
<svg xmlns="http://www.w3.org/2000/svg" style="display: none;">
|
||
<symbol id="svg-sun-half" viewBox="0 0 24 24" pointer-events="all">
|
||
<title>Following system colour scheme</title>
|
||
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" fill="none"
|
||
stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round">
|
||
<circle cx="12" cy="12" r="9"></circle>
|
||
<path d="M12 3v18m0-12l4.65-4.65M12 14.3l7.37-7.37M12 19.6l8.85-8.85"></path>
|
||
</svg>
|
||
</symbol>
|
||
<symbol id="svg-moon" viewBox="0 0 24 24" pointer-events="all">
|
||
<title>Selected dark colour scheme</title>
|
||
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" fill="none"
|
||
stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round">
|
||
<path stroke="none" d="M0 0h24v24H0z" fill="none"></path>
|
||
<path d="M12 3c.132 0 .263 0 .393 0a7.5 7.5 0 0 0 7.92 12.446a9 9 0 1 1 -8.313 -12.454z"></path>
|
||
</svg>
|
||
</symbol>
|
||
<symbol id="svg-sun" viewBox="0 0 24 24" pointer-events="all">
|
||
<title>Selected light colour scheme</title>
|
||
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" fill="none"
|
||
stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round">
|
||
<circle cx="12" cy="12" r="5"></circle>
|
||
<line x1="12" y1="1" x2="12" y2="3"></line>
|
||
<line x1="12" y1="21" x2="12" y2="23"></line>
|
||
<line x1="4.22" y1="4.22" x2="5.64" y2="5.64"></line>
|
||
<line x1="18.36" y1="18.36" x2="19.78" y2="19.78"></line>
|
||
<line x1="1" y1="12" x2="3" y2="12"></line>
|
||
<line x1="21" y1="12" x2="23" y2="12"></line>
|
||
<line x1="4.22" y1="19.78" x2="5.64" y2="18.36"></line>
|
||
<line x1="18.36" y1="5.64" x2="19.78" y2="4.22"></line>
|
||
</svg>
|
||
</symbol>
|
||
</svg>
|
||
<script>
    // Copy the saved colour-scheme preference onto <html data-colour_scheme>.
    // Falls back to "auto" when nothing is stored, or when reading storage
    // throws (accessing localStorage can raise a SecurityError if the browser
    // is configured to block site data).
    try {
        document.documentElement.dataset.colour_scheme = localStorage.getItem("colour_scheme") || "auto"
    } catch (err) {
        document.documentElement.dataset.colour_scheme = "auto"
    }
</script>
|
||
<section id="pep-page-section">
|
||
<header>
|
||
<h1>Python Enhancement Proposals</h1>
|
||
<ul class="breadcrumbs">
|
||
<li><a href="https://www.python.org/" title="The Python Programming Language">Python</a> » </li>
|
||
<li><a href="../pep-0000/">PEP Index</a> » </li>
|
||
<li>PEP 675</li>
|
||
</ul>
|
||
<button id="colour-scheme-cycler" type="button" onclick="setColourScheme(nextColourScheme())">
|
||
<svg aria-hidden="true" class="colour-scheme-icon-when-auto"><use href="#svg-sun-half"></use></svg>
|
||
<svg aria-hidden="true" class="colour-scheme-icon-when-dark"><use href="#svg-moon"></use></svg>
|
||
<svg aria-hidden="true" class="colour-scheme-icon-when-light"><use href="#svg-sun"></use></svg>
|
||
<span class="visually-hidden">Toggle light / dark / auto colour theme</span>
|
||
</button>
|
||
</header>
|
||
<article>
|
||
<section id="pep-content">
|
||
<h1 class="page-title">PEP 675 – Arbitrary Literal String Type</h1>
|
||
<dl class="rfc2822 field-list simple">
|
||
<dt class="field-odd">Author<span class="colon">:</span></dt>
|
||
<dd class="field-odd">Pradeep Kumar Srinivasan &lt;gohanpra at gmail.com&gt;, Graham Bleaney &lt;gbleaney at gmail.com&gt;</dd>
|
||
<dt class="field-even">Sponsor<span class="colon">:</span></dt>
|
||
<dd class="field-even">Jelle Zijlstra &lt;jelle.zijlstra at gmail.com&gt;</dd>
|
||
<dt class="field-odd">Discussions-To<span class="colon">:</span></dt>
|
||
<dd class="field-odd"><a class="reference external" href="https://mail.python.org/archives/list/typing-sig@python.org/thread/VB74EHNM4RODDFM64NEEEBJQVAUAWIAW/">Typing-SIG thread</a></dd>
|
||
<dt class="field-even">Status<span class="colon">:</span></dt>
|
||
<dd class="field-even"><abbr title="Accepted and implementation complete, or no longer active">Final</abbr></dd>
|
||
<dt class="field-odd">Type<span class="colon">:</span></dt>
|
||
<dd class="field-odd"><abbr title="Normative PEP with a new feature for Python, implementation change for CPython or interoperability standard for the ecosystem">Standards Track</abbr></dd>
|
||
<dt class="field-even">Topic<span class="colon">:</span></dt>
|
||
<dd class="field-even"><a class="reference external" href="../topic/typing/">Typing</a></dd>
|
||
<dt class="field-odd">Created<span class="colon">:</span></dt>
|
||
<dd class="field-odd">30-Nov-2021</dd>
|
||
<dt class="field-even">Python-Version<span class="colon">:</span></dt>
|
||
<dd class="field-even">3.11</dd>
|
||
<dt class="field-odd">Post-History<span class="colon">:</span></dt>
|
||
<dd class="field-odd">07-Feb-2022</dd>
|
||
<dt class="field-even">Resolution<span class="colon">:</span></dt>
|
||
<dd class="field-even"><a class="reference external" href="https://mail.python.org/archives/list/python-dev@python.org/message/XEOOSSPNYPGZ5NXOJFPLXG2BTN7EVRT5/">Python-Dev message</a></dd>
|
||
</dl>
|
||
<hr class="docutils" />
|
||
<section id="contents">
|
||
<details><summary>Table of Contents</summary><ul class="simple">
|
||
<li><a class="reference internal" href="#abstract">Abstract</a></li>
|
||
<li><a class="reference internal" href="#motivation">Motivation</a><ul>
|
||
<li><a class="reference internal" href="#usage-statistics">Usage statistics</a></li>
|
||
</ul>
|
||
</li>
|
||
<li><a class="reference internal" href="#rationale">Rationale</a></li>
|
||
<li><a class="reference internal" href="#specification">Specification</a><ul>
|
||
<li><a class="reference internal" href="#runtime-behavior">Runtime Behavior</a></li>
|
||
<li><a class="reference internal" href="#valid-locations-for-literalstring">Valid Locations for <code class="docutils literal notranslate"><span class="pre">LiteralString</span></code></a></li>
|
||
<li><a class="reference internal" href="#type-inference">Type Inference</a><ul>
|
||
<li><a class="reference internal" href="#inferring-literalstring">Inferring <code class="docutils literal notranslate"><span class="pre">LiteralString</span></code></a></li>
|
||
<li><a class="reference internal" href="#examples">Examples</a></li>
|
||
<li><a class="reference internal" href="#interaction-with-typevars-and-generics">Interaction with TypeVars and Generics</a></li>
|
||
<li><a class="reference internal" href="#interactions-with-overloads">Interactions with Overloads</a></li>
|
||
</ul>
|
||
</li>
|
||
</ul>
|
||
</li>
|
||
<li><a class="reference internal" href="#backwards-compatibility">Backwards Compatibility</a></li>
|
||
<li><a class="reference internal" href="#rejected-alternatives">Rejected Alternatives</a><ul>
|
||
<li><a class="reference internal" href="#why-not-use-tool-x">Why not use tool X?</a></li>
|
||
<li><a class="reference internal" href="#why-not-use-a-newtype-for-str">Why not use a <code class="docutils literal notranslate"><span class="pre">NewType</span></code> for <code class="docutils literal notranslate"><span class="pre">str</span></code>?</a></li>
|
||
<li><a class="reference internal" href="#why-not-try-to-emulate-trusted-types">Why not try to emulate Trusted Types?</a></li>
|
||
<li><a class="reference internal" href="#runtime-checkable-literalstring">Runtime Checkable <code class="docutils literal notranslate"><span class="pre">LiteralString</span></code></a></li>
|
||
<li><a class="reference internal" href="#rejected-names">Rejected Names</a></li>
|
||
<li><a class="reference internal" href="#literalbytes"><code class="docutils literal notranslate"><span class="pre">LiteralBytes</span></code></a></li>
|
||
</ul>
|
||
</li>
|
||
<li><a class="reference internal" href="#reference-implementation">Reference Implementation</a></li>
|
||
<li><a class="reference internal" href="#appendix-a-other-uses">Appendix A: Other Uses</a><ul>
|
||
<li><a class="reference internal" href="#command-injection">Command Injection</a></li>
|
||
<li><a class="reference internal" href="#cross-site-scripting-xss">Cross Site Scripting (XSS)</a></li>
|
||
<li><a class="reference internal" href="#server-side-template-injection-ssti">Server Side Template Injection (SSTI)</a></li>
|
||
<li><a class="reference internal" href="#logging-format-string-injection">Logging Format String Injection</a></li>
|
||
</ul>
|
||
</li>
|
||
<li><a class="reference internal" href="#appendix-b-limitations">Appendix B: Limitations</a></li>
|
||
<li><a class="reference internal" href="#appendix-c-str-methods-that-preserve-literalstring">Appendix C: <code class="docutils literal notranslate"><span class="pre">str</span></code> methods that preserve <code class="docutils literal notranslate"><span class="pre">LiteralString</span></code></a></li>
|
||
<li><a class="reference internal" href="#appendix-d-guidelines-for-using-literalstring-in-stubs">Appendix D: Guidelines for using <code class="docutils literal notranslate"><span class="pre">LiteralString</span></code> in Stubs</a></li>
|
||
<li><a class="reference internal" href="#resources">Resources</a><ul>
|
||
<li><a class="reference internal" href="#literal-string-types-in-scala">Literal String Types in Scala</a></li>
|
||
<li><a class="reference internal" href="#thanks">Thanks</a></li>
|
||
</ul>
|
||
</li>
|
||
<li><a class="reference internal" href="#copyright">Copyright</a></li>
|
||
</ul>
|
||
</details></section>
|
||
<div class="pep-banner canonical-typing-spec sticky-banner admonition attention">
|
||
<p class="admonition-title">Attention</p>
|
||
<p>This PEP is a historical document: see <a class="reference external" href="https://typing.readthedocs.io/en/latest/spec/literal.html#literalstring" title="(in typing)"><span>LiteralString</span></a> and
|
||
<a class="reference external" href="https://docs.python.org/3/library/typing.html#typing.LiteralString" title="(in Python v3.13)"><code class="xref py py-data docutils literal notranslate"><span class="pre">typing.LiteralString</span></code></a> for up-to-date specs and documentation. Canonical typing specs are maintained at the <a class="reference external" href="https://typing.readthedocs.io/en/latest/spec/">typing specs site</a>; runtime typing behaviour is described in the CPython documentation.</p>
|
||
<p class="close-button">×</p>
|
||
<p>See the <a class="reference external" href="https://typing.readthedocs.io/en/latest/spec/meta.html">typing specification update process</a> for how to propose changes to the typing spec.</p>
|
||
</div>
|
||
<section id="abstract">
|
||
<h2><a class="toc-backref" href="#abstract" role="doc-backlink">Abstract</a></h2>
|
||
<p>There is currently no way to specify, using type annotations, that a
|
||
function parameter can be of any literal string type. We have to
|
||
specify a precise literal string type, such as
|
||
<code class="docutils literal notranslate"><span class="pre">Literal["foo"]</span></code>. This PEP introduces a supertype of literal string
|
||
types: <code class="docutils literal notranslate"><span class="pre">LiteralString</span></code>. This allows a function to accept arbitrary
|
||
literal string types, such as <code class="docutils literal notranslate"><span class="pre">Literal["foo"]</span></code> or
|
||
<code class="docutils literal notranslate"><span class="pre">Literal["bar"]</span></code>.</p>
|
||
</section>
|
||
<section id="motivation">
|
||
<h2><a class="toc-backref" href="#motivation" role="doc-backlink">Motivation</a></h2>
|
||
<p>Powerful APIs that execute SQL or shell commands often recommend that
|
||
they be invoked with literal strings, rather than arbitrary user
|
||
controlled strings. There is no way to express this recommendation in
|
||
the type system, however, meaning security vulnerabilities sometimes
|
||
occur when developers fail to follow it. For example, a naive way to
|
||
look up a user record from a database is to accept a user id and
|
||
insert it into a predefined SQL query:</p>
|
||
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="k">def</span> <span class="nf">query_user</span><span class="p">(</span><span class="n">conn</span><span class="p">:</span> <span class="n">Connection</span><span class="p">,</span> <span class="n">user_id</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-></span> <span class="n">User</span><span class="p">:</span>
|
||
<span class="n">query</span> <span class="o">=</span> <span class="sa">f</span><span class="s2">"SELECT * FROM data WHERE user_id = </span><span class="si">{</span><span class="n">user_id</span><span class="si">}</span><span class="s2">"</span>
|
||
<span class="n">conn</span><span class="o">.</span><span class="n">execute</span><span class="p">(</span><span class="n">query</span><span class="p">)</span>
|
||
<span class="o">...</span> <span class="c1"># Transform data to a User object and return it</span>
|
||
|
||
<span class="n">query_user</span><span class="p">(</span><span class="n">conn</span><span class="p">,</span> <span class="s2">"user123"</span><span class="p">)</span> <span class="c1"># OK.</span>
|
||
</pre></div>
|
||
</div>
|
||
<p>However, the user-controlled data <code class="docutils literal notranslate"><span class="pre">user_id</span></code> is being mixed with the
|
||
SQL command string, which means a malicious user could run arbitrary
|
||
SQL commands:</p>
|
||
<div class="bad highlight-default notranslate"><div class="highlight"><pre><span></span> <span class="c1"># Delete the table.</span>
|
||
<span class="n">query_user</span><span class="p">(</span><span class="n">conn</span><span class="p">,</span> <span class="s2">"user123; DROP TABLE data;"</span><span class="p">)</span>
|
||
|
||
<span class="c1"># Fetch all users (since 1 = 1 is always true).</span>
|
||
<span class="n">query_user</span><span class="p">(</span><span class="n">conn</span><span class="p">,</span> <span class="s2">"user123 OR 1 = 1"</span><span class="p">)</span>
|
||
</pre></div>
|
||
</div>
|
||
<p>To prevent such SQL injection attacks, SQL APIs offer parameterized
|
||
queries, which separate the executed query from user-controlled data
|
||
and make it impossible to run arbitrary queries. For example, with
|
||
<a class="reference external" href="https://docs.python.org/3/library/sqlite3.html">sqlite3</a>, our
|
||
original function would be written safely as a query with parameters:</p>
|
||
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="k">def</span> <span class="nf">query_user</span><span class="p">(</span><span class="n">conn</span><span class="p">:</span> <span class="n">Connection</span><span class="p">,</span> <span class="n">user_id</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-></span> <span class="n">User</span><span class="p">:</span>
|
||
<span class="n">query</span> <span class="o">=</span> <span class="s2">"SELECT * FROM data WHERE user_id = ?"</span>
|
||
<span class="n">conn</span><span class="o">.</span><span class="n">execute</span><span class="p">(</span><span class="n">query</span><span class="p">,</span> <span class="p">(</span><span class="n">user_id</span><span class="p">,))</span>
|
||
<span class="o">...</span>
|
||
</pre></div>
|
||
</div>
|
||
<p>The problem is that there is no way to enforce this
|
||
discipline. sqlite3’s own <a class="reference external" href="https://docs.python.org/3/library/sqlite3.html">documentation</a> can only admonish
|
||
the reader to not dynamically build the <code class="docutils literal notranslate"><span class="pre">sql</span></code> argument from external
|
||
input; the API’s authors cannot express that through the type
|
||
system. Users can (and often do) still use a convenient f-string as
|
||
before and leave their code vulnerable to SQL injection.</p>
|
||
<p>Existing tools, such as the popular security linter <a class="reference external" href="https://github.com/PyCQA/bandit/blob/aac3f16f45648a7756727286ba8f8f0cf5e7d408/bandit/plugins/django_sql_injection.py#L102">Bandit</a>,
|
||
attempt to detect unsafe external data used in SQL APIs, by inspecting
|
||
the AST or by other semantic pattern-matching. These tools, however,
|
||
preclude common idioms like storing a large multi-line query in a
|
||
variable before executing it, adding literal string modifiers to the
|
||
query based on some conditions, or transforming the query string using
|
||
a function. (We survey existing tools in the <a class="reference internal" href="#rejected-alternatives">Rejected Alternatives</a>
|
||
section.) For example, many tools will detect a false positive issue
|
||
in this benign snippet:</p>
|
||
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="k">def</span> <span class="nf">query_data</span><span class="p">(</span><span class="n">conn</span><span class="p">:</span> <span class="n">Connection</span><span class="p">,</span> <span class="n">user_id</span><span class="p">:</span> <span class="nb">str</span><span class="p">,</span> <span class="n">limit</span><span class="p">:</span> <span class="nb">bool</span><span class="p">)</span> <span class="o">-></span> <span class="kc">None</span><span class="p">:</span>
|
||
<span class="n">query</span> <span class="o">=</span> <span class="s2">"""</span>
|
||
<span class="s2"> SELECT</span>
|
||
<span class="s2"> user.name,</span>
|
||
<span class="s2"> user.age</span>
|
||
<span class="s2"> FROM data</span>
|
||
<span class="s2"> WHERE user_id = ?</span>
|
||
<span class="s2"> """</span>
|
||
<span class="k">if</span> <span class="n">limit</span><span class="p">:</span>
|
||
<span class="n">query</span> <span class="o">+=</span> <span class="s2">" LIMIT 1"</span>
|
||
|
||
<span class="n">conn</span><span class="o">.</span><span class="n">execute</span><span class="p">(</span><span class="n">query</span><span class="p">,</span> <span class="p">(</span><span class="n">user_id</span><span class="p">,))</span>
|
||
</pre></div>
|
||
</div>
|
||
<p>We want to forbid harmful execution of user-controlled data while
|
||
still allowing benign idioms like the above and not requiring extra
|
||
user work.</p>
|
||
<p>To meet this goal, we introduce the <code class="docutils literal notranslate"><span class="pre">LiteralString</span></code> type, which only
|
||
accepts string values that are known to be made of literals. This is a
|
||
generalization of the <code class="docutils literal notranslate"><span class="pre">Literal["foo"]</span></code> type from <a class="pep reference internal" href="../pep-0586/" title="PEP 586 – Literal Types">PEP 586</a>.
|
||
A string of type
|
||
<code class="docutils literal notranslate"><span class="pre">LiteralString</span></code> cannot contain user-controlled data. Thus, any API
|
||
that only accepts <code class="docutils literal notranslate"><span class="pre">LiteralString</span></code> will be immune to injection
|
||
vulnerabilities (with <a class="reference internal" href="#appendix-b-limitations">pragmatic limitations</a>).</p>
|
||
<p>Since we want the <code class="docutils literal notranslate"><span class="pre">sqlite3</span></code> <code class="docutils literal notranslate"><span class="pre">execute</span></code> method to disallow strings
|
||
built with user input, we would make its <a class="reference external" href="https://github.com/python/typeshed/blob/1c88ceeee924ec6cfe05dd4865776b49fec299e6/stdlib/sqlite3/dbapi2.pyi#L153">typeshed stub</a>
|
||
accept a <code class="docutils literal notranslate"><span class="pre">sql</span></code> query that is of type <code class="docutils literal notranslate"><span class="pre">LiteralString</span></code>:</p>
|
||
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="kn">from</span> <span class="nn">typing</span> <span class="kn">import</span> <span class="n">LiteralString</span>
|
||
|
||
<span class="k">def</span> <span class="nf">execute</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">sql</span><span class="p">:</span> <span class="n">LiteralString</span><span class="p">,</span> <span class="n">parameters</span><span class="p">:</span> <span class="n">Iterable</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="o">...</span><span class="p">)</span> <span class="o">-></span> <span class="n">Cursor</span><span class="p">:</span> <span class="o">...</span>
|
||
</pre></div>
|
||
</div>
|
||
<p>This successfully forbids our unsafe SQL example. The variable
|
||
<code class="docutils literal notranslate"><span class="pre">query</span></code> below is inferred to have type <code class="docutils literal notranslate"><span class="pre">str</span></code>, since it is created
|
||
from a format string using <code class="docutils literal notranslate"><span class="pre">user_id</span></code>, and cannot be passed to
|
||
<code class="docutils literal notranslate"><span class="pre">execute</span></code>:</p>
|
||
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="k">def</span> <span class="nf">query_user</span><span class="p">(</span><span class="n">conn</span><span class="p">:</span> <span class="n">Connection</span><span class="p">,</span> <span class="n">user_id</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-></span> <span class="n">User</span><span class="p">:</span>
|
||
<span class="n">query</span> <span class="o">=</span> <span class="sa">f</span><span class="s2">"SELECT * FROM data WHERE user_id = </span><span class="si">{</span><span class="n">user_id</span><span class="si">}</span><span class="s2">"</span>
|
||
<span class="n">conn</span><span class="o">.</span><span class="n">execute</span><span class="p">(</span><span class="n">query</span><span class="p">)</span> <span class="c1"># Error: Expected LiteralString, got str.</span>
|
||
<span class="o">...</span>
|
||
</pre></div>
|
||
</div>
|
||
<p>The method remains flexible enough to allow our more complicated
|
||
example:</p>
|
||
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="k">def</span> <span class="nf">query_data</span><span class="p">(</span><span class="n">conn</span><span class="p">:</span> <span class="n">Connection</span><span class="p">,</span> <span class="n">user_id</span><span class="p">:</span> <span class="nb">str</span><span class="p">,</span> <span class="n">limit</span><span class="p">:</span> <span class="nb">bool</span><span class="p">)</span> <span class="o">-></span> <span class="kc">None</span><span class="p">:</span>
|
||
<span class="c1"># This is a literal string.</span>
|
||
<span class="n">query</span> <span class="o">=</span> <span class="s2">"""</span>
|
||
<span class="s2"> SELECT</span>
|
||
<span class="s2"> user.name,</span>
|
||
<span class="s2"> user.age</span>
|
||
<span class="s2"> FROM data</span>
|
||
<span class="s2"> WHERE user_id = ?</span>
|
||
<span class="s2"> """</span>
|
||
|
||
<span class="k">if</span> <span class="n">limit</span><span class="p">:</span>
|
||
<span class="c1"># Still has type LiteralString because we added a literal string.</span>
|
||
<span class="n">query</span> <span class="o">+=</span> <span class="s2">" LIMIT 1"</span>
|
||
|
||
<span class="n">conn</span><span class="o">.</span><span class="n">execute</span><span class="p">(</span><span class="n">query</span><span class="p">,</span> <span class="p">(</span><span class="n">user_id</span><span class="p">,))</span> <span class="c1"># OK</span>
|
||
</pre></div>
|
||
</div>
|
||
<p>Notice that the user did not have to change their SQL code at all. The
|
||
type checker was able to infer the literal string type and complain
|
||
only in case of violations.</p>
|
||
<p><code class="docutils literal notranslate"><span class="pre">LiteralString</span></code> is also useful in other cases where we want strict
|
||
command-data separation, such as when building shell commands or when
|
||
rendering a string into an HTML response without escaping (see
|
||
<a class="reference internal" href="#appendix-a-other-uses">Appendix A: Other Uses</a>). Overall, this combination of strictness
|
||
and flexibility makes it easy to enforce safer API usage in sensitive
|
||
code without burdening users.</p>
|
||
<section id="usage-statistics">
|
||
<h3><a class="toc-backref" href="#usage-statistics" role="doc-backlink">Usage statistics</a></h3>
|
||
<p>In a sample of open-source projects using <code class="docutils literal notranslate"><span class="pre">sqlite3</span></code>, we found that
|
||
<code class="docutils literal notranslate"><span class="pre">conn.execute</span></code> was called <a class="reference external" href="https://grep.app/search?q=conn%5C.execute%5C%28%5Cs%2A%5B%27%22%5D&amp;regexp=true&amp;filter[lang][0]=Python">~67% of the time</a>
|
||
with a safe string literal and <a class="reference external" href="https://grep.app/search?current=3&amp;q=conn%5C.execute%5C%28%5Ba-zA-Z_%5D%2B%5C%29&amp;regexp=true&amp;filter[lang][0]=Python">~33% of the time</a>
|
||
with a potentially unsafe, local string variable. Using this PEP’s
|
||
literal string type along with a type checker would prevent the unsafe
|
||
portion of that 33% of cases (i.e. the ones where user-controlled data
|
||
is incorporated into the query), while seamlessly allowing the safe
|
||
ones to remain.</p>
|
||
</section>
|
||
</section>
|
||
<section id="rationale">
|
||
<h2><a class="toc-backref" href="#rationale" role="doc-backlink">Rationale</a></h2>
|
||
<p>Firstly, why use <em>types</em> to prevent security vulnerabilities?</p>
|
||
<p>Warning users in documentation is insufficient - most users either
|
||
never see these warnings or ignore them. Using an existing dynamic or
|
||
static analysis approach is too restrictive - these prevent natural
|
||
idioms, as we saw in the <a class="reference internal" href="#motivation">Motivation</a> section (and will discuss more
|
||
extensively in the <a class="reference internal" href="#rejected-alternatives">Rejected Alternatives</a> section). The typing-based
|
||
approach in this PEP strikes a user-friendly balance between
|
||
strictness and flexibility.</p>
|
||
<p>Runtime approaches do not work because, at runtime, the query string
|
||
is a plain <code class="docutils literal notranslate"><span class="pre">str</span></code>. While we could prevent some exploits using
|
||
heuristics, such as regex-filtering for obviously malicious payloads,
|
||
there will always be a way to work around them (perfectly
|
||
distinguishing good and bad queries reduces to the halting problem).</p>
|
||
<p>Static approaches, such as checking the AST to see if the query string
|
||
is a literal string expression, cannot tell when a string is assigned
|
||
to an intermediate variable or when it is transformed by a benign
|
||
function. This makes them overly restrictive.</p>
|
||
<p>The type checker, surprisingly, does better than both because it has
|
||
access to information not available in the runtime or static analysis
|
||
approaches. Specifically, the type checker can tell us whether an
|
||
expression has a literal string type, say <code class="docutils literal notranslate"><span class="pre">Literal["foo"]</span></code>. The type
|
||
checker already propagates types across variable assignments or
|
||
function calls.</p>
|
||
<p>In the current type system itself, if the SQL or shell command
|
||
execution function only accepted three possible input strings, our job
|
||
would be done. We would just say:</p>
|
||
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="k">def</span> <span class="nf">execute</span><span class="p">(</span><span class="n">query</span><span class="p">:</span> <span class="n">Literal</span><span class="p">[</span><span class="s2">"foo"</span><span class="p">,</span> <span class="s2">"bar"</span><span class="p">,</span> <span class="s2">"baz"</span><span class="p">])</span> <span class="o">-></span> <span class="kc">None</span><span class="p">:</span> <span class="o">...</span>
|
||
</pre></div>
|
||
</div>
|
||
<p>But, of course, <code class="docutils literal notranslate"><span class="pre">execute</span></code> can accept <em>any</em> possible query. How do we
|
||
ensure that the query does not contain an arbitrary, user-controlled
|
||
string?</p>
|
||
<p>We want to specify that the value must be of some type
|
||
<code class="docutils literal notranslate"><span class="pre">Literal[&lt;...&gt;]</span></code> where <code class="docutils literal notranslate"><span class="pre">&lt;...&gt;</span></code> is some string. This is what
|
||
<code class="docutils literal notranslate"><span class="pre">LiteralString</span></code> represents. <code class="docutils literal notranslate"><span class="pre">LiteralString</span></code> is the “supertype” of
|
||
all literal string types. In effect, this PEP just introduces a type
|
||
in the type hierarchy between <code class="docutils literal notranslate"><span class="pre">Literal["foo"]</span></code> and <code class="docutils literal notranslate"><span class="pre">str</span></code>. Any
|
||
particular literal string, such as <code class="docutils literal notranslate"><span class="pre">Literal["foo"]</span></code> or
|
||
<code class="docutils literal notranslate"><span class="pre">Literal["bar"]</span></code>, is compatible with <code class="docutils literal notranslate"><span class="pre">LiteralString</span></code>, but not the
|
||
other way around. The “supertype” of <code class="docutils literal notranslate"><span class="pre">LiteralString</span></code> itself is
|
||
<code class="docutils literal notranslate"><span class="pre">str</span></code>. So, <code class="docutils literal notranslate"><span class="pre">LiteralString</span></code> is compatible with <code class="docutils literal notranslate"><span class="pre">str</span></code>, but not the
|
||
other way around.</p>
|
||
<p>Note that a <code class="docutils literal notranslate"><span class="pre">Union</span></code> of literal types is naturally compatible with
|
||
<code class="docutils literal notranslate"><span class="pre">LiteralString</span></code> because each element of the <code class="docutils literal notranslate"><span class="pre">Union</span></code> is individually
|
||
compatible with <code class="docutils literal notranslate"><span class="pre">LiteralString</span></code>. So, <code class="docutils literal notranslate"><span class="pre">Literal["foo",</span> <span class="pre">"bar"]</span></code> is
|
||
compatible with <code class="docutils literal notranslate"><span class="pre">LiteralString</span></code>.</p>
|
||
<p>However, recall that we don’t just want to represent exact literal
|
||
queries. We also want to support composition of two literal strings,
|
||
such as <code class="docutils literal notranslate"><span class="pre">query</span> <span class="pre">+</span> <span class="pre">"</span> <span class="pre">LIMIT</span> <span class="pre">1"</span></code>. This too is possible with the above
|
||
concept. If <code class="docutils literal notranslate"><span class="pre">x</span></code> and <code class="docutils literal notranslate"><span class="pre">y</span></code> are two values of type <code class="docutils literal notranslate"><span class="pre">LiteralString</span></code>,
|
||
then <code class="docutils literal notranslate"><span class="pre">x</span> <span class="pre">+</span> <span class="pre">y</span></code> will also be of a type compatible with
|
||
<code class="docutils literal notranslate"><span class="pre">LiteralString</span></code>. We can reason about this by looking at specific
|
||
instances such as <code class="docutils literal notranslate"><span class="pre">Literal["foo"]</span></code> and <code class="docutils literal notranslate"><span class="pre">Literal["bar"]</span></code>; the value
|
||
of the added string <code class="docutils literal notranslate"><span class="pre">x</span> <span class="pre">+</span> <span class="pre">y</span></code> can only be <code class="docutils literal notranslate"><span class="pre">"foobar"</span></code>, which has type
|
||
<code class="docutils literal notranslate"><span class="pre">Literal["foobar"]</span></code> and is thus compatible with
|
||
<code class="docutils literal notranslate"><span class="pre">LiteralString</span></code>. The same reasoning applies when <code class="docutils literal notranslate"><span class="pre">x</span></code> and <code class="docutils literal notranslate"><span class="pre">y</span></code> are
|
||
unions of literal types; the result of pairwise adding any two literal
|
||
types from <code class="docutils literal notranslate"><span class="pre">x</span></code> and <code class="docutils literal notranslate"><span class="pre">y</span></code> respectively is a literal type, which means
|
||
that the overall result is a <code class="docutils literal notranslate"><span class="pre">Union</span></code> of literal types and is thus
|
||
compatible with <code class="docutils literal notranslate"><span class="pre">LiteralString</span></code>.</p>
|
||
<p>In this way, we are able to leverage Python’s concept of a <code class="docutils literal notranslate"><span class="pre">Literal</span></code>
|
||
string type to specify that our API can only accept strings that are
|
||
known to be constructed from literals. More specific details follow in
|
||
the remaining sections.</p>
|
||
</section>
|
||
<section id="specification">
|
||
<h2><a class="toc-backref" href="#specification" role="doc-backlink">Specification</a></h2>
|
||
<section id="runtime-behavior">
|
||
<h3><a class="toc-backref" href="#runtime-behavior" role="doc-backlink">Runtime Behavior</a></h3>
|
||
<p>We propose adding <code class="docutils literal notranslate"><span class="pre">LiteralString</span></code> to <code class="docutils literal notranslate"><span class="pre">typing.py</span></code>, with an
|
||
implementation similar to <code class="docutils literal notranslate"><span class="pre">typing.NoReturn</span></code>.</p>
|
||
<p>Note that <code class="docutils literal notranslate"><span class="pre">LiteralString</span></code> is a special form used solely for type
|
||
checking. There is no expression for which <code class="docutils literal notranslate"><span class="pre">type(&lt;expr&gt;)</span></code> will
|
||
produce <code class="docutils literal notranslate"><span class="pre">LiteralString</span></code> at runtime. So, we do not specify in the
|
||
implementation that it is a subclass of <code class="docutils literal notranslate"><span class="pre">str</span></code>.</p>
|
||
</section>
|
||
<section id="valid-locations-for-literalstring">
|
||
<h3><a class="toc-backref" href="#valid-locations-for-literalstring" role="doc-backlink">Valid Locations for <code class="docutils literal notranslate"><span class="pre">LiteralString</span></code></a></h3>
|
||
<p><code class="docutils literal notranslate"><span class="pre">LiteralString</span></code> can be used where any other type can be used:</p>
|
||
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="n">variable_annotation</span><span class="p">:</span> <span class="n">LiteralString</span>
|
||
|
||
<span class="k">def</span> <span class="nf">my_function</span><span class="p">(</span><span class="n">literal_string</span><span class="p">:</span> <span class="n">LiteralString</span><span class="p">)</span> <span class="o">-></span> <span class="n">LiteralString</span><span class="p">:</span> <span class="o">...</span>
|
||
|
||
<span class="k">class</span> <span class="nc">Foo</span><span class="p">:</span>
|
||
<span class="n">my_attribute</span><span class="p">:</span> <span class="n">LiteralString</span>
|
||
|
||
<span class="n">type_argument</span><span class="p">:</span> <span class="n">List</span><span class="p">[</span><span class="n">LiteralString</span><span class="p">]</span>
|
||
|
||
<span class="n">T</span> <span class="o">=</span> <span class="n">TypeVar</span><span class="p">(</span><span class="s2">"T"</span><span class="p">,</span> <span class="n">bound</span><span class="o">=</span><span class="n">LiteralString</span><span class="p">)</span>
|
||
</pre></div>
|
||
</div>
|
||
<p>It cannot be nested within unions of <code class="docutils literal notranslate"><span class="pre">Literal</span></code> types:</p>
|
||
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="n">bad_union</span><span class="p">:</span> <span class="n">Literal</span><span class="p">[</span><span class="s2">"hello"</span><span class="p">,</span> <span class="n">LiteralString</span><span class="p">]</span> <span class="c1"># Not OK</span>
|
||
<span class="n">bad_nesting</span><span class="p">:</span> <span class="n">Literal</span><span class="p">[</span><span class="n">LiteralString</span><span class="p">]</span> <span class="c1"># Not OK</span>
|
||
</pre></div>
|
||
</div>
|
||
</section>
|
||
<section id="type-inference">
|
||
<h3><a class="toc-backref" href="#type-inference" role="doc-backlink">Type Inference</a></h3>
|
||
<section id="inferring-literalstring">
|
||
<span id="inferring-literal-string"></span><h4><a class="toc-backref" href="#inferring-literalstring" role="doc-backlink">Inferring <code class="docutils literal notranslate"><span class="pre">LiteralString</span></code></a></h4>
|
||
<p>Any literal string type is compatible with <code class="docutils literal notranslate"><span class="pre">LiteralString</span></code>. For
|
||
example, <code class="docutils literal notranslate"><span class="pre">x:</span> <span class="pre">LiteralString</span> <span class="pre">=</span> <span class="pre">"foo"</span></code> is valid because <code class="docutils literal notranslate"><span class="pre">"foo"</span></code> is
|
||
inferred to be of type <code class="docutils literal notranslate"><span class="pre">Literal["foo"]</span></code>.</p>
|
||
<p>As per the <a class="reference internal" href="#rationale">Rationale</a>, we also infer <code class="docutils literal notranslate"><span class="pre">LiteralString</span></code> in the
|
||
following cases:</p>
|
||
<ul class="simple">
|
||
<li>Addition: <code class="docutils literal notranslate"><span class="pre">x</span> <span class="pre">+</span> <span class="pre">y</span></code> is of type <code class="docutils literal notranslate"><span class="pre">LiteralString</span></code> if both <code class="docutils literal notranslate"><span class="pre">x</span></code> and
|
||
<code class="docutils literal notranslate"><span class="pre">y</span></code> are compatible with <code class="docutils literal notranslate"><span class="pre">LiteralString</span></code>.</li>
|
||
<li>Joining: <code class="docutils literal notranslate"><span class="pre">sep.join(xs)</span></code> is of type <code class="docutils literal notranslate"><span class="pre">LiteralString</span></code> if <code class="docutils literal notranslate"><span class="pre">sep</span></code>’s
|
||
type is compatible with <code class="docutils literal notranslate"><span class="pre">LiteralString</span></code> and <code class="docutils literal notranslate"><span class="pre">xs</span></code>’s type is
|
||
compatible with <code class="docutils literal notranslate"><span class="pre">Iterable[LiteralString]</span></code>.</li>
|
||
<li>In-place addition: If <code class="docutils literal notranslate"><span class="pre">s</span></code> has type <code class="docutils literal notranslate"><span class="pre">LiteralString</span></code> and <code class="docutils literal notranslate"><span class="pre">x</span></code> has
|
||
type compatible with <code class="docutils literal notranslate"><span class="pre">LiteralString</span></code>, then <code class="docutils literal notranslate"><span class="pre">s</span> <span class="pre">+=</span> <span class="pre">x</span></code> preserves
|
||
<code class="docutils literal notranslate"><span class="pre">s</span></code>’s type as <code class="docutils literal notranslate"><span class="pre">LiteralString</span></code>.</li>
|
||
<li>String formatting: An f-string has type <code class="docutils literal notranslate"><span class="pre">LiteralString</span></code> if and only
|
||
if its constituent expressions are literal strings. <code class="docutils literal notranslate"><span class="pre">s.format(...)</span></code>
|
||
has type <code class="docutils literal notranslate"><span class="pre">LiteralString</span></code> if and only if <code class="docutils literal notranslate"><span class="pre">s</span></code> and the arguments have
|
||
types compatible with <code class="docutils literal notranslate"><span class="pre">LiteralString</span></code>.</li>
|
||
<li>Literal-preserving methods: In <a class="reference internal" href="#pep-675-appendix-c">Appendix C</a>,
|
||
we have provided an exhaustive list of <code class="docutils literal notranslate"><span class="pre">str</span></code> methods that preserve the
|
||
<code class="docutils literal notranslate"><span class="pre">LiteralString</span></code> type.</li>
|
||
</ul>
|
||
<p>In all other cases, if one or more of the composed values has a
|
||
non-literal type <code class="docutils literal notranslate"><span class="pre">str</span></code>, the composition of types will have type
|
||
<code class="docutils literal notranslate"><span class="pre">str</span></code>. For example, if <code class="docutils literal notranslate"><span class="pre">s</span></code> has type <code class="docutils literal notranslate"><span class="pre">str</span></code>, then <code class="docutils literal notranslate"><span class="pre">"hello"</span> <span class="pre">+</span> <span class="pre">s</span></code>
|
||
has type <code class="docutils literal notranslate"><span class="pre">str</span></code>. This matches the pre-existing behavior of type
|
||
checkers.</p>
|
||
<p><code class="docutils literal notranslate"><span class="pre">LiteralString</span></code> is compatible with the type <code class="docutils literal notranslate"><span class="pre">str</span></code>. It inherits all
|
||
methods from <code class="docutils literal notranslate"><span class="pre">str</span></code>. So, if we have a variable <code class="docutils literal notranslate"><span class="pre">s</span></code> of type
|
||
<code class="docutils literal notranslate"><span class="pre">LiteralString</span></code>, it is safe to write <code class="docutils literal notranslate"><span class="pre">s.startswith("hello")</span></code>.</p>
|
||
<p>Some type checkers refine the type of a string when doing an equality
|
||
check:</p>
|
||
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="k">def</span> <span class="nf">foo</span><span class="p">(</span><span class="n">s</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-></span> <span class="kc">None</span><span class="p">:</span>
|
||
<span class="k">if</span> <span class="n">s</span> <span class="o">==</span> <span class="s2">"bar"</span><span class="p">:</span>
|
||
<span class="n">reveal_type</span><span class="p">(</span><span class="n">s</span><span class="p">)</span> <span class="c1"># => Literal["bar"]</span>
|
||
</pre></div>
|
||
</div>
|
||
<p>Such a refined type in the if-block is also compatible with
|
||
<code class="docutils literal notranslate"><span class="pre">LiteralString</span></code> because its type is <code class="docutils literal notranslate"><span class="pre">Literal["bar"]</span></code>.</p>
|
||
</section>
|
||
<section id="examples">
|
||
<h4><a class="toc-backref" href="#examples" role="doc-backlink">Examples</a></h4>
|
||
<p>See the examples below to help clarify the above rules:</p>
|
||
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="n">literal_string</span><span class="p">:</span> <span class="n">LiteralString</span>
|
||
<span class="n">s</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="n">literal_string</span> <span class="c1"># OK</span>
|
||
|
||
<span class="n">literal_string</span><span class="p">:</span> <span class="n">LiteralString</span> <span class="o">=</span> <span class="n">s</span> <span class="c1"># Error: Expected LiteralString, got str.</span>
|
||
<span class="n">literal_string</span><span class="p">:</span> <span class="n">LiteralString</span> <span class="o">=</span> <span class="s2">"hello"</span> <span class="c1"># OK</span>
|
||
</pre></div>
|
||
</div>
|
||
<p>Addition of literal strings:</p>
|
||
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="k">def</span> <span class="nf">expect_literal_string</span><span class="p">(</span><span class="n">s</span><span class="p">:</span> <span class="n">LiteralString</span><span class="p">)</span> <span class="o">-></span> <span class="kc">None</span><span class="p">:</span> <span class="o">...</span>
|
||
|
||
<span class="n">expect_literal_string</span><span class="p">(</span><span class="s2">"foo"</span> <span class="o">+</span> <span class="s2">"bar"</span><span class="p">)</span> <span class="c1"># OK</span>
|
||
<span class="n">expect_literal_string</span><span class="p">(</span><span class="n">literal_string</span> <span class="o">+</span> <span class="s2">"bar"</span><span class="p">)</span> <span class="c1"># OK</span>
|
||
|
||
<span class="n">literal_string2</span><span class="p">:</span> <span class="n">LiteralString</span>
|
||
<span class="n">expect_literal_string</span><span class="p">(</span><span class="n">literal_string</span> <span class="o">+</span> <span class="n">literal_string2</span><span class="p">)</span> <span class="c1"># OK</span>
|
||
|
||
<span class="n">plain_string</span><span class="p">:</span> <span class="nb">str</span>
|
||
<span class="n">expect_literal_string</span><span class="p">(</span><span class="n">literal_string</span> <span class="o">+</span> <span class="n">plain_string</span><span class="p">)</span> <span class="c1"># Not OK.</span>
|
||
</pre></div>
|
||
</div>
|
||
<p>Join using literal strings:</p>
|
||
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="n">expect_literal_string</span><span class="p">(</span><span class="s2">","</span><span class="o">.</span><span class="n">join</span><span class="p">([</span><span class="s2">"foo"</span><span class="p">,</span> <span class="s2">"bar"</span><span class="p">]))</span> <span class="c1"># OK</span>
|
||
<span class="n">expect_literal_string</span><span class="p">(</span><span class="n">literal_string</span><span class="o">.</span><span class="n">join</span><span class="p">([</span><span class="s2">"foo"</span><span class="p">,</span> <span class="s2">"bar"</span><span class="p">]))</span> <span class="c1"># OK</span>
|
||
<span class="n">expect_literal_string</span><span class="p">(</span><span class="n">literal_string</span><span class="o">.</span><span class="n">join</span><span class="p">([</span><span class="n">literal_string</span><span class="p">,</span> <span class="n">literal_string2</span><span class="p">]))</span> <span class="c1"># OK</span>
|
||
|
||
<span class="n">xs</span><span class="p">:</span> <span class="n">List</span><span class="p">[</span><span class="n">LiteralString</span><span class="p">]</span>
|
||
<span class="n">expect_literal_string</span><span class="p">(</span><span class="n">literal_string</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="n">xs</span><span class="p">))</span> <span class="c1"># OK</span>
|
||
<span class="n">expect_literal_string</span><span class="p">(</span><span class="n">plain_string</span><span class="o">.</span><span class="n">join</span><span class="p">([</span><span class="n">literal_string</span><span class="p">,</span> <span class="n">literal_string2</span><span class="p">]))</span>
|
||
<span class="c1"># Not OK because the separator has type 'str'.</span>
|
||
</pre></div>
|
||
</div>
|
||
<p>In-place addition using literal strings:</p>
|
||
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="n">literal_string</span> <span class="o">+=</span> <span class="s2">"foo"</span> <span class="c1"># OK</span>
|
||
<span class="n">literal_string</span> <span class="o">+=</span> <span class="n">literal_string2</span> <span class="c1"># OK</span>
|
||
<span class="n">literal_string</span> <span class="o">+=</span> <span class="n">plain_string</span> <span class="c1"># Not OK</span>
|
||
</pre></div>
|
||
</div>
|
||
<p>Format strings using literal strings:</p>
|
||
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="n">literal_name</span><span class="p">:</span> <span class="n">LiteralString</span>
|
||
<span class="n">expect_literal_string</span><span class="p">(</span><span class="sa">f</span><span class="s2">"hello </span><span class="si">{</span><span class="n">literal_name</span><span class="si">}</span><span class="s2">"</span><span class="p">)</span>
|
||
<span class="c1"># OK because it is composed from literal strings.</span>
|
||
|
||
<span class="n">expect_literal_string</span><span class="p">(</span><span class="s2">"hello </span><span class="si">{}</span><span class="s2">"</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="n">literal_name</span><span class="p">))</span> <span class="c1"># OK</span>
|
||
|
||
<span class="n">expect_literal_string</span><span class="p">(</span><span class="sa">f</span><span class="s2">"hello"</span><span class="p">)</span> <span class="c1"># OK</span>
|
||
|
||
<span class="n">username</span><span class="p">:</span> <span class="nb">str</span>
|
||
<span class="n">expect_literal_string</span><span class="p">(</span><span class="sa">f</span><span class="s2">"hello </span><span class="si">{</span><span class="n">username</span><span class="si">}</span><span class="s2">"</span><span class="p">)</span>
|
||
<span class="c1"># NOT OK. The format-string is constructed from 'username',</span>
|
||
<span class="c1"># which has type 'str'.</span>
|
||
|
||
<span class="n">expect_literal_string</span><span class="p">(</span><span class="s2">"hello </span><span class="si">{}</span><span class="s2">"</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="n">username</span><span class="p">))</span> <span class="c1"># Not OK</span>
|
||
</pre></div>
|
||
</div>
|
||
<p>Other literal types, such as literal integers, are not compatible with <code class="docutils literal notranslate"><span class="pre">LiteralString</span></code>:</p>
|
||
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="n">some_int</span><span class="p">:</span> <span class="nb">int</span>
|
||
<span class="n">expect_literal_string</span><span class="p">(</span><span class="n">some_int</span><span class="p">)</span> <span class="c1"># Error: Expected LiteralString, got int.</span>
|
||
|
||
<span class="n">literal_one</span><span class="p">:</span> <span class="n">Literal</span><span class="p">[</span><span class="mi">1</span><span class="p">]</span> <span class="o">=</span> <span class="mi">1</span>
|
||
<span class="n">expect_literal_string</span><span class="p">(</span><span class="n">literal_one</span><span class="p">)</span> <span class="c1"># Error: Expected LiteralString, got Literal[1].</span>
|
||
</pre></div>
|
||
</div>
|
||
<p>We can call functions on literal strings:</p>
|
||
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="k">def</span> <span class="nf">add_limit</span><span class="p">(</span><span class="n">query</span><span class="p">:</span> <span class="n">LiteralString</span><span class="p">)</span> <span class="o">-></span> <span class="n">LiteralString</span><span class="p">:</span>
|
||
<span class="k">return</span> <span class="n">query</span> <span class="o">+</span> <span class="s2">" LIMIT 1"</span>
|
||
|
||
<span class="k">def</span> <span class="nf">my_query</span><span class="p">(</span><span class="n">query</span><span class="p">:</span> <span class="n">LiteralString</span><span class="p">,</span> <span class="n">user_id</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-></span> <span class="kc">None</span><span class="p">:</span>
|
||
<span class="n">sql_connection</span><span class="p">()</span><span class="o">.</span><span class="n">execute</span><span class="p">(</span><span class="n">add_limit</span><span class="p">(</span><span class="n">query</span><span class="p">),</span> <span class="p">(</span><span class="n">user_id</span><span class="p">,))</span> <span class="c1"># OK</span>
|
||
</pre></div>
|
||
</div>
|
||
<p>Conditional statements and expressions work as expected:</p>
|
||
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="k">def</span> <span class="nf">return_literal_string</span><span class="p">()</span> <span class="o">-></span> <span class="n">LiteralString</span><span class="p">:</span>
|
||
<span class="k">return</span> <span class="s2">"foo"</span> <span class="k">if</span> <span class="n">condition1</span><span class="p">()</span> <span class="k">else</span> <span class="s2">"bar"</span> <span class="c1"># OK</span>
|
||
|
||
<span class="k">def</span> <span class="nf">return_literal_str2</span><span class="p">(</span><span class="n">literal_string</span><span class="p">:</span> <span class="n">LiteralString</span><span class="p">)</span> <span class="o">-></span> <span class="n">LiteralString</span><span class="p">:</span>
|
||
<span class="k">return</span> <span class="s2">"foo"</span> <span class="k">if</span> <span class="n">condition1</span><span class="p">()</span> <span class="k">else</span> <span class="n">literal_string</span> <span class="c1"># OK</span>
|
||
|
||
<span class="k">def</span> <span class="nf">return_literal_str3</span><span class="p">()</span> <span class="o">-></span> <span class="n">LiteralString</span><span class="p">:</span>
|
||
<span class="k">if</span> <span class="n">condition1</span><span class="p">():</span>
|
||
<span class="n">result</span><span class="p">:</span> <span class="n">Literal</span><span class="p">[</span><span class="s2">"foo"</span><span class="p">]</span> <span class="o">=</span> <span class="s2">"foo"</span>
|
||
<span class="k">else</span><span class="p">:</span>
|
||
<span class="n">result</span><span class="p">:</span> <span class="n">LiteralString</span> <span class="o">=</span> <span class="s2">"bar"</span>
|
||
|
||
<span class="k">return</span> <span class="n">result</span> <span class="c1"># OK</span>
|
||
</pre></div>
|
||
</div>
|
||
</section>
|
||
<section id="interaction-with-typevars-and-generics">
|
||
<h4><a class="toc-backref" href="#interaction-with-typevars-and-generics" role="doc-backlink">Interaction with TypeVars and Generics</a></h4>
|
||
<p>TypeVars can be bound to <code class="docutils literal notranslate"><span class="pre">LiteralString</span></code>:</p>
|
||
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="kn">from</span> <span class="nn">typing</span> <span class="kn">import</span> <span class="n">Literal</span><span class="p">,</span> <span class="n">LiteralString</span><span class="p">,</span> <span class="n">TypeVar</span>
|
||
|
||
<span class="n">TLiteral</span> <span class="o">=</span> <span class="n">TypeVar</span><span class="p">(</span><span class="s2">"TLiteral"</span><span class="p">,</span> <span class="n">bound</span><span class="o">=</span><span class="n">LiteralString</span><span class="p">)</span>
|
||
|
||
<span class="k">def</span> <span class="nf">literal_identity</span><span class="p">(</span><span class="n">s</span><span class="p">:</span> <span class="n">TLiteral</span><span class="p">)</span> <span class="o">-></span> <span class="n">TLiteral</span><span class="p">:</span>
|
||
<span class="k">return</span> <span class="n">s</span>
|
||
|
||
<span class="n">hello</span><span class="p">:</span> <span class="n">Literal</span><span class="p">[</span><span class="s2">"hello"</span><span class="p">]</span> <span class="o">=</span> <span class="s2">"hello"</span>
|
||
<span class="n">y</span> <span class="o">=</span> <span class="n">literal_identity</span><span class="p">(</span><span class="n">hello</span><span class="p">)</span>
|
||
<span class="n">reveal_type</span><span class="p">(</span><span class="n">y</span><span class="p">)</span> <span class="c1"># => Literal["hello"]</span>
|
||
|
||
<span class="n">s</span><span class="p">:</span> <span class="n">LiteralString</span>
|
||
<span class="n">y2</span> <span class="o">=</span> <span class="n">literal_identity</span><span class="p">(</span><span class="n">s</span><span class="p">)</span>
|
||
<span class="n">reveal_type</span><span class="p">(</span><span class="n">y2</span><span class="p">)</span> <span class="c1"># => LiteralString</span>
|
||
|
||
<span class="n">s_error</span><span class="p">:</span> <span class="nb">str</span>
|
||
<span class="n">literal_identity</span><span class="p">(</span><span class="n">s_error</span><span class="p">)</span>
|
||
<span class="c1"># Error: Expected TLiteral (bound to LiteralString), got str.</span>
|
||
</pre></div>
|
||
</div>
|
||
<p><code class="docutils literal notranslate"><span class="pre">LiteralString</span></code> can be used as a type argument for generic classes:</p>
|
||
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="k">class</span> <span class="nc">Container</span><span class="p">(</span><span class="n">Generic</span><span class="p">[</span><span class="n">T</span><span class="p">]):</span>
|
||
<span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="n">T</span><span class="p">)</span> <span class="o">-></span> <span class="kc">None</span><span class="p">:</span>
|
||
<span class="bp">self</span><span class="o">.</span><span class="n">value</span> <span class="o">=</span> <span class="n">value</span>
|
||
|
||
<span class="n">literal_string</span><span class="p">:</span> <span class="n">LiteralString</span> <span class="o">=</span> <span class="s2">"hello"</span>
|
||
<span class="n">x</span><span class="p">:</span> <span class="n">Container</span><span class="p">[</span><span class="n">LiteralString</span><span class="p">]</span> <span class="o">=</span> <span class="n">Container</span><span class="p">(</span><span class="n">literal_string</span><span class="p">)</span> <span class="c1"># OK</span>
|
||
|
||
<span class="n">s</span><span class="p">:</span> <span class="nb">str</span>
|
||
<span class="n">x_error</span><span class="p">:</span> <span class="n">Container</span><span class="p">[</span><span class="n">LiteralString</span><span class="p">]</span> <span class="o">=</span> <span class="n">Container</span><span class="p">(</span><span class="n">s</span><span class="p">)</span> <span class="c1"># Not OK</span>
|
||
</pre></div>
|
||
</div>
|
||
<p>Standard containers like <code class="docutils literal notranslate"><span class="pre">List</span></code> work as expected:</p>
|
||
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="n">xs</span><span class="p">:</span> <span class="n">List</span><span class="p">[</span><span class="n">LiteralString</span><span class="p">]</span> <span class="o">=</span> <span class="p">[</span><span class="s2">"foo"</span><span class="p">,</span> <span class="s2">"bar"</span><span class="p">,</span> <span class="s2">"baz"</span><span class="p">]</span>
|
||
</pre></div>
|
||
</div>
|
||
</section>
|
||
<section id="interactions-with-overloads">
|
||
<h4><a class="toc-backref" href="#interactions-with-overloads" role="doc-backlink">Interactions with Overloads</a></h4>
|
||
<p>Literal strings and overloads do not need to interact in a special
|
||
way: the existing rules work fine. <code class="docutils literal notranslate"><span class="pre">LiteralString</span></code> can be used as a
|
||
fallback overload where a specific <code class="docutils literal notranslate"><span class="pre">Literal["foo"]</span></code> type does not
|
||
match:</p>
|
||
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="nd">@overload</span>
|
||
<span class="k">def</span> <span class="nf">foo</span><span class="p">(</span><span class="n">x</span><span class="p">:</span> <span class="n">Literal</span><span class="p">[</span><span class="s2">"foo"</span><span class="p">])</span> <span class="o">-></span> <span class="nb">int</span><span class="p">:</span> <span class="o">...</span>
|
||
<span class="nd">@overload</span>
|
||
<span class="k">def</span> <span class="nf">foo</span><span class="p">(</span><span class="n">x</span><span class="p">:</span> <span class="n">LiteralString</span><span class="p">)</span> <span class="o">-></span> <span class="nb">bool</span><span class="p">:</span> <span class="o">...</span>
|
||
<span class="nd">@overload</span>
|
||
<span class="k">def</span> <span class="nf">foo</span><span class="p">(</span><span class="n">x</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-></span> <span class="nb">str</span><span class="p">:</span> <span class="o">...</span>
|
||
|
||
<span class="n">x1</span><span class="p">:</span> <span class="nb">int</span> <span class="o">=</span> <span class="n">foo</span><span class="p">(</span><span class="s2">"foo"</span><span class="p">)</span> <span class="c1"># First overload.</span>
|
||
<span class="n">x2</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="n">foo</span><span class="p">(</span><span class="s2">"bar"</span><span class="p">)</span> <span class="c1"># Second overload.</span>
|
||
<span class="n">s</span><span class="p">:</span> <span class="nb">str</span>
|
||
<span class="n">x3</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="n">foo</span><span class="p">(</span><span class="n">s</span><span class="p">)</span> <span class="c1"># Third overload.</span>
|
||
</pre></div>
|
||
</div>
|
||
</section>
|
||
</section>
|
||
</section>
|
||
<section id="backwards-compatibility">
|
||
<h2><a class="toc-backref" href="#backwards-compatibility" role="doc-backlink">Backwards Compatibility</a></h2>
|
||
<p>We propose adding <code class="docutils literal notranslate"><span class="pre">typing_extensions.LiteralString</span></code> for use in
|
||
earlier Python versions.</p>
|
||
<p>As <a class="pep reference internal" href="../pep-0586/#backwards-compatibility" title="PEP 586 – Literal Types § Backwards compatibility">PEP 586 mentions</a>,
|
||
type checkers “should feel free to experiment with more sophisticated
|
||
inference techniques”. So, if the type checker infers a literal string
|
||
type for an unannotated variable that is initialized with a literal
|
||
string, the following example should be OK:</p>
|
||
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="n">x</span> <span class="o">=</span> <span class="s2">"hello"</span>
|
||
<span class="n">expect_literal_string</span><span class="p">(</span><span class="n">x</span><span class="p">)</span>
|
||
<span class="c1"># OK, because x is inferred to have type 'Literal["hello"]'.</span>
|
||
</pre></div>
|
||
</div>
|
||
<p>This enables precise type checking of idiomatic SQL query code without
|
||
annotating the code at all (as seen in the <a class="reference internal" href="#motivation">Motivation</a> section
|
||
example).</p>
|
||
<p>However, like <a class="pep reference internal" href="../pep-0586/" title="PEP 586 – Literal Types">PEP 586</a>, this PEP does not mandate the above inference
|
||
strategy. In case the type checker doesn’t infer <code class="docutils literal notranslate"><span class="pre">x</span></code> to have type
|
||
<code class="docutils literal notranslate"><span class="pre">Literal["hello"]</span></code>, users can aid the type checker by explicitly
|
||
annotating it as <code class="docutils literal notranslate"><span class="pre">x:</span> <span class="pre">LiteralString</span></code>:</p>
|
||
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="n">x</span><span class="p">:</span> <span class="n">LiteralString</span> <span class="o">=</span> <span class="s2">"hello"</span>
|
||
<span class="n">expect_literal_string</span><span class="p">(</span><span class="n">x</span><span class="p">)</span>
|
||
</pre></div>
|
||
</div>
|
||
</section>
|
||
<section id="rejected-alternatives">
|
||
<h2><a class="toc-backref" href="#rejected-alternatives" role="doc-backlink">Rejected Alternatives</a></h2>
|
||
<section id="why-not-use-tool-x">
|
||
<h3><a class="toc-backref" href="#why-not-use-tool-x" role="doc-backlink">Why not use tool X?</a></h3>
|
||
<p>Tools to catch issues such as SQL injection seem to come in three
|
||
flavors: AST-based, function-level analysis, and taint flow analysis.</p>
|
||
<p><strong>AST-based tools</strong>: <a class="reference external" href="https://github.com/PyCQA/bandit/blob/aac3f16f45648a7756727286ba8f8f0cf5e7d408/bandit/plugins/django_sql_injection.py#L102">Bandit</a>
|
||
has a plugin to warn when SQL queries are not literal
|
||
strings. The problem is that many perfectly safe SQL
|
||
queries are dynamically built out of string literals, as shown in the
|
||
<a class="reference internal" href="#motivation">Motivation</a> section. At the
|
||
AST level, the resultant SQL query is not going to appear as a string
|
||
literal anymore and is thus indistinguishable from a potentially
|
||
malicious string. To use these tools would require significantly
|
||
restricting developers’ ability to build SQL queries. <code class="docutils literal notranslate"><span class="pre">LiteralString</span></code>
|
||
can provide similar safety guarantees with fewer restrictions.</p>
|
||
<p><strong>Semgrep and pyanalyze</strong>: Semgrep supports a more sophisticated
|
||
function level analysis, including <a class="reference external" href="https://semgrep.dev/docs/writing-rules/data-flow/#constant-propagation">constant propagation</a>
|
||
within a function. This allows us to prevent injection attacks while
|
||
permitting some forms of safe dynamic SQL queries within a
|
||
function. <a class="reference external" href="https://github.com/quora/pyanalyze/blob/afcb58cd3e967e4e3fea9e57bb18b6b1d9d42ed7/README.md#extending-pyanalyze">pyanalyze</a>
|
||
has a similar extension. But neither handles function calls that
|
||
construct and return safe SQL queries. For example, in the code sample
|
||
below, <code class="docutils literal notranslate"><span class="pre">build_insert_query</span></code> is a helper function to create a query
|
||
that inserts multiple values into the corresponding columns. Semgrep
|
||
and pyanalyze forbid this natural usage whereas <code class="docutils literal notranslate"><span class="pre">LiteralString</span></code>
|
||
handles it with no burden on the programmer:</p>
|
||
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="k">def</span> <span class="nf">build_insert_query</span><span class="p">(</span>
|
||
<span class="n">table</span><span class="p">:</span> <span class="n">LiteralString</span><span class="p">,</span>
|
||
<span class="n">insert_columns</span><span class="p">:</span> <span class="n">Iterable</span><span class="p">[</span><span class="n">LiteralString</span><span class="p">],</span>
|
||
<span class="p">)</span> <span class="o">-></span> <span class="n">LiteralString</span><span class="p">:</span>
|
||
<span class="n">sql</span> <span class="o">=</span> <span class="s2">"INSERT INTO "</span> <span class="o">+</span> <span class="n">table</span>
|
||
|
||
<span class="n">column_clause</span> <span class="o">=</span> <span class="s2">", "</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="n">insert_columns</span><span class="p">)</span>
|
||
<span class="n">value_clause</span> <span class="o">=</span> <span class="s2">", "</span><span class="o">.</span><span class="n">join</span><span class="p">([</span><span class="s2">"?"</span><span class="p">]</span> <span class="o">*</span> <span class="nb">len</span><span class="p">(</span><span class="n">insert_columns</span><span class="p">))</span>
|
||
|
||
<span class="n">sql</span> <span class="o">+=</span> <span class="sa">f</span><span class="s2">" (</span><span class="si">{</span><span class="n">column_clause</span><span class="si">}</span><span class="s2">) VALUES (</span><span class="si">{</span><span class="n">value_clause</span><span class="si">}</span><span class="s2">)"</span>
|
||
<span class="k">return</span> <span class="n">sql</span>
|
||
|
||
<span class="k">def</span> <span class="nf">insert_data</span><span class="p">(</span>
|
||
<span class="n">conn</span><span class="p">:</span> <span class="n">Connection</span><span class="p">,</span>
|
||
<span class="n">kvs_to_insert</span><span class="p">:</span> <span class="n">Dict</span><span class="p">[</span><span class="n">LiteralString</span><span class="p">,</span> <span class="nb">str</span><span class="p">]</span>
|
||
<span class="p">)</span> <span class="o">-></span> <span class="kc">None</span><span class="p">:</span>
|
||
<span class="n">query</span> <span class="o">=</span> <span class="n">build_insert_query</span><span class="p">(</span><span class="s2">"data"</span><span class="p">,</span> <span class="n">kvs_to_insert</span><span class="o">.</span><span class="n">keys</span><span class="p">())</span>
|
||
<span class="n">conn</span><span class="o">.</span><span class="n">execute</span><span class="p">(</span><span class="n">query</span><span class="p">,</span> <span class="n">kvs_to_insert</span><span class="o">.</span><span class="n">values</span><span class="p">())</span>
|
||
|
||
<span class="c1"># Example usage</span>
|
||
<span class="n">data_to_insert</span> <span class="o">=</span> <span class="p">{</span>
|
||
<span class="s2">"column_1"</span><span class="p">:</span> <span class="n">value_1</span><span class="p">,</span> <span class="c1"># Note: values are not literals</span>
|
||
<span class="s2">"column_2"</span><span class="p">:</span> <span class="n">value_2</span><span class="p">,</span>
|
||
<span class="s2">"column_3"</span><span class="p">:</span> <span class="n">value_3</span><span class="p">,</span>
|
||
<span class="p">}</span>
|
||
<span class="n">insert_data</span><span class="p">(</span><span class="n">conn</span><span class="p">,</span> <span class="n">data_to_insert</span><span class="p">)</span>
|
||
</pre></div>
|
||
</div>
|
||
<p><strong>Taint flow analysis</strong>: Tools such as <a class="reference external" href="https://pyre-check.org/docs/pysa-basics/">Pysa</a> or <a class="reference external" href="https://codeql.github.com/">CodeQL</a> are capable of tracking data flowing
|
||
from a user-controlled input into a SQL query. These tools are
|
||
powerful but involve considerable overhead in setting up the tool in
|
||
CI, defining “taint” sinks and sources, and teaching developers how to
|
||
use them. They also usually take longer to run than a type checker
|
||
(minutes instead of seconds), which means feedback is not
|
||
immediate. Finally, they move the burden of preventing vulnerabilities
|
||
on to library users instead of allowing the libraries themselves to
|
||
specify precisely how their APIs must be called (as is possible with
|
||
<code class="docutils literal notranslate"><span class="pre">LiteralString</span></code>).</p>
|
||
<p>One final reason to prefer using a new type over a dedicated tool is
|
||
that type checkers are more widely used than dedicated security
|
||
tooling; for example, mypy was downloaded <a class="reference external" href="https://www.pypistats.org/packages/mypy">over 7 million times</a> in Jan 2022 vs <a class="reference external" href="https://www.pypistats.org/packages/bandit">less than
|
||
2 million times</a> for
|
||
Bandit. Having security protections built right into type checkers
|
||
will mean that more developers benefit from them.</p>
|
||
</section>
|
||
<section id="why-not-use-a-newtype-for-str">
|
||
<h3><a class="toc-backref" href="#why-not-use-a-newtype-for-str" role="doc-backlink">Why not use a <code class="docutils literal notranslate"><span class="pre">NewType</span></code> for <code class="docutils literal notranslate"><span class="pre">str</span></code>?</a></h3>
|
||
<p>Any API for which <code class="docutils literal notranslate"><span class="pre">LiteralString</span></code> would be suitable could instead be
|
||
updated to accept a different type created within the Python type
|
||
system, such as <code class="docutils literal notranslate"><span class="pre">NewType("SafeSQL",</span> <span class="pre">str)</span></code>:</p>
|
||
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="n">SafeSQL</span> <span class="o">=</span> <span class="n">NewType</span><span class="p">(</span><span class="s2">"SafeSQL"</span><span class="p">,</span> <span class="nb">str</span><span class="p">)</span>
|
||
|
||
<span class="k">def</span> <span class="nf">execute</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">sql</span><span class="p">:</span> <span class="n">SafeSQL</span><span class="p">,</span> <span class="n">parameters</span><span class="p">:</span> <span class="n">Iterable</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="o">...</span><span class="p">)</span> <span class="o">-></span> <span class="n">Cursor</span><span class="p">:</span> <span class="o">...</span>
|
||
|
||
<span class="n">execute</span><span class="p">(</span><span class="n">SafeSQL</span><span class="p">(</span><span class="s2">"SELECT * FROM data WHERE user_id = ?"</span><span class="p">),</span> <span class="n">user_id</span><span class="p">)</span> <span class="c1"># OK</span>
|
||
|
||
<span class="n">user_query</span><span class="p">:</span> <span class="nb">str</span>
|
||
<span class="n">execute</span><span class="p">(</span><span class="n">user_query</span><span class="p">)</span> <span class="c1"># Error: Expected SafeSQL, got str.</span>
|
||
</pre></div>
|
||
</div>
|
||
<p>Having to create a new type to call an API might give some developers
|
||
pause and encourage more caution, but it doesn’t guarantee that
|
||
developers won’t just turn a user-controlled string into the new type,
|
||
and pass it into the modified API anyway:</p>
|
||
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="n">query</span> <span class="o">=</span> <span class="sa">f</span><span class="s2">"SELECT * FROM data WHERE user_id = </span><span class="si">{</span><span class="n">user_id</span><span class="si">}</span><span class="s2">"</span>
|
||
<span class="n">execute</span><span class="p">(</span><span class="n">SafeSQL</span><span class="p">(</span><span class="n">query</span><span class="p">))</span> <span class="c1"># No error!</span>
|
||
</pre></div>
|
||
</div>
|
||
<p>We are back to square one with the problem of preventing arbitrary
|
||
inputs to <code class="docutils literal notranslate"><span class="pre">SafeSQL</span></code>. This is not a theoretical concern
|
||
either. Django uses the above approach with <code class="docutils literal notranslate"><span class="pre">SafeString</span></code> and
|
||
<a class="reference external" href="https://docs.djangoproject.com/en/dev/_modules/django/utils/safestring/#SafeString">mark_safe</a>. Issues
|
||
such as <a class="reference external" href="https://github.com/django/django/commit/2dd4d110c159d0c81dff42eaead2c378a0998735">CVE-2020-13596</a>
|
||
show how this technique can <a class="reference external" href="https://nvd.nist.gov/vuln/detail/CVE-2020-13596">fail</a>.</p>
|
||
<p>Also note that this requires invasive changes to the source code
|
||
(wrapping the query with <code class="docutils literal notranslate"><span class="pre">SafeSQL</span></code>) whereas <code class="docutils literal notranslate"><span class="pre">LiteralString</span></code>
|
||
requires no such changes. Users can remain oblivious to it as long as
|
||
they pass in literal strings to sensitive APIs.</p>
|
||
</section>
|
||
<section id="why-not-try-to-emulate-trusted-types">
|
||
<h3><a class="toc-backref" href="#why-not-try-to-emulate-trusted-types" role="doc-backlink">Why not try to emulate Trusted Types?</a></h3>
|
||
<p><a class="reference external" href="https://w3c.github.io/webappsec-trusted-types/dist/spec/">Trusted Types</a> is a W3C
|
||
specification for preventing DOM-based Cross Site Scripting (XSS). XSS
|
||
occurs when dangerous browser APIs accept raw user-controlled
|
||
strings. The specification modifies these APIs to accept only the
|
||
“Trusted Types” returned by designated sanitizing functions. These
|
||
sanitizing functions must take in a potentially malicious string and
|
||
validate it or render it benign somehow, for example by verifying that
|
||
it is a valid URL or HTML-encoding it.</p>
|
||
<p>It can be tempting to assume porting the concept of Trusted Types to
|
||
Python could solve the problem. The fundamental difference, however,
|
||
is that the output of a Trusted Types sanitizer is usually intended
|
||
<em>to not be executable code</em>. Thus it’s easy to HTML encode the input,
|
||
strip out dangerous tags, or otherwise render it inert. With a SQL
|
||
query or shell command, the end result <em>still needs to be executable
|
||
code</em>. There is no way to write a sanitizer that can reliably figure
|
||
out which parts of an input string are benign and which ones are
|
||
potentially malicious.</p>
|
||
</section>
|
||
<section id="runtime-checkable-literalstring">
|
||
<h3><a class="toc-backref" href="#runtime-checkable-literalstring" role="doc-backlink">Runtime Checkable <code class="docutils literal notranslate"><span class="pre">LiteralString</span></code></a></h3>
|
||
<p>The <code class="docutils literal notranslate"><span class="pre">LiteralString</span></code> concept could be extended beyond static type
|
||
checking to be a runtime checkable property of <code class="docutils literal notranslate"><span class="pre">str</span></code> objects. This
|
||
would provide some benefits, such as allowing frameworks to raise
|
||
errors on dynamic strings. Such runtime errors would be a more robust
|
||
defense mechanism than type errors, which can potentially be
|
||
suppressed, ignored, or never even seen if the author does not use a
|
||
type checker.</p>
|
||
<p>This extension to the <code class="docutils literal notranslate"><span class="pre">LiteralString</span></code> concept would dramatically
|
||
increase the scope of the proposal by requiring changes to one of the
|
||
most fundamental types in Python. While runtime taint checking on
|
||
strings, similar to Perl’s <a class="reference external" href="https://metacpan.org/pod/Taint">taint</a>,
|
||
has been <a class="reference external" href="https://bugs.python.org/issue500698">considered</a> and
|
||
<a class="reference external" href="https://github.com/felixgr/pytaint">attempted</a> in the past, and
|
||
others may consider it in the future, such extensions are out of scope
|
||
for this PEP.</p>
|
||
</section>
|
||
<section id="rejected-names">
|
||
<h3><a class="toc-backref" href="#rejected-names" role="doc-backlink">Rejected Names</a></h3>
|
||
<p>We considered a variety of names for the literal string type and
|
||
solicited ideas on <a class="reference external" href="https://mail.python.org/archives/list/typing-sig@python.org/thread/VB74EHNM4RODDFM64NEEEBJQVAUAWIAW/">typing-sig</a>.
|
||
Some notable alternatives were:</p>
|
||
<ul class="simple">
|
||
<li><code class="docutils literal notranslate"><span class="pre">Literal[str]</span></code>: This is a natural extension of the
|
||
<code class="docutils literal notranslate"><span class="pre">Literal["foo"]</span></code> type name, but typing-sig <a class="reference external" href="https://mail.python.org/archives/list/typing-sig@python.org/message/2ZQO4NTJEI42KTRJDBL77MNANEXOW7UI/">objected</a>
|
||
that users could mistake this for the literal type of the <code class="docutils literal notranslate"><span class="pre">str</span></code>
|
||
class.</li>
|
||
<li><code class="docutils literal notranslate"><span class="pre">LiteralStr</span></code>: This is shorter than <code class="docutils literal notranslate"><span class="pre">LiteralString</span></code> but looks
|
||
weird to the PEP authors.</li>
|
||
<li><code class="docutils literal notranslate"><span class="pre">LiteralDerivedString</span></code>: This (along with
|
||
<code class="docutils literal notranslate"><span class="pre">MadeFromLiteralString</span></code>) best captures the technical meaning of
|
||
the type. It represents not just the type of literal expressions,
|
||
such as <code class="docutils literal notranslate"><span class="pre">"foo"</span></code>, but also that of expressions composed from
|
||
literals, such as <code class="docutils literal notranslate"><span class="pre">"foo"</span> <span class="pre">+</span> <span class="pre">"bar"</span></code>. However, both names seem wordy.</li>
|
||
<li><code class="docutils literal notranslate"><span class="pre">StringLiteral</span></code>: Users might confuse this with the existing
|
||
concept of <a class="reference external" href="https://docs.python.org/3/reference/lexical_analysis.html#string-and-bytes-literals">“string literals”</a>
|
||
where the string exists as a syntactic token in the source code,
|
||
whereas our concept is more general.</li>
|
||
<li><code class="docutils literal notranslate"><span class="pre">SafeString</span></code>: While this comes close to our intended meaning, it
|
||
may mislead users into thinking that the string has been sanitized in
|
||
some way, perhaps by escaping HTML tags or shell-related special
|
||
characters.</li>
|
||
<li><code class="docutils literal notranslate"><span class="pre">ConstantStr</span></code>: This does not capture the idea of composing literal
|
||
strings.</li>
|
||
<li><code class="docutils literal notranslate"><span class="pre">StaticStr</span></code>: This suggests that the string is statically
|
||
computable, i.e., computable without running the program, which is
|
||
not true. The literal string may vary based on runtime flags, as
|
||
seen in the <a class="reference internal" href="#motivation">Motivation</a> examples.</li>
|
||
<li><code class="docutils literal notranslate"><span class="pre">LiteralOnly[str]</span></code>: This has the advantage of being extensible to
|
||
other literal types, such as <code class="docutils literal notranslate"><span class="pre">bytes</span></code> or <code class="docutils literal notranslate"><span class="pre">int</span></code>. However, we did
|
||
not find the extensibility worth the loss of readability.</li>
|
||
</ul>
|
||
<p>Overall, there was no clear winner on typing-sig over a long period,
|
||
so we decided to tip the scales in favor of <code class="docutils literal notranslate"><span class="pre">LiteralString</span></code>.</p>
|
||
</section>
|
||
<section id="literalbytes">
|
||
<h3><a class="toc-backref" href="#literalbytes" role="doc-backlink"><code class="docutils literal notranslate"><span class="pre">LiteralBytes</span></code></a></h3>
|
||
<p>We could generalize literal byte types, such as <code class="docutils literal notranslate"><span class="pre">Literal[b"foo"]</span></code>,
|
||
to <code class="docutils literal notranslate"><span class="pre">LiteralBytes</span></code>. However, literal byte types are used much less
|
||
frequently than literal string types and we did not find much user
|
||
demand for <code class="docutils literal notranslate"><span class="pre">LiteralBytes</span></code>, so we decided not to include it in this
|
||
PEP. Others may, however, consider it in future PEPs.</p>
|
||
</section>
|
||
</section>
|
||
<section id="reference-implementation">
|
||
<h2><a class="toc-backref" href="#reference-implementation" role="doc-backlink">Reference Implementation</a></h2>
|
||
<p>This is implemented in Pyre v0.9.8 and is actively being used.</p>
|
||
<p>The implementation simply extends the type checker with
|
||
<code class="docutils literal notranslate"><span class="pre">LiteralString</span></code> as a supertype of literal string types.</p>
|
||
<p>To support composition via addition, join, etc., it was sufficient to
|
||
overload the stubs for <code class="docutils literal notranslate"><span class="pre">str</span></code> in Pyre’s copy of typeshed.</p>
|
||
</section>
|
||
<section id="appendix-a-other-uses">
|
||
<h2><a class="toc-backref" href="#appendix-a-other-uses" role="doc-backlink">Appendix A: Other Uses</a></h2>
|
||
<p>To simplify the discussion and require minimal security knowledge, we
|
||
focused on SQL injections throughout the PEP. <code class="docutils literal notranslate"><span class="pre">LiteralString</span></code>,
|
||
however, can also be used to prevent many other kinds of <a class="reference external" href="https://owasp.org/www-community/Injection_Flaws">injection
|
||
vulnerabilities</a>.</p>
|
||
<section id="command-injection">
|
||
<h3><a class="toc-backref" href="#command-injection" role="doc-backlink">Command Injection</a></h3>
|
||
<p>APIs such as <code class="docutils literal notranslate"><span class="pre">subprocess.run</span></code> accept a string which can be run as a
|
||
shell command:</p>
|
||
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="n">subprocess</span><span class="o">.</span><span class="n">run</span><span class="p">(</span><span class="sa">f</span><span class="s2">"echo 'Hello </span><span class="si">{</span><span class="n">name</span><span class="si">}</span><span class="s2">'"</span><span class="p">,</span> <span class="n">shell</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span>
|
||
</pre></div>
|
||
</div>
|
||
<p>If user-controlled data is included in the command string, the code is
|
||
vulnerable to “command injection”; i.e., an attacker can run malicious
|
||
commands. For example, a value of <code class="docutils literal notranslate"><span class="pre">'</span> <span class="pre">&&</span> <span class="pre">rm</span> <span class="pre">-rf</span> <span class="pre">/</span> <span class="pre">#</span></code> would result in
|
||
the following destructive command being run:</p>
|
||
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="n">echo</span> <span class="s1">'Hello '</span> <span class="o">&&</span> <span class="n">rm</span> <span class="o">-</span><span class="n">rf</span> <span class="o">/</span> <span class="c1">#'</span>
|
||
</pre></div>
|
||
</div>
|
||
<p>This vulnerability could be prevented by updating <code class="docutils literal notranslate"><span class="pre">run</span></code> to only
|
||
accept <code class="docutils literal notranslate"><span class="pre">LiteralString</span></code> when used in <code class="docutils literal notranslate"><span class="pre">shell=True</span></code> mode. Here is one
|
||
simplified stub:</p>
|
||
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="k">def</span> <span class="nf">run</span><span class="p">(</span><span class="n">command</span><span class="p">:</span> <span class="n">LiteralString</span><span class="p">,</span> <span class="o">*</span><span class="n">args</span><span class="p">:</span> <span class="nb">str</span><span class="p">,</span> <span class="n">shell</span><span class="p">:</span> <span class="nb">bool</span><span class="o">=...</span><span class="p">):</span> <span class="o">...</span>
|
||
</pre></div>
|
||
</div>
|
||
</section>
|
||
<section id="cross-site-scripting-xss">
|
||
<h3><a class="toc-backref" href="#cross-site-scripting-xss" role="doc-backlink">Cross Site Scripting (XSS)</a></h3>
|
||
<p>Most popular Python web frameworks, such as Django, use a templating
|
||
engine to produce HTML from user data. These templating languages
|
||
auto-escape user data before inserting it into the HTML template and
|
||
thus prevent cross site scripting (XSS) vulnerabilities.</p>
|
||
<p>But a common way to <a class="reference external" href="https://django.readthedocs.io/en/stable/ref/templates/language.html#how-to-turn-it-off">bypass auto-escaping</a>
|
||
and render HTML as-is is to use functions like <code class="docutils literal notranslate"><span class="pre">mark_safe</span></code> in
|
||
<a class="reference external" href="https://docs.djangoproject.com/en/dev/ref/utils/#django.utils.safestring.mark_safe">Django</a>
|
||
or <code class="docutils literal notranslate"><span class="pre">do_mark_safe</span></code> in <a class="reference external" href="https://github.com/pallets/jinja/blob/077b7918a7642ff6742fe48a32e54d7875140894/src/jinja2/filters.py#L1264">Jinja2</a>,
|
||
which cause XSS vulnerabilities:</p>
|
||
<div class="bad highlight-default notranslate"><div class="highlight"><pre><span></span> <span class="n">dangerous_string</span> <span class="o">=</span> <span class="n">django</span><span class="o">.</span><span class="n">utils</span><span class="o">.</span><span class="n">safestring</span><span class="o">.</span><span class="n">mark_safe</span><span class="p">(</span><span class="sa">f</span><span class="s2">"<script></span><span class="si">{</span><span class="n">user_input</span><span class="si">}</span><span class="s2"></script>"</span><span class="p">)</span>
|
||
<span class="k">return</span><span class="p">(</span><span class="n">dangerous_string</span><span class="p">)</span>
|
||
</pre></div>
|
||
</div>
|
||
<p>This vulnerability could be prevented by updating <code class="docutils literal notranslate"><span class="pre">mark_safe</span></code> to
|
||
only accept <code class="docutils literal notranslate"><span class="pre">LiteralString</span></code>:</p>
|
||
<div class="good highlight-default notranslate"><div class="highlight"><pre><span></span> <span class="k">def</span> <span class="nf">mark_safe</span><span class="p">(</span><span class="n">s</span><span class="p">:</span> <span class="n">LiteralString</span><span class="p">)</span> <span class="o">-></span> <span class="nb">str</span><span class="p">:</span> <span class="o">...</span>
|
||
</pre></div>
|
||
</div>
|
||
</section>
|
||
<section id="server-side-template-injection-ssti">
|
||
<h3><a class="toc-backref" href="#server-side-template-injection-ssti" role="doc-backlink">Server Side Template Injection (SSTI)</a></h3>
|
||
<p>Templating frameworks, such as Jinja, allow Python expressions which
|
||
will be evaluated and substituted into the rendered result:</p>
|
||
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="n">template_str</span> <span class="o">=</span> <span class="s2">"There are {{ len(values) }} values: {{ values }}"</span>
|
||
<span class="n">template</span> <span class="o">=</span> <span class="n">jinja2</span><span class="o">.</span><span class="n">Template</span><span class="p">(</span><span class="n">template_str</span><span class="p">)</span>
|
||
<span class="n">template</span><span class="o">.</span><span class="n">render</span><span class="p">(</span><span class="n">values</span><span class="o">=</span><span class="p">[</span><span class="mi">1</span><span class="p">,</span> <span class="mi">2</span><span class="p">])</span>
|
||
<span class="c1"># Result: "There are 2 values: [1, 2]"</span>
|
||
</pre></div>
|
||
</div>
|
||
<p>If an attacker controls all or part of the template string, they can
|
||
insert expressions which execute arbitrary code and <a class="reference external" href="https://www.onsecurity.io/blog/server-side-template-injection-with-jinja2/">compromise</a>
|
||
the application:</p>
|
||
<div class="bad highlight-default notranslate"><div class="highlight"><pre><span></span> <span class="n">malicious_str</span> <span class="o">=</span> <span class="s2">"{{''.__class__.__base__.__subclasses__()[408]('rm -rf /',shell=True)}}"</span>
|
||
<span class="n">template</span> <span class="o">=</span> <span class="n">jinja2</span><span class="o">.</span><span class="n">Template</span><span class="p">(</span><span class="n">malicious_str</span><span class="p">)</span>
|
||
<span class="n">template</span><span class="o">.</span><span class="n">render</span><span class="p">()</span>
|
||
<span class="c1"># Result: The shell command 'rm -rf /' is run</span>
|
||
</pre></div>
|
||
</div>
|
||
<p>Template injection exploits like this could be prevented by updating
|
||
the <code class="docutils literal notranslate"><span class="pre">Template</span></code> API to only accept <code class="docutils literal notranslate"><span class="pre">LiteralString</span></code>:</p>
|
||
<div class="good highlight-default notranslate"><div class="highlight"><pre><span></span> <span class="k">class</span> <span class="nc">Template</span><span class="p">:</span>
|
||
<span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">source</span><span class="p">:</span> <span class="n">LiteralString</span><span class="p">):</span> <span class="o">...</span>
|
||
</pre></div>
|
||
</div>
|
||
</section>
|
||
<section id="logging-format-string-injection">
|
||
<h3><a class="toc-backref" href="#logging-format-string-injection" role="doc-backlink">Logging Format String Injection</a></h3>
|
||
<p>Logging frameworks often allow their input strings to contain
|
||
formatting directives. At its worst, allowing users to control the
|
||
logged string has led to <a class="reference external" href="https://nvd.nist.gov/vuln/detail/CVE-2021-44228">CVE-2021-44228</a> (colloquially
|
||
known as <code class="docutils literal notranslate"><span class="pre">log4shell</span></code>), which has been described as the <a class="reference external" href="https://www.theguardian.com/technology/2021/dec/10/software-flaw-most-critical-vulnerability-log-4-shell">“most
|
||
critical vulnerability of the last decade”</a>.
|
||
While no Python frameworks are currently known to be vulnerable to a
|
||
similar attack, the built-in logging framework does provide formatting
|
||
options which are vulnerable to Denial of Service attacks from
|
||
externally controlled logging strings. The following example
|
||
illustrates a simple denial of service scenario:</p>
|
||
<div class="bad highlight-default notranslate"><div class="highlight"><pre><span></span> <span class="n">external_string</span> <span class="o">=</span> <span class="s2">"</span><span class="si">%(foo)999999999s</span><span class="s2">"</span>
|
||
<span class="o">...</span>
|
||
<span class="c1"># Tries to add > 1GB of whitespace to the logged string:</span>
|
||
<span class="n">logger</span><span class="o">.</span><span class="n">info</span><span class="p">(</span><span class="sa">f</span><span class="s1">'Received: </span><span class="si">{</span><span class="n">external_string</span><span class="si">}</span><span class="s1">'</span><span class="p">,</span> <span class="n">some_dict</span><span class="p">)</span>
|
||
</pre></div>
|
||
</div>
|
||
<p>This kind of attack could be prevented by requiring that the format
|
||
string passed to the logger be a <code class="docutils literal notranslate"><span class="pre">LiteralString</span></code> and that all
|
||
externally controlled data be passed separately as arguments (as
|
||
proposed in <a class="reference external" href="https://bugs.python.org/issue46200">Issue 46200</a>):</p>
|
||
<div class="good highlight-default notranslate"><div class="highlight"><pre><span></span> <span class="k">def</span> <span class="nf">info</span><span class="p">(</span><span class="n">msg</span><span class="p">:</span> <span class="n">LiteralString</span><span class="p">,</span> <span class="o">*</span><span class="n">args</span><span class="p">:</span> <span class="nb">object</span><span class="p">)</span> <span class="o">-></span> <span class="kc">None</span><span class="p">:</span>
|
||
<span class="o">...</span>
|
||
</pre></div>
|
||
</div>
|
||
</section>
|
||
</section>
|
||
<section id="appendix-b-limitations">
|
||
<h2><a class="toc-backref" href="#appendix-b-limitations" role="doc-backlink">Appendix B: Limitations</a></h2>
|
||
<p>There are a number of ways <code class="docutils literal notranslate"><span class="pre">LiteralString</span></code> could still fail to
|
||
prevent users from passing strings built from non-literal data to an
|
||
API:</p>
|
||
<p>1. If the developer does not use a type checker or does not add type
|
||
annotations, then violations will go uncaught.</p>
|
||
<p>2. <code class="docutils literal notranslate"><span class="pre">cast(LiteralString,</span> <span class="pre">non_literal_string)</span></code> could be used to lie to
|
||
the type checker and allow a dynamic string value to masquerade as a
|
||
<code class="docutils literal notranslate"><span class="pre">LiteralString</span></code>. The same goes for a variable that has type <code class="docutils literal notranslate"><span class="pre">Any</span></code>.</p>
|
||
<p>3. Comments such as <code class="docutils literal notranslate"><span class="pre">#</span> <span class="pre">type:</span> <span class="pre">ignore</span></code> could be used to ignore
|
||
warnings about non-literal strings.</p>
|
||
<p>4. Trivial functions could be constructed to convert a <code class="docutils literal notranslate"><span class="pre">str</span></code> to a
|
||
<code class="docutils literal notranslate"><span class="pre">LiteralString</span></code>:</p>
|
||
<div class="bad highlight-default notranslate"><div class="highlight"><pre><span></span> <span class="k">def</span> <span class="nf">make_literal</span><span class="p">(</span><span class="n">s</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-></span> <span class="n">LiteralString</span><span class="p">:</span>
|
||
<span class="n">letters</span><span class="p">:</span> <span class="n">Dict</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">LiteralString</span><span class="p">]</span> <span class="o">=</span> <span class="p">{</span>
|
||
<span class="s2">"A"</span><span class="p">:</span> <span class="s2">"A"</span><span class="p">,</span>
|
||
<span class="s2">"B"</span><span class="p">:</span> <span class="s2">"B"</span><span class="p">,</span>
|
||
<span class="o">...</span>
|
||
<span class="p">}</span>
|
||
<span class="n">output</span><span class="p">:</span> <span class="n">List</span><span class="p">[</span><span class="n">LiteralString</span><span class="p">]</span> <span class="o">=</span> <span class="p">[</span><span class="n">letters</span><span class="p">[</span><span class="n">c</span><span class="p">]</span> <span class="k">for</span> <span class="n">c</span> <span class="ow">in</span> <span class="n">s</span><span class="p">]</span>
|
||
<span class="k">return</span> <span class="s2">""</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="n">output</span><span class="p">)</span>
|
||
</pre></div>
|
||
</div>
|
||
<p>We could mitigate the above using linting, code review, etc., but
|
||
ultimately a clever, malicious developer attempting to circumvent the
|
||
protections offered by <code class="docutils literal notranslate"><span class="pre">LiteralString</span></code> will always succeed. The
|
||
important thing to remember is that <code class="docutils literal notranslate"><span class="pre">LiteralString</span></code> is not intended
|
||
to protect against <em>malicious</em> developers; it is meant to protect
|
||
against benign developers accidentally using sensitive APIs in a
|
||
dangerous way (without getting in their way otherwise).</p>
|
||
<p>Without <code class="docutils literal notranslate"><span class="pre">LiteralString</span></code>, the best enforcement tool API authors have
|
||
is documentation, which is easily ignored and often not seen. With
|
||
<code class="docutils literal notranslate"><span class="pre">LiteralString</span></code>, API misuse requires conscious thought and artifacts
|
||
in the code that reviewers and future developers can notice.</p>
|
||
</section>
|
||
<section id="appendix-c-str-methods-that-preserve-literalstring">
|
||
<span id="pep-675-appendix-c"></span><h2><a class="toc-backref" href="#appendix-c-str-methods-that-preserve-literalstring" role="doc-backlink">Appendix C: <code class="docutils literal notranslate"><span class="pre">str</span></code> methods that preserve <code class="docutils literal notranslate"><span class="pre">LiteralString</span></code></a></h2>
|
||
<p>The <code class="docutils literal notranslate"><span class="pre">str</span></code> class has several methods that would benefit from
|
||
<code class="docutils literal notranslate"><span class="pre">LiteralString</span></code>. For example, users might expect
|
||
<code class="docutils literal notranslate"><span class="pre">"hello".capitalize()</span></code> to have the type <code class="docutils literal notranslate"><span class="pre">LiteralString</span></code> similar to
|
||
the other examples we have seen in the <a class="reference internal" href="#inferring-literal-string">Inferring LiteralString</a> section. Inferring the type
|
||
<code class="docutils literal notranslate"><span class="pre">LiteralString</span></code> is correct because the string is not an arbitrary
|
||
user-supplied string — we know that it has the type
|
||
<code class="docutils literal notranslate"><span class="pre">Literal["Hello"]</span></code>, which is compatible with <code class="docutils literal notranslate"><span class="pre">LiteralString</span></code>. In
|
||
other words, the <code class="docutils literal notranslate"><span class="pre">capitalize</span></code> method preserves the <code class="docutils literal notranslate"><span class="pre">LiteralString</span></code>
|
||
type. There are several other <code class="docutils literal notranslate"><span class="pre">str</span></code> methods that preserve
|
||
<code class="docutils literal notranslate"><span class="pre">LiteralString</span></code>.</p>
|
||
<p>We propose updating the stub for <code class="docutils literal notranslate"><span class="pre">str</span></code> in typeshed so that the
|
||
methods are overloaded with the <code class="docutils literal notranslate"><span class="pre">LiteralString</span></code>-preserving
|
||
versions. This means type checkers do not have to hardcode
|
||
<code class="docutils literal notranslate"><span class="pre">LiteralString</span></code> behavior for each method. It also lets us easily
|
||
support new methods in the future by updating the typeshed stub.</p>
|
||
<p>For example, to preserve literal types for the <code class="docutils literal notranslate"><span class="pre">capitalize</span></code> method,
|
||
we would change the stub as below:</p>
|
||
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="c1"># before</span>
|
||
<span class="k">def</span> <span class="nf">capitalize</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-></span> <span class="nb">str</span><span class="p">:</span> <span class="o">...</span>
|
||
|
||
<span class="c1"># after</span>
|
||
<span class="nd">@overload</span>
|
||
<span class="k">def</span> <span class="nf">capitalize</span><span class="p">(</span><span class="bp">self</span><span class="p">:</span> <span class="n">LiteralString</span><span class="p">)</span> <span class="o">-></span> <span class="n">LiteralString</span><span class="p">:</span> <span class="o">...</span>
|
||
<span class="nd">@overload</span>
|
||
<span class="k">def</span> <span class="nf">capitalize</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-></span> <span class="nb">str</span><span class="p">:</span> <span class="o">...</span>
|
||
</pre></div>
|
||
</div>
|
||
<p>The downside of changing the <code class="docutils literal notranslate"><span class="pre">str</span></code> stub is that the stub becomes
|
||
more complicated and can make error messages harder to
|
||
understand. Type checkers may need to special-case <code class="docutils literal notranslate"><span class="pre">str</span></code> to make
|
||
error messages understandable for users.</p>
|
||
<p>Below is an exhaustive list of <code class="docutils literal notranslate"><span class="pre">str</span></code> methods which, when called with
|
||
arguments of type <code class="docutils literal notranslate"><span class="pre">LiteralString</span></code>, must be treated as returning a
|
||
<code class="docutils literal notranslate"><span class="pre">LiteralString</span></code>. If this PEP is accepted, we will update these
|
||
method signatures in typeshed:</p>
|
||
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="nd">@overload</span>
|
||
<span class="k">def</span> <span class="nf">capitalize</span><span class="p">(</span><span class="bp">self</span><span class="p">:</span> <span class="n">LiteralString</span><span class="p">)</span> <span class="o">-></span> <span class="n">LiteralString</span><span class="p">:</span> <span class="o">...</span>
|
||
<span class="nd">@overload</span>
|
||
<span class="k">def</span> <span class="nf">capitalize</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-></span> <span class="nb">str</span><span class="p">:</span> <span class="o">...</span>
|
||
|
||
<span class="nd">@overload</span>
|
||
<span class="k">def</span> <span class="nf">casefold</span><span class="p">(</span><span class="bp">self</span><span class="p">:</span> <span class="n">LiteralString</span><span class="p">)</span> <span class="o">-></span> <span class="n">LiteralString</span><span class="p">:</span> <span class="o">...</span>
|
||
<span class="nd">@overload</span>
|
||
<span class="k">def</span> <span class="nf">casefold</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-></span> <span class="nb">str</span><span class="p">:</span> <span class="o">...</span>
|
||
|
||
<span class="nd">@overload</span>
|
||
<span class="k">def</span> <span class="nf">center</span><span class="p">(</span><span class="bp">self</span><span class="p">:</span> <span class="n">LiteralString</span><span class="p">,</span> <span class="n">__width</span><span class="p">:</span> <span class="n">SupportsIndex</span><span class="p">,</span> <span class="n">__fillchar</span><span class="p">:</span> <span class="n">LiteralString</span> <span class="o">=</span> <span class="o">...</span><span class="p">)</span> <span class="o">-></span> <span class="n">LiteralString</span><span class="p">:</span> <span class="o">...</span>
|
||
<span class="nd">@overload</span>
|
||
<span class="k">def</span> <span class="nf">center</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">__width</span><span class="p">:</span> <span class="n">SupportsIndex</span><span class="p">,</span> <span class="n">__fillchar</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="o">...</span><span class="p">)</span> <span class="o">-></span> <span class="nb">str</span><span class="p">:</span> <span class="o">...</span>
|
||
|
||
<span class="k">if</span> <span class="n">sys</span><span class="o">.</span><span class="n">version_info</span> <span class="o">>=</span> <span class="p">(</span><span class="mi">3</span><span class="p">,</span> <span class="mi">8</span><span class="p">):</span>
|
||
<span class="nd">@overload</span>
|
||
<span class="k">def</span> <span class="nf">expandtabs</span><span class="p">(</span><span class="bp">self</span><span class="p">:</span> <span class="n">LiteralString</span><span class="p">,</span> <span class="n">tabsize</span><span class="p">:</span> <span class="n">SupportsIndex</span> <span class="o">=</span> <span class="o">...</span><span class="p">)</span> <span class="o">-></span> <span class="n">LiteralString</span><span class="p">:</span> <span class="o">...</span>
|
||
<span class="nd">@overload</span>
|
||
<span class="k">def</span> <span class="nf">expandtabs</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">tabsize</span><span class="p">:</span> <span class="n">SupportsIndex</span> <span class="o">=</span> <span class="o">...</span><span class="p">)</span> <span class="o">-></span> <span class="nb">str</span><span class="p">:</span> <span class="o">...</span>
|
||
|
||
<span class="k">else</span><span class="p">:</span>
|
||
<span class="nd">@overload</span>
|
||
<span class="k">def</span> <span class="nf">expandtabs</span><span class="p">(</span><span class="bp">self</span><span class="p">:</span> <span class="n">LiteralString</span><span class="p">,</span> <span class="n">tabsize</span><span class="p">:</span> <span class="nb">int</span> <span class="o">=</span> <span class="o">...</span><span class="p">)</span> <span class="o">-></span> <span class="n">LiteralString</span><span class="p">:</span> <span class="o">...</span>
|
||
<span class="nd">@overload</span>
|
||
<span class="k">def</span> <span class="nf">expandtabs</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">tabsize</span><span class="p">:</span> <span class="nb">int</span> <span class="o">=</span> <span class="o">...</span><span class="p">)</span> <span class="o">-></span> <span class="nb">str</span><span class="p">:</span> <span class="o">...</span>
|
||
|
||
<span class="nd">@overload</span>
|
||
<span class="k">def</span> <span class="nf">format</span><span class="p">(</span><span class="bp">self</span><span class="p">:</span> <span class="n">LiteralString</span><span class="p">,</span> <span class="o">*</span><span class="n">args</span><span class="p">:</span> <span class="n">LiteralString</span><span class="p">,</span> <span class="o">**</span><span class="n">kwargs</span><span class="p">:</span> <span class="n">LiteralString</span><span class="p">)</span> <span class="o">-></span> <span class="n">LiteralString</span><span class="p">:</span> <span class="o">...</span>
|
||
<span class="nd">@overload</span>
|
||
<span class="k">def</span> <span class="nf">format</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="o">*</span><span class="n">args</span><span class="p">:</span> <span class="nb">str</span><span class="p">,</span> <span class="o">**</span><span class="n">kwargs</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-></span> <span class="nb">str</span><span class="p">:</span> <span class="o">...</span>
|
||
|
||
<span class="nd">@overload</span>
|
||
<span class="k">def</span> <span class="nf">join</span><span class="p">(</span><span class="bp">self</span><span class="p">:</span> <span class="n">LiteralString</span><span class="p">,</span> <span class="n">__iterable</span><span class="p">:</span> <span class="n">Iterable</span><span class="p">[</span><span class="n">LiteralString</span><span class="p">])</span> <span class="o">-></span> <span class="n">LiteralString</span><span class="p">:</span> <span class="o">...</span>
|
||
<span class="nd">@overload</span>
|
||
<span class="k">def</span> <span class="nf">join</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">__iterable</span><span class="p">:</span> <span class="n">Iterable</span><span class="p">[</span><span class="nb">str</span><span class="p">])</span> <span class="o">-></span> <span class="nb">str</span><span class="p">:</span> <span class="o">...</span>
|
||
|
||
<span class="nd">@overload</span>
|
||
<span class="k">def</span> <span class="nf">ljust</span><span class="p">(</span><span class="bp">self</span><span class="p">:</span> <span class="n">LiteralString</span><span class="p">,</span> <span class="n">__width</span><span class="p">:</span> <span class="n">SupportsIndex</span><span class="p">,</span> <span class="n">__fillchar</span><span class="p">:</span> <span class="n">LiteralString</span> <span class="o">=</span> <span class="o">...</span><span class="p">)</span> <span class="o">-></span> <span class="n">LiteralString</span><span class="p">:</span> <span class="o">...</span>
|
||
<span class="nd">@overload</span>
|
||
<span class="k">def</span> <span class="nf">ljust</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">__width</span><span class="p">:</span> <span class="n">SupportsIndex</span><span class="p">,</span> <span class="n">__fillchar</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="o">...</span><span class="p">)</span> <span class="o">-></span> <span class="nb">str</span><span class="p">:</span> <span class="o">...</span>
|
||
|
||
<span class="nd">@overload</span>
|
||
<span class="k">def</span> <span class="nf">lower</span><span class="p">(</span><span class="bp">self</span><span class="p">:</span> <span class="n">LiteralString</span><span class="p">)</span> <span class="o">-></span> <span class="n">LiteralString</span><span class="p">:</span> <span class="o">...</span>
|
||
<span class="nd">@overload</span>
|
||
<span class="k">def</span> <span class="nf">lower</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-></span> <span class="nb">str</span><span class="p">:</span> <span class="o">...</span>
|
||
|
||
<span class="nd">@overload</span>
|
||
<span class="k">def</span> <span class="nf">lstrip</span><span class="p">(</span><span class="bp">self</span><span class="p">:</span> <span class="n">LiteralString</span><span class="p">,</span> <span class="n">__chars</span><span class="p">:</span> <span class="n">LiteralString</span> <span class="o">|</span> <span class="kc">None</span> <span class="o">=</span> <span class="o">...</span><span class="p">)</span> <span class="o">-></span> <span class="n">LiteralString</span><span class="p">:</span> <span class="o">...</span>
|
||
<span class="nd">@overload</span>
|
||
<span class="k">def</span> <span class="nf">lstrip</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">__chars</span><span class="p">:</span> <span class="nb">str</span> <span class="o">|</span> <span class="kc">None</span> <span class="o">=</span> <span class="o">...</span><span class="p">)</span> <span class="o">-></span> <span class="nb">str</span><span class="p">:</span> <span class="o">...</span>
|
||
|
||
<span class="nd">@overload</span>
|
||
<span class="k">def</span> <span class="nf">partition</span><span class="p">(</span><span class="bp">self</span><span class="p">:</span> <span class="n">LiteralString</span><span class="p">,</span> <span class="n">__sep</span><span class="p">:</span> <span class="n">LiteralString</span><span class="p">)</span> <span class="o">-></span> <span class="nb">tuple</span><span class="p">[</span><span class="n">LiteralString</span><span class="p">,</span> <span class="n">LiteralString</span><span class="p">,</span> <span class="n">LiteralString</span><span class="p">]:</span> <span class="o">...</span>
|
||
<span class="nd">@overload</span>
|
||
<span class="k">def</span> <span class="nf">partition</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">__sep</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-></span> <span class="nb">tuple</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="nb">str</span><span class="p">,</span> <span class="nb">str</span><span class="p">]:</span> <span class="o">...</span>
|
||
|
||
<span class="nd">@overload</span>
|
||
<span class="k">def</span> <span class="nf">replace</span><span class="p">(</span><span class="bp">self</span><span class="p">:</span> <span class="n">LiteralString</span><span class="p">,</span> <span class="n">__old</span><span class="p">:</span> <span class="n">LiteralString</span><span class="p">,</span> <span class="n">__new</span><span class="p">:</span> <span class="n">LiteralString</span><span class="p">,</span> <span class="n">__count</span><span class="p">:</span> <span class="n">SupportsIndex</span> <span class="o">=</span> <span class="o">...</span><span class="p">)</span> <span class="o">-></span> <span class="n">LiteralString</span><span class="p">:</span> <span class="o">...</span>
|
||
<span class="nd">@overload</span>
|
||
<span class="k">def</span> <span class="nf">replace</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">__old</span><span class="p">:</span> <span class="nb">str</span><span class="p">,</span> <span class="n">__new</span><span class="p">:</span> <span class="nb">str</span><span class="p">,</span> <span class="n">__count</span><span class="p">:</span> <span class="n">SupportsIndex</span> <span class="o">=</span> <span class="o">...</span><span class="p">)</span> <span class="o">-></span> <span class="nb">str</span><span class="p">:</span> <span class="o">...</span>
|
||
|
||
<span class="k">if</span> <span class="n">sys</span><span class="o">.</span><span class="n">version_info</span> <span class="o">>=</span> <span class="p">(</span><span class="mi">3</span><span class="p">,</span> <span class="mi">9</span><span class="p">):</span>
|
||
<span class="nd">@overload</span>
|
||
<span class="k">def</span> <span class="nf">removeprefix</span><span class="p">(</span><span class="bp">self</span><span class="p">:</span> <span class="n">LiteralString</span><span class="p">,</span> <span class="n">__prefix</span><span class="p">:</span> <span class="n">LiteralString</span><span class="p">)</span> <span class="o">-></span> <span class="n">LiteralString</span><span class="p">:</span> <span class="o">...</span>
|
||
<span class="nd">@overload</span>
|
||
<span class="k">def</span> <span class="nf">removeprefix</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">__prefix</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-></span> <span class="nb">str</span><span class="p">:</span> <span class="o">...</span>
|
||
|
||
<span class="nd">@overload</span>
|
||
<span class="k">def</span> <span class="nf">removesuffix</span><span class="p">(</span><span class="bp">self</span><span class="p">:</span> <span class="n">LiteralString</span><span class="p">,</span> <span class="n">__suffix</span><span class="p">:</span> <span class="n">LiteralString</span><span class="p">)</span> <span class="o">-></span> <span class="n">LiteralString</span><span class="p">:</span> <span class="o">...</span>
|
||
<span class="nd">@overload</span>
|
||
<span class="k">def</span> <span class="nf">removesuffix</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">__suffix</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-></span> <span class="nb">str</span><span class="p">:</span> <span class="o">...</span>
|
||
|
||
<span class="nd">@overload</span>
|
||
<span class="k">def</span> <span class="nf">rjust</span><span class="p">(</span><span class="bp">self</span><span class="p">:</span> <span class="n">LiteralString</span><span class="p">,</span> <span class="n">__width</span><span class="p">:</span> <span class="n">SupportsIndex</span><span class="p">,</span> <span class="n">__fillchar</span><span class="p">:</span> <span class="n">LiteralString</span> <span class="o">=</span> <span class="o">...</span><span class="p">)</span> <span class="o">-></span> <span class="n">LiteralString</span><span class="p">:</span> <span class="o">...</span>
|
||
<span class="nd">@overload</span>
|
||
<span class="k">def</span> <span class="nf">rjust</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">__width</span><span class="p">:</span> <span class="n">SupportsIndex</span><span class="p">,</span> <span class="n">__fillchar</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="o">...</span><span class="p">)</span> <span class="o">-></span> <span class="nb">str</span><span class="p">:</span> <span class="o">...</span>
|
||
|
||
<span class="nd">@overload</span>
|
||
<span class="k">def</span> <span class="nf">rpartition</span><span class="p">(</span><span class="bp">self</span><span class="p">:</span> <span class="n">LiteralString</span><span class="p">,</span> <span class="n">__sep</span><span class="p">:</span> <span class="n">LiteralString</span><span class="p">)</span> <span class="o">-></span> <span class="nb">tuple</span><span class="p">[</span><span class="n">LiteralString</span><span class="p">,</span> <span class="n">LiteralString</span><span class="p">,</span> <span class="n">LiteralString</span><span class="p">]:</span> <span class="o">...</span>
|
||
<span class="nd">@overload</span>
|
||
<span class="k">def</span> <span class="nf">rpartition</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">__sep</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-></span> <span class="nb">tuple</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="nb">str</span><span class="p">,</span> <span class="nb">str</span><span class="p">]:</span> <span class="o">...</span>
|
||
|
||
<span class="nd">@overload</span>
|
||
<span class="k">def</span> <span class="nf">rsplit</span><span class="p">(</span><span class="bp">self</span><span class="p">:</span> <span class="n">LiteralString</span><span class="p">,</span> <span class="n">sep</span><span class="p">:</span> <span class="n">LiteralString</span> <span class="o">|</span> <span class="kc">None</span> <span class="o">=</span> <span class="o">...</span><span class="p">,</span> <span class="n">maxsplit</span><span class="p">:</span> <span class="n">SupportsIndex</span> <span class="o">=</span> <span class="o">...</span><span class="p">)</span> <span class="o">-></span> <span class="nb">list</span><span class="p">[</span><span class="n">LiteralString</span><span class="p">]:</span> <span class="o">...</span>
|
||
<span class="nd">@overload</span>
|
||
<span class="k">def</span> <span class="nf">rsplit</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">sep</span><span class="p">:</span> <span class="nb">str</span> <span class="o">|</span> <span class="kc">None</span> <span class="o">=</span> <span class="o">...</span><span class="p">,</span> <span class="n">maxsplit</span><span class="p">:</span> <span class="n">SupportsIndex</span> <span class="o">=</span> <span class="o">...</span><span class="p">)</span> <span class="o">-></span> <span class="nb">list</span><span class="p">[</span><span class="nb">str</span><span class="p">]:</span> <span class="o">...</span>
|
||
|
||
<span class="nd">@overload</span>
|
||
<span class="k">def</span> <span class="nf">rstrip</span><span class="p">(</span><span class="bp">self</span><span class="p">:</span> <span class="n">LiteralString</span><span class="p">,</span> <span class="n">__chars</span><span class="p">:</span> <span class="n">LiteralString</span> <span class="o">|</span> <span class="kc">None</span> <span class="o">=</span> <span class="o">...</span><span class="p">)</span> <span class="o">-></span> <span class="n">LiteralString</span><span class="p">:</span> <span class="o">...</span>
|
||
<span class="nd">@overload</span>
|
||
<span class="k">def</span> <span class="nf">rstrip</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">__chars</span><span class="p">:</span> <span class="nb">str</span> <span class="o">|</span> <span class="kc">None</span> <span class="o">=</span> <span class="o">...</span><span class="p">)</span> <span class="o">-></span> <span class="nb">str</span><span class="p">:</span> <span class="o">...</span>
|
||
|
||
<span class="nd">@overload</span>
|
||
<span class="k">def</span> <span class="nf">split</span><span class="p">(</span><span class="bp">self</span><span class="p">:</span> <span class="n">LiteralString</span><span class="p">,</span> <span class="n">sep</span><span class="p">:</span> <span class="n">LiteralString</span> <span class="o">|</span> <span class="kc">None</span> <span class="o">=</span> <span class="o">...</span><span class="p">,</span> <span class="n">maxsplit</span><span class="p">:</span> <span class="n">SupportsIndex</span> <span class="o">=</span> <span class="o">...</span><span class="p">)</span> <span class="o">-></span> <span class="nb">list</span><span class="p">[</span><span class="n">LiteralString</span><span class="p">]:</span> <span class="o">...</span>
|
||
<span class="nd">@overload</span>
|
||
<span class="k">def</span> <span class="nf">split</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">sep</span><span class="p">:</span> <span class="nb">str</span> <span class="o">|</span> <span class="kc">None</span> <span class="o">=</span> <span class="o">...</span><span class="p">,</span> <span class="n">maxsplit</span><span class="p">:</span> <span class="n">SupportsIndex</span> <span class="o">=</span> <span class="o">...</span><span class="p">)</span> <span class="o">-></span> <span class="nb">list</span><span class="p">[</span><span class="nb">str</span><span class="p">]:</span> <span class="o">...</span>
|
||
|
||
<span class="nd">@overload</span>
|
||
<span class="k">def</span> <span class="nf">splitlines</span><span class="p">(</span><span class="bp">self</span><span class="p">:</span> <span class="n">LiteralString</span><span class="p">,</span> <span class="n">keepends</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="o">...</span><span class="p">)</span> <span class="o">-></span> <span class="nb">list</span><span class="p">[</span><span class="n">LiteralString</span><span class="p">]:</span> <span class="o">...</span>
|
||
<span class="nd">@overload</span>
|
||
<span class="k">def</span> <span class="nf">splitlines</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">keepends</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="o">...</span><span class="p">)</span> <span class="o">-></span> <span class="nb">list</span><span class="p">[</span><span class="nb">str</span><span class="p">]:</span> <span class="o">...</span>
|
||
|
||
<span class="nd">@overload</span>
|
||
<span class="k">def</span> <span class="nf">strip</span><span class="p">(</span><span class="bp">self</span><span class="p">:</span> <span class="n">LiteralString</span><span class="p">,</span> <span class="n">__chars</span><span class="p">:</span> <span class="n">LiteralString</span> <span class="o">|</span> <span class="kc">None</span> <span class="o">=</span> <span class="o">...</span><span class="p">)</span> <span class="o">-></span> <span class="n">LiteralString</span><span class="p">:</span> <span class="o">...</span>
|
||
<span class="nd">@overload</span>
|
||
<span class="k">def</span> <span class="nf">strip</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">__chars</span><span class="p">:</span> <span class="nb">str</span> <span class="o">|</span> <span class="kc">None</span> <span class="o">=</span> <span class="o">...</span><span class="p">)</span> <span class="o">-></span> <span class="nb">str</span><span class="p">:</span> <span class="o">...</span>
|
||
|
||
<span class="nd">@overload</span>
|
||
<span class="k">def</span> <span class="nf">swapcase</span><span class="p">(</span><span class="bp">self</span><span class="p">:</span> <span class="n">LiteralString</span><span class="p">)</span> <span class="o">-></span> <span class="n">LiteralString</span><span class="p">:</span> <span class="o">...</span>
|
||
<span class="nd">@overload</span>
|
||
<span class="k">def</span> <span class="nf">swapcase</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-></span> <span class="nb">str</span><span class="p">:</span> <span class="o">...</span>
|
||
|
||
<span class="nd">@overload</span>
|
||
<span class="k">def</span> <span class="nf">title</span><span class="p">(</span><span class="bp">self</span><span class="p">:</span> <span class="n">LiteralString</span><span class="p">)</span> <span class="o">-></span> <span class="n">LiteralString</span><span class="p">:</span> <span class="o">...</span>
|
||
<span class="nd">@overload</span>
|
||
<span class="k">def</span> <span class="nf">title</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-></span> <span class="nb">str</span><span class="p">:</span> <span class="o">...</span>
|
||
|
||
<span class="nd">@overload</span>
|
||
<span class="k">def</span> <span class="nf">upper</span><span class="p">(</span><span class="bp">self</span><span class="p">:</span> <span class="n">LiteralString</span><span class="p">)</span> <span class="o">-></span> <span class="n">LiteralString</span><span class="p">:</span> <span class="o">...</span>
|
||
<span class="nd">@overload</span>
|
||
<span class="k">def</span> <span class="nf">upper</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-></span> <span class="nb">str</span><span class="p">:</span> <span class="o">...</span>
|
||
|
||
<span class="nd">@overload</span>
|
||
<span class="k">def</span> <span class="nf">zfill</span><span class="p">(</span><span class="bp">self</span><span class="p">:</span> <span class="n">LiteralString</span><span class="p">,</span> <span class="n">__width</span><span class="p">:</span> <span class="n">SupportsIndex</span><span class="p">)</span> <span class="o">-></span> <span class="n">LiteralString</span><span class="p">:</span> <span class="o">...</span>
|
||
<span class="nd">@overload</span>
|
||
<span class="k">def</span> <span class="nf">zfill</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">__width</span><span class="p">:</span> <span class="n">SupportsIndex</span><span class="p">)</span> <span class="o">-></span> <span class="nb">str</span><span class="p">:</span> <span class="o">...</span>
|
||
|
||
<span class="nd">@overload</span>
|
||
<span class="k">def</span> <span class="fm">__add__</span><span class="p">(</span><span class="bp">self</span><span class="p">:</span> <span class="n">LiteralString</span><span class="p">,</span> <span class="n">__s</span><span class="p">:</span> <span class="n">LiteralString</span><span class="p">)</span> <span class="o">-></span> <span class="n">LiteralString</span><span class="p">:</span> <span class="o">...</span>
|
||
<span class="nd">@overload</span>
|
||
<span class="k">def</span> <span class="fm">__add__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">__s</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-></span> <span class="nb">str</span><span class="p">:</span> <span class="o">...</span>
|
||
|
||
<span class="nd">@overload</span>
|
||
<span class="k">def</span> <span class="fm">__iter__</span><span class="p">(</span><span class="bp">self</span><span class="p">:</span> <span class="n">LiteralString</span><span class="p">)</span> <span class="o">-></span> <span class="n">Iterator</span><span class="p">[</span><span class="nb">str</span><span class="p">]:</span> <span class="o">...</span>
|
||
<span class="nd">@overload</span>
|
||
<span class="k">def</span> <span class="fm">__iter__</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-></span> <span class="n">Iterator</span><span class="p">[</span><span class="nb">str</span><span class="p">]:</span> <span class="o">...</span>
|
||
|
||
<span class="nd">@overload</span>
|
||
<span class="k">def</span> <span class="fm">__mod__</span><span class="p">(</span><span class="bp">self</span><span class="p">:</span> <span class="n">LiteralString</span><span class="p">,</span> <span class="n">__x</span><span class="p">:</span> <span class="n">Union</span><span class="p">[</span><span class="n">LiteralString</span><span class="p">,</span> <span class="n">Tuple</span><span class="p">[</span><span class="n">LiteralString</span><span class="p">,</span> <span class="o">...</span><span class="p">]])</span> <span class="o">-></span> <span class="n">LiteralString</span><span class="p">:</span> <span class="o">...</span>
|
||
<span class="nd">@overload</span>
|
||
<span class="k">def</span> <span class="fm">__mod__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">__x</span><span class="p">:</span> <span class="n">Union</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">Tuple</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="o">...</span><span class="p">]])</span> <span class="o">-></span> <span class="nb">str</span><span class="p">:</span> <span class="o">...</span>
|
||
|
||
<span class="nd">@overload</span>
|
||
<span class="k">def</span> <span class="fm">__mul__</span><span class="p">(</span><span class="bp">self</span><span class="p">:</span> <span class="n">LiteralString</span><span class="p">,</span> <span class="n">__n</span><span class="p">:</span> <span class="n">SupportsIndex</span><span class="p">)</span> <span class="o">-></span> <span class="n">LiteralString</span><span class="p">:</span> <span class="o">...</span>
|
||
<span class="nd">@overload</span>
|
||
<span class="k">def</span> <span class="fm">__mul__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">__n</span><span class="p">:</span> <span class="n">SupportsIndex</span><span class="p">)</span> <span class="o">-></span> <span class="nb">str</span><span class="p">:</span> <span class="o">...</span>
|
||
|
||
<span class="nd">@overload</span>
|
||
<span class="k">def</span> <span class="fm">__repr__</span><span class="p">(</span><span class="bp">self</span><span class="p">:</span> <span class="n">LiteralString</span><span class="p">)</span> <span class="o">-></span> <span class="n">LiteralString</span><span class="p">:</span> <span class="o">...</span>
|
||
<span class="nd">@overload</span>
|
||
<span class="k">def</span> <span class="fm">__repr__</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-></span> <span class="nb">str</span><span class="p">:</span> <span class="o">...</span>
|
||
|
||
<span class="nd">@overload</span>
|
||
<span class="k">def</span> <span class="fm">__rmul__</span><span class="p">(</span><span class="bp">self</span><span class="p">:</span> <span class="n">LiteralString</span><span class="p">,</span> <span class="n">n</span><span class="p">:</span> <span class="n">SupportsIndex</span><span class="p">)</span> <span class="o">-></span> <span class="n">LiteralString</span><span class="p">:</span> <span class="o">...</span>
|
||
<span class="nd">@overload</span>
|
||
<span class="k">def</span> <span class="fm">__rmul__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">n</span><span class="p">:</span> <span class="n">SupportsIndex</span><span class="p">)</span> <span class="o">-></span> <span class="nb">str</span><span class="p">:</span> <span class="o">...</span>
|
||
|
||
<span class="nd">@overload</span>
|
||
<span class="k">def</span> <span class="fm">__str__</span><span class="p">(</span><span class="bp">self</span><span class="p">:</span> <span class="n">LiteralString</span><span class="p">)</span> <span class="o">-></span> <span class="n">LiteralString</span><span class="p">:</span> <span class="o">...</span>
|
||
<span class="nd">@overload</span>
|
||
<span class="k">def</span> <span class="fm">__str__</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-></span> <span class="nb">str</span><span class="p">:</span> <span class="o">...</span>
|
||
</pre></div>
|
||
</div>
|
||
</section>
|
||
<section id="appendix-d-guidelines-for-using-literalstring-in-stubs">
|
||
<h2><a class="toc-backref" href="#appendix-d-guidelines-for-using-literalstring-in-stubs" role="doc-backlink">Appendix D: Guidelines for using <code class="docutils literal notranslate"><span class="pre">LiteralString</span></code> in Stubs</a></h2>
|
||
<p>Libraries that do not contain type annotations within their source may
|
||
specify type stubs in Typeshed. Libraries written in other languages,
|
||
such as those for machine learning, may also provide Python type
|
||
stubs. This means the type checker cannot verify that the type
|
||
annotations match the source code and must trust the type stub. Thus,
|
||
authors of type stubs need to be careful when using <code class="docutils literal notranslate"><span class="pre">LiteralString</span></code>,
|
||
since a function may falsely appear to be safe when it is not.</p>
|
||
<p>We recommend the following guidelines for using <code class="docutils literal notranslate"><span class="pre">LiteralString</span></code> in stubs:</p>
|
||
<ul>
|
||
<li>If the stub is for a pure function, we recommend using <code class="docutils literal notranslate"><span class="pre">LiteralString</span></code>
|
||
in the return type of the function or of its overloads only if all
|
||
the corresponding parameters have literal types (i.e.,
|
||
<code class="docutils literal notranslate"><span class="pre">LiteralString</span></code> or <code class="docutils literal notranslate"><span class="pre">Literal["a",</span> <span class="pre">"b"]</span></code>).<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="c1"># OK</span>
|
||
<span class="nd">@overload</span>
|
||
<span class="k">def</span> <span class="nf">my_transform</span><span class="p">(</span><span class="n">x</span><span class="p">:</span> <span class="n">LiteralString</span><span class="p">,</span> <span class="n">y</span><span class="p">:</span> <span class="n">Literal</span><span class="p">[</span><span class="s2">"a"</span><span class="p">,</span> <span class="s2">"b"</span><span class="p">])</span> <span class="o">-></span> <span class="n">LiteralString</span><span class="p">:</span> <span class="o">...</span>
|
||
<span class="nd">@overload</span>
|
||
<span class="k">def</span> <span class="nf">my_transform</span><span class="p">(</span><span class="n">x</span><span class="p">:</span> <span class="nb">str</span><span class="p">,</span> <span class="n">y</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-></span> <span class="nb">str</span><span class="p">:</span> <span class="o">...</span>
|
||
|
||
<span class="c1"># Not OK</span>
|
||
<span class="nd">@overload</span>
|
||
<span class="k">def</span> <span class="nf">my_transform</span><span class="p">(</span><span class="n">x</span><span class="p">:</span> <span class="n">LiteralString</span><span class="p">,</span> <span class="n">y</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-></span> <span class="n">LiteralString</span><span class="p">:</span> <span class="o">...</span>
|
||
<span class="nd">@overload</span>
|
||
<span class="k">def</span> <span class="nf">my_transform</span><span class="p">(</span><span class="n">x</span><span class="p">:</span> <span class="nb">str</span><span class="p">,</span> <span class="n">y</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-></span> <span class="nb">str</span><span class="p">:</span> <span class="o">...</span>
|
||
</pre></div>
|
||
</div>
|
||
</li>
|
||
<li>If the stub is for a <code class="docutils literal notranslate"><span class="pre">staticmethod</span></code>, we recommend the same
|
||
guideline as above.</li>
|
||
<li>If the stub is for any other kind of method, we recommend against
|
||
using <code class="docutils literal notranslate"><span class="pre">LiteralString</span></code> in the return type of the method or any of
|
||
its overloads. This is because, even if all the explicit parameters
|
||
have type <code class="docutils literal notranslate"><span class="pre">LiteralString</span></code>, the object itself may be created using
|
||
user data and thus the returned value may be user-controlled.</li>
|
||
<li>If the stub is for a class attribute or global variable, we also
|
||
recommend against using <code class="docutils literal notranslate"><span class="pre">LiteralString</span></code> because untyped code
|
||
may write arbitrary values to the attribute.</li>
|
||
</ul>
|
||
<p>However, we leave the final call to the library author. They may use
|
||
<code class="docutils literal notranslate"><span class="pre">LiteralString</span></code> if they feel confident that the string returned by
|
||
the method or function or the string stored in the attribute is
|
||
guaranteed to have a literal type — i.e., the string is created by
|
||
applying only literal-preserving <code class="docutils literal notranslate"><span class="pre">str</span></code> operations to a string
|
||
literal.</p>
|
||
<p>Note that these guidelines do not apply to inline type annotations
|
||
since the type checker can verify that, say, a method returning
|
||
<code class="docutils literal notranslate"><span class="pre">LiteralString</span></code> does in fact return an expression of that type.</p>
|
||
</section>
|
||
<section id="resources">
|
||
<h2><a class="toc-backref" href="#resources" role="doc-backlink">Resources</a></h2>
|
||
<section id="literal-string-types-in-scala">
|
||
<h3><a class="toc-backref" href="#literal-string-types-in-scala" role="doc-backlink">Literal String Types in Scala</a></h3>
|
||
<p>Scala <a class="reference external" href="https://www.scala-lang.org/api/2.13.x/scala/Singleton.html">uses</a>
|
||
<code class="docutils literal notranslate"><span class="pre">Singleton</span></code> as the supertype for singleton types, which include
|
||
literal string types, such as <code class="docutils literal notranslate"><span class="pre">"foo"</span></code>. <code class="docutils literal notranslate"><span class="pre">Singleton</span></code> is Scala’s
|
||
generalized analogue of this PEP’s <code class="docutils literal notranslate"><span class="pre">LiteralString</span></code>.</p>
|
||
<p>Tamer Abdulradi showed how Scala’s literal string types can be used
|
||
for “Preventing SQL injection at compile time” in his Scala Days talk
|
||
<a class="reference external" href="https://slideslive.com/38907881/literal-types-what-they-are-good-for">Literal types: What they are good for?</a>
|
||
(slides 52 to 68).</p>
|
||
</section>
|
||
<section id="thanks">
|
||
<h3><a class="toc-backref" href="#thanks" role="doc-backlink">Thanks</a></h3>
|
||
<p>Thanks to the following people for their feedback on the PEP:</p>
|
||
<p>Edward Qiu, Jia Chen, Shannon Zhu, Gregory P. Smith, Никита Соболев,
|
||
CAM Gerlach, Arie Bovenberg, David Foster, and Shengye Wan.</p>
|
||
</section>
|
||
</section>
|
||
<section id="copyright">
|
||
<h2><a class="toc-backref" href="#copyright" role="doc-backlink">Copyright</a></h2>
|
||
<p>This document is placed in the public domain or under the
|
||
CC0-1.0-Universal license, whichever is more permissive.</p>
|
||
</section>
|
||
</section>
|
||
<hr class="docutils" />
|
||
<p>Source: <a class="reference external" href="https://github.com/python/peps/blob/main/peps/pep-0675.rst">https://github.com/python/peps/blob/main/peps/pep-0675.rst</a></p>
|
||
<p>Last modified: <a class="reference external" href="https://github.com/python/peps/commits/main/peps/pep-0675.rst">2024-06-11 22:12:09 GMT</a></p>
|
||
|
||
</article>
|
||
<nav id="pep-sidebar">
|
||
<h2>Contents</h2>
|
||
<ul>
|
||
<li><a class="reference internal" href="#abstract">Abstract</a></li>
|
||
<li><a class="reference internal" href="#motivation">Motivation</a><ul>
|
||
<li><a class="reference internal" href="#usage-statistics">Usage statistics</a></li>
|
||
</ul>
|
||
</li>
|
||
<li><a class="reference internal" href="#rationale">Rationale</a></li>
|
||
<li><a class="reference internal" href="#specification">Specification</a><ul>
|
||
<li><a class="reference internal" href="#runtime-behavior">Runtime Behavior</a></li>
|
||
<li><a class="reference internal" href="#valid-locations-for-literalstring">Valid Locations for <code class="docutils literal notranslate"><span class="pre">LiteralString</span></code></a></li>
|
||
<li><a class="reference internal" href="#type-inference">Type Inference</a><ul>
|
||
<li><a class="reference internal" href="#inferring-literalstring">Inferring <code class="docutils literal notranslate"><span class="pre">LiteralString</span></code></a></li>
|
||
<li><a class="reference internal" href="#examples">Examples</a></li>
|
||
<li><a class="reference internal" href="#interaction-with-typevars-and-generics">Interaction with TypeVars and Generics</a></li>
|
||
<li><a class="reference internal" href="#interactions-with-overloads">Interactions with Overloads</a></li>
|
||
</ul>
|
||
</li>
|
||
</ul>
|
||
</li>
|
||
<li><a class="reference internal" href="#backwards-compatibility">Backwards Compatibility</a></li>
|
||
<li><a class="reference internal" href="#rejected-alternatives">Rejected Alternatives</a><ul>
|
||
<li><a class="reference internal" href="#why-not-use-tool-x">Why not use tool X?</a></li>
|
||
<li><a class="reference internal" href="#why-not-use-a-newtype-for-str">Why not use a <code class="docutils literal notranslate"><span class="pre">NewType</span></code> for <code class="docutils literal notranslate"><span class="pre">str</span></code>?</a></li>
|
||
<li><a class="reference internal" href="#why-not-try-to-emulate-trusted-types">Why not try to emulate Trusted Types?</a></li>
|
||
<li><a class="reference internal" href="#runtime-checkable-literalstring">Runtime Checkable <code class="docutils literal notranslate"><span class="pre">LiteralString</span></code></a></li>
|
||
<li><a class="reference internal" href="#rejected-names">Rejected Names</a></li>
|
||
<li><a class="reference internal" href="#literalbytes"><code class="docutils literal notranslate"><span class="pre">LiteralBytes</span></code></a></li>
|
||
</ul>
|
||
</li>
|
||
<li><a class="reference internal" href="#reference-implementation">Reference Implementation</a></li>
|
||
<li><a class="reference internal" href="#appendix-a-other-uses">Appendix A: Other Uses</a><ul>
|
||
<li><a class="reference internal" href="#command-injection">Command Injection</a></li>
|
||
<li><a class="reference internal" href="#cross-site-scripting-xss">Cross Site Scripting (XSS)</a></li>
|
||
<li><a class="reference internal" href="#server-side-template-injection-ssti">Server Side Template Injection (SSTI)</a></li>
|
||
<li><a class="reference internal" href="#logging-format-string-injection">Logging Format String Injection</a></li>
|
||
</ul>
|
||
</li>
|
||
<li><a class="reference internal" href="#appendix-b-limitations">Appendix B: Limitations</a></li>
|
||
<li><a class="reference internal" href="#appendix-c-str-methods-that-preserve-literalstring">Appendix C: <code class="docutils literal notranslate"><span class="pre">str</span></code> methods that preserve <code class="docutils literal notranslate"><span class="pre">LiteralString</span></code></a></li>
|
||
<li><a class="reference internal" href="#appendix-d-guidelines-for-using-literalstring-in-stubs">Appendix D: Guidelines for using <code class="docutils literal notranslate"><span class="pre">LiteralString</span></code> in Stubs</a></li>
|
||
<li><a class="reference internal" href="#resources">Resources</a><ul>
|
||
<li><a class="reference internal" href="#literal-string-types-in-scala">Literal String Types in Scala</a></li>
|
||
<li><a class="reference internal" href="#thanks">Thanks</a></li>
|
||
</ul>
|
||
</li>
|
||
<li><a class="reference internal" href="#copyright">Copyright</a></li>
|
||
</ul>
|
||
|
||
<br>
|
||
<a id="source" href="https://github.com/python/peps/blob/main/peps/pep-0675.rst">Page Source (GitHub)</a>
|
||
</nav>
|
||
</section>
|
||
<script src="../_static/colour_scheme.js"></script>
|
||
<script src="../_static/wrap_tables.js"></script>
|
||
<script src="../_static/sticky_banner.js"></script>
|
||
</body>
|
||
</html> |