898 lines
69 KiB
HTML
898 lines
69 KiB
HTML
|
||
<!DOCTYPE html>
|
||
<html lang="en">
|
||
<head>
|
||
<meta charset="utf-8">
|
||
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||
<meta name="color-scheme" content="light dark">
|
||
<title>PEP 456 – Secure and interchangeable hash algorithm | peps.python.org</title>
|
||
<link rel="shortcut icon" href="../_static/py.png">
|
||
<link rel="canonical" href="https://peps.python.org/pep-0456/">
|
||
<link rel="stylesheet" href="../_static/style.css" type="text/css">
|
||
<link rel="stylesheet" href="../_static/mq.css" type="text/css">
|
||
<link rel="stylesheet" href="../_static/pygments.css" type="text/css" media="(prefers-color-scheme: light)" id="pyg-light">
|
||
<link rel="stylesheet" href="../_static/pygments_dark.css" type="text/css" media="(prefers-color-scheme: dark)" id="pyg-dark">
|
||
<link rel="alternate" type="application/rss+xml" title="Latest PEPs" href="https://peps.python.org/peps.rss">
|
||
<meta property="og:title" content='PEP 456 – Secure and interchangeable hash algorithm | peps.python.org'>
|
||
<meta property="og:description" content="This PEP proposes SipHash as default string and bytes hash algorithm to properly fix hash randomization once and for all. It also proposes modifications to Python’s C code in order to unify the hash code and to make it easily interchangeable.">
|
||
<meta property="og:type" content="website">
|
||
<meta property="og:url" content="https://peps.python.org/pep-0456/">
|
||
<meta property="og:site_name" content="Python Enhancement Proposals (PEPs)">
|
||
<meta property="og:image" content="https://peps.python.org/_static/og-image.png">
|
||
<meta property="og:image:alt" content="Python PEPs">
|
||
<meta property="og:image:width" content="200">
|
||
<meta property="og:image:height" content="200">
|
||
<meta name="description" content="This PEP proposes SipHash as default string and bytes hash algorithm to properly fix hash randomization once and for all. It also proposes modifications to Python’s C code in order to unify the hash code and to make it easily interchangeable.">
|
||
<meta name="theme-color" content="#3776ab">
|
||
</head>
|
||
<body>
|
||
|
||
<svg xmlns="http://www.w3.org/2000/svg" style="display: none;">
|
||
<symbol id="svg-sun-half" viewBox="0 0 24 24" pointer-events="all">
|
||
<title>Following system colour scheme</title>
|
||
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" fill="none"
|
||
stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round">
|
||
<circle cx="12" cy="12" r="9"></circle>
|
||
<path d="M12 3v18m0-12l4.65-4.65M12 14.3l7.37-7.37M12 19.6l8.85-8.85"></path>
|
||
</svg>
|
||
</symbol>
|
||
<symbol id="svg-moon" viewBox="0 0 24 24" pointer-events="all">
|
||
<title>Selected dark colour scheme</title>
|
||
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" fill="none"
|
||
stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round">
|
||
<path stroke="none" d="M0 0h24v24H0z" fill="none"></path>
|
||
<path d="M12 3c.132 0 .263 0 .393 0a7.5 7.5 0 0 0 7.92 12.446a9 9 0 1 1 -8.313 -12.454z"></path>
|
||
</svg>
|
||
</symbol>
|
||
<symbol id="svg-sun" viewBox="0 0 24 24" pointer-events="all">
|
||
<title>Selected light colour scheme</title>
|
||
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" fill="none"
|
||
stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round">
|
||
<circle cx="12" cy="12" r="5"></circle>
|
||
<line x1="12" y1="1" x2="12" y2="3"></line>
|
||
<line x1="12" y1="21" x2="12" y2="23"></line>
|
||
<line x1="4.22" y1="4.22" x2="5.64" y2="5.64"></line>
|
||
<line x1="18.36" y1="18.36" x2="19.78" y2="19.78"></line>
|
||
<line x1="1" y1="12" x2="3" y2="12"></line>
|
||
<line x1="21" y1="12" x2="23" y2="12"></line>
|
||
<line x1="4.22" y1="19.78" x2="5.64" y2="18.36"></line>
|
||
<line x1="18.36" y1="5.64" x2="19.78" y2="4.22"></line>
|
||
</svg>
|
||
</symbol>
|
||
</svg>
|
||
<script>
|
||
|
||
document.documentElement.dataset.colour_scheme = localStorage.getItem("colour_scheme") || "auto"
|
||
</script>
|
||
<section id="pep-page-section">
|
||
<header>
|
||
<h1>Python Enhancement Proposals</h1>
|
||
<ul class="breadcrumbs">
|
||
<li><a href="https://www.python.org/" title="The Python Programming Language">Python</a> » </li>
|
||
<li><a href="../pep-0000/">PEP Index</a> » </li>
|
||
<li>PEP 456</li>
|
||
</ul>
|
||
<button id="colour-scheme-cycler" onClick="setColourScheme(nextColourScheme())">
|
||
<svg aria-hidden="true" class="colour-scheme-icon-when-auto"><use href="#svg-sun-half"></use></svg>
|
||
<svg aria-hidden="true" class="colour-scheme-icon-when-dark"><use href="#svg-moon"></use></svg>
|
||
<svg aria-hidden="true" class="colour-scheme-icon-when-light"><use href="#svg-sun"></use></svg>
|
||
<span class="visually-hidden">Toggle light / dark / auto colour theme</span>
|
||
</button>
|
||
</header>
|
||
<article>
|
||
<section id="pep-content">
|
||
<h1 class="page-title">PEP 456 – Secure and interchangeable hash algorithm</h1>
|
||
<dl class="rfc2822 field-list simple">
|
||
<dt class="field-odd">Author<span class="colon">:</span></dt>
|
||
<dd class="field-odd">Christian Heimes <christian at python.org></dd>
|
||
<dt class="field-even">BDFL-Delegate<span class="colon">:</span></dt>
|
||
<dd class="field-even">Alyssa Coghlan</dd>
|
||
<dt class="field-odd">Status<span class="colon">:</span></dt>
|
||
<dd class="field-odd"><abbr title="Accepted and implementation complete, or no longer active">Final</abbr></dd>
|
||
<dt class="field-even">Type<span class="colon">:</span></dt>
|
||
<dd class="field-even"><abbr title="Normative PEP with a new feature for Python, implementation change for CPython or interoperability standard for the ecosystem">Standards Track</abbr></dd>
|
||
<dt class="field-odd">Created<span class="colon">:</span></dt>
|
||
<dd class="field-odd">27-Sep-2013</dd>
|
||
<dt class="field-even">Python-Version<span class="colon">:</span></dt>
|
||
<dd class="field-even">3.4</dd>
|
||
<dt class="field-odd">Post-History<span class="colon">:</span></dt>
|
||
<dd class="field-odd">06-Oct-2013, 14-Nov-2013, 20-Nov-2013</dd>
|
||
<dt class="field-even">Resolution<span class="colon">:</span></dt>
|
||
<dd class="field-even"><a class="reference external" href="https://mail.python.org/pipermail/python-dev/2013-November/130400.html">Python-Dev message</a></dd>
|
||
</dl>
|
||
<hr class="docutils" />
|
||
<section id="contents">
|
||
<details><summary>Table of Contents</summary><ul class="simple">
|
||
<li><a class="reference internal" href="#abstract">Abstract</a></li>
|
||
<li><a class="reference internal" href="#rationale">Rationale</a></li>
|
||
<li><a class="reference internal" href="#requirements-for-a-hash-function">Requirements for a hash function</a></li>
|
||
<li><a class="reference internal" href="#current-implementation-with-modified-fnv">Current implementation with modified FNV</a></li>
|
||
<li><a class="reference internal" href="#examined-hashing-algorithms">Examined hashing algorithms</a><ul>
|
||
<li><a class="reference internal" href="#siphash">SipHash</a></li>
|
||
<li><a class="reference internal" href="#murmurhash">MurmurHash</a></li>
|
||
<li><a class="reference internal" href="#cityhash">CityHash</a></li>
|
||
<li><a class="reference internal" href="#djbx33a">DJBX33A</a></li>
|
||
<li><a class="reference internal" href="#other">Other</a></li>
|
||
<li><a class="reference internal" href="#conclusion">Conclusion</a></li>
|
||
</ul>
|
||
</li>
|
||
<li><a class="reference internal" href="#small-string-optimization">Small string optimization</a></li>
|
||
<li><a class="reference internal" href="#c-api-additions">C API additions</a><ul>
|
||
<li><a class="reference internal" href="#hash-secret">hash secret</a></li>
|
||
<li><a class="reference internal" href="#hash-function-definition">hash function definition</a></li>
|
||
<li><a class="reference internal" href="#autoconf">autoconf</a></li>
|
||
<li><a class="reference internal" href="#hash-function-selection">hash function selection</a></li>
|
||
</ul>
|
||
</li>
|
||
<li><a class="reference internal" href="#python-api-addition">Python API addition</a><ul>
|
||
<li><a class="reference internal" href="#sys-module">sys module</a></li>
|
||
</ul>
|
||
</li>
|
||
<li><a class="reference internal" href="#necessary-modifications-to-c-code">Necessary modifications to C code</a><ul>
|
||
<li><a class="reference internal" href="#py-hashbytes-objects-object-c">_Py_HashBytes() (Objects/object.c)</a></li>
|
||
<li><a class="reference internal" href="#bytes-hash-objects-bytesobject-c">bytes_hash() (Objects/bytesobject.c)</a></li>
|
||
<li><a class="reference internal" href="#memory-hash-objects-memoryobject-c">memory_hash() (Objects/memoryobject.c)</a></li>
|
||
<li><a class="reference internal" href="#unicode-hash-objects-unicodeobject-c">unicode_hash() (Objects/unicodeobject.c)</a></li>
|
||
<li><a class="reference internal" href="#generic-hash-modules-datetimemodule-c">generic_hash() (Modules/_datetimemodule.c)</a></li>
|
||
</ul>
|
||
</li>
|
||
<li><a class="reference internal" href="#performance">Performance</a><ul>
|
||
<li><a class="reference internal" href="#hash-value-distribution">Hash value distribution</a></li>
|
||
<li><a class="reference internal" href="#typical-length">Typical length</a></li>
|
||
<li><a class="reference internal" href="#grand-unified-python-benchmark-suite">Grand Unified Python Benchmark Suite</a></li>
|
||
</ul>
|
||
</li>
|
||
<li><a class="reference internal" href="#backwards-compatibility">Backwards Compatibility</a></li>
|
||
<li><a class="reference internal" href="#alternative-counter-measures-against-hash-collision-dos">Alternative counter measures against hash collision DoS</a></li>
|
||
<li><a class="reference internal" href="#discussion">Discussion</a><ul>
|
||
<li><a class="reference internal" href="#pluggable">Pluggable</a></li>
|
||
<li><a class="reference internal" href="#non-aligned-memory-access">Non-aligned memory access</a></li>
|
||
<li><a class="reference internal" href="#ascii-str-bytes-hash-collision">ASCII str / bytes hash collision</a></li>
|
||
</ul>
|
||
</li>
|
||
<li><a class="reference internal" href="#references">References</a></li>
|
||
<li><a class="reference internal" href="#copyright">Copyright</a></li>
|
||
</ul>
|
||
</details></section>
|
||
<section id="abstract">
|
||
<h2><a class="toc-backref" href="#abstract" role="doc-backlink">Abstract</a></h2>
|
||
<p>This PEP proposes SipHash as default string and bytes hash algorithm to properly
|
||
fix hash randomization once and for all. It also proposes modifications to
|
||
Python’s C code in order to unify the hash code and to make it easily
|
||
interchangeable.</p>
|
||
</section>
|
||
<section id="rationale">
|
||
<h2><a class="toc-backref" href="#rationale" role="doc-backlink">Rationale</a></h2>
|
||
<p>Despite the last attempt <a class="reference internal" href="#issue13703" id="id1"><span>[issue13703]</span></a> CPython is still vulnerable to hash
|
||
collision DoS attacks <a class="reference internal" href="#c3" id="id2"><span>[29c3]</span></a> <a class="reference internal" href="#issue14621" id="id3"><span>[issue14621]</span></a>. The current hash algorithm and
|
||
its randomization is not resilient against attacks. Only a proper
|
||
cryptographic hash function prevents the extraction of secret randomization
|
||
keys. Although no practical attack against a Python-based service has been
|
||
seen yet, the weakness has to be fixed. Jean-Philippe Aumasson and Daniel
|
||
J. Bernstein have already shown how the seed for the current implementation
|
||
can be recovered <a class="reference internal" href="#poc" id="id4"><span>[poc]</span></a>.</p>
|
||
<p>Furthermore, the current hash algorithm is hard-coded and implemented multiple
|
||
times for bytes and three different Unicode representations UCS1, UCS2 and
|
||
UCS4. This makes it impossible for embedders to replace it with a different
|
||
implementation without patching and recompiling large parts of the interpreter.
|
||
Embedders may want to choose a more suitable hash function.</p>
|
||
<p>Finally the current implementation code does not perform well. In the common
|
||
case it only processes one or two bytes per cycle. On a modern 64-bit processor
|
||
the code can easily be adjusted to deal with eight bytes at once.</p>
|
||
<p>This PEP proposes three major changes to the hash code for strings and bytes:</p>
|
||
<ul class="simple">
|
||
<li>SipHash <a class="reference internal" href="#sip" id="id5"><span>[sip]</span></a> is introduced as default hash algorithm. It is fast and small
|
||
despite its cryptographic properties. Due to the fact that it was designed
|
||
by well known security and crypto experts, it is safe to assume that its
|
||
secure for the near future.</li>
|
||
<li>The existing FNV code is kept for platforms without a 64-bit data type. The
|
||
algorithm is optimized to process larger chunks per cycle.</li>
|
||
<li>Calculation of the hash of strings and bytes is moved into a single API
|
||
function instead of multiple specialized implementations in
|
||
<code class="docutils literal notranslate"><span class="pre">Objects/object.c</span></code> and <code class="docutils literal notranslate"><span class="pre">Objects/unicodeobject.c</span></code>. The function takes a
|
||
void pointer plus length and returns the hash for it.</li>
|
||
<li>The algorithm can be selected at compile time. FNV is guaranteed to exist
|
||
on all platforms. SipHash is available on the majority of modern systems.</li>
|
||
</ul>
|
||
</section>
|
||
<section id="requirements-for-a-hash-function">
|
||
<h2><a class="toc-backref" href="#requirements-for-a-hash-function" role="doc-backlink">Requirements for a hash function</a></h2>
|
||
<ul class="simple">
|
||
<li>It MUST be able to hash arbitrarily large blocks of memory from 1 byte up
|
||
to the maximum <code class="docutils literal notranslate"><span class="pre">ssize_t</span></code> value.</li>
|
||
<li>It MUST produce at least 32 bits on 32-bit platforms and at least 64 bits
|
||
on 64-bit platforms. (Note: Larger outputs can be compressed with e.g.
|
||
<code class="docutils literal notranslate"><span class="pre">v</span> <span class="pre">^</span> <span class="pre">(v</span> <span class="pre">>></span> <span class="pre">32)</span></code>.)</li>
|
||
<li>It MUST support hashing of unaligned memory in order to support
|
||
hash(memoryview).</li>
|
||
<li>It is highly RECOMMENDED that the length of the input influences the
|
||
outcome, so that <code class="docutils literal notranslate"><span class="pre">hash(b'\00')</span> <span class="pre">!=</span> <span class="pre">hash(b'\x00\x00')</span></code>.</li>
|
||
</ul>
|
||
<p>The internal interface code between the hash function and the tp_hash slots
|
||
implements special cases for zero length input and a return value of <code class="docutils literal notranslate"><span class="pre">-1</span></code>.
|
||
An input of length <code class="docutils literal notranslate"><span class="pre">0</span></code> is mapped to hash value <code class="docutils literal notranslate"><span class="pre">0</span></code>. The output <code class="docutils literal notranslate"><span class="pre">-1</span></code>
|
||
is mapped to <code class="docutils literal notranslate"><span class="pre">-2</span></code>.</p>
|
||
</section>
|
||
<section id="current-implementation-with-modified-fnv">
|
||
<h2><a class="toc-backref" href="#current-implementation-with-modified-fnv" role="doc-backlink">Current implementation with modified FNV</a></h2>
|
||
<p>CPython currently uses a variant of the Fowler-Noll-Vo hash function
|
||
<a class="reference internal" href="#fnv" id="id6"><span>[fnv]</span></a>. The variant is has been modified to reduce the amount and cost of hash
|
||
collisions for common strings. The first character of the string is added
|
||
twice, the first time with a bit shift of 7. The length of the input
|
||
string is XOR-ed to the final value. Both deviations from the original FNV
|
||
algorithm reduce the amount of hash collisions for short strings.</p>
|
||
<p>Recently <a class="reference internal" href="#issue13703" id="id7"><span>[issue13703]</span></a> a random prefix and suffix were added as an attempt to
|
||
randomize the hash values. In order to protect the hash secret the code still
|
||
returns <code class="docutils literal notranslate"><span class="pre">0</span></code> for zero length input.</p>
|
||
<p>C code:</p>
|
||
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="n">Py_uhash_t</span> <span class="n">x</span><span class="p">;</span>
|
||
<span class="n">Py_ssize_t</span> <span class="nb">len</span><span class="p">;</span>
|
||
<span class="o">/*</span> <span class="n">p</span> <span class="ow">is</span> <span class="n">either</span> <span class="mi">1</span><span class="p">,</span> <span class="mi">2</span> <span class="ow">or</span> <span class="mi">4</span> <span class="n">byte</span> <span class="nb">type</span> <span class="o">*/</span>
|
||
<span class="n">unsigned</span> <span class="n">char</span> <span class="o">*</span><span class="n">p</span><span class="p">;</span>
|
||
<span class="n">Py_UCS2</span> <span class="o">*</span><span class="n">p</span><span class="p">;</span>
|
||
<span class="n">Py_UCS4</span> <span class="o">*</span><span class="n">p</span><span class="p">;</span>
|
||
|
||
<span class="k">if</span> <span class="p">(</span><span class="nb">len</span> <span class="o">==</span> <span class="mi">0</span><span class="p">)</span>
|
||
<span class="k">return</span> <span class="mi">0</span><span class="p">;</span>
|
||
<span class="n">x</span> <span class="o">=</span> <span class="p">(</span><span class="n">Py_uhash_t</span><span class="p">)</span> <span class="n">_Py_HashSecret</span><span class="o">.</span><span class="n">prefix</span><span class="p">;</span>
|
||
<span class="n">x</span> <span class="o">^=</span> <span class="p">(</span><span class="n">Py_uhash_t</span><span class="p">)</span> <span class="o">*</span><span class="n">p</span> <span class="o"><<</span> <span class="mi">7</span><span class="p">;</span>
|
||
<span class="k">for</span> <span class="p">(</span><span class="n">i</span> <span class="o">=</span> <span class="mi">0</span><span class="p">;</span> <span class="n">i</span> <span class="o"><</span> <span class="nb">len</span><span class="p">;</span> <span class="n">i</span><span class="o">++</span><span class="p">)</span>
|
||
<span class="n">x</span> <span class="o">=</span> <span class="p">(</span><span class="mi">1000003</span> <span class="o">*</span> <span class="n">x</span><span class="p">)</span> <span class="o">^</span> <span class="p">(</span><span class="n">Py_uhash_t</span><span class="p">)</span> <span class="o">*</span><span class="n">p</span><span class="o">++</span><span class="p">;</span>
|
||
<span class="n">x</span> <span class="o">^=</span> <span class="p">(</span><span class="n">Py_uhash_t</span><span class="p">)</span> <span class="nb">len</span><span class="p">;</span>
|
||
<span class="n">x</span> <span class="o">^=</span> <span class="p">(</span><span class="n">Py_uhash_t</span><span class="p">)</span> <span class="n">_Py_HashSecret</span><span class="o">.</span><span class="n">suffix</span><span class="p">;</span>
|
||
<span class="k">return</span> <span class="n">x</span><span class="p">;</span>
|
||
</pre></div>
|
||
</div>
|
||
<p>Which roughly translates to Python:</p>
|
||
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="k">def</span> <span class="nf">fnv</span><span class="p">(</span><span class="n">p</span><span class="p">):</span>
|
||
<span class="k">if</span> <span class="nb">len</span><span class="p">(</span><span class="n">p</span><span class="p">)</span> <span class="o">==</span> <span class="mi">0</span><span class="p">:</span>
|
||
<span class="k">return</span> <span class="mi">0</span>
|
||
|
||
<span class="c1"># bit mask, 2**32-1 or 2**64-1</span>
|
||
<span class="n">mask</span> <span class="o">=</span> <span class="mi">2</span> <span class="o">*</span> <span class="n">sys</span><span class="o">.</span><span class="n">maxsize</span> <span class="o">+</span> <span class="mi">1</span>
|
||
|
||
<span class="n">x</span> <span class="o">=</span> <span class="n">hashsecret</span><span class="o">.</span><span class="n">prefix</span>
|
||
<span class="n">x</span> <span class="o">=</span> <span class="p">(</span><span class="n">x</span> <span class="o">^</span> <span class="p">(</span><span class="nb">ord</span><span class="p">(</span><span class="n">p</span><span class="p">[</span><span class="mi">0</span><span class="p">])</span> <span class="o"><<</span> <span class="mi">7</span><span class="p">))</span> <span class="o">&</span> <span class="n">mask</span>
|
||
<span class="k">for</span> <span class="n">c</span> <span class="ow">in</span> <span class="n">p</span><span class="p">:</span>
|
||
<span class="n">x</span> <span class="o">=</span> <span class="p">((</span><span class="mi">1000003</span> <span class="o">*</span> <span class="n">x</span><span class="p">)</span> <span class="o">^</span> <span class="nb">ord</span><span class="p">(</span><span class="n">c</span><span class="p">))</span> <span class="o">&</span> <span class="n">mask</span>
|
||
<span class="n">x</span> <span class="o">=</span> <span class="p">(</span><span class="n">x</span> <span class="o">^</span> <span class="nb">len</span><span class="p">(</span><span class="n">p</span><span class="p">))</span> <span class="o">&</span> <span class="n">mask</span>
|
||
<span class="n">x</span> <span class="o">=</span> <span class="p">(</span><span class="n">x</span> <span class="o">^</span> <span class="n">hashsecret</span><span class="o">.</span><span class="n">suffix</span><span class="p">)</span> <span class="o">&</span> <span class="n">mask</span>
|
||
|
||
<span class="k">if</span> <span class="n">x</span> <span class="o">==</span> <span class="o">-</span><span class="mi">1</span><span class="p">:</span>
|
||
<span class="n">x</span> <span class="o">=</span> <span class="o">-</span><span class="mi">2</span>
|
||
|
||
<span class="k">return</span> <span class="n">x</span>
|
||
</pre></div>
|
||
</div>
|
||
<p>FNV is a simple multiply and XOR algorithm with no cryptographic properties.
|
||
The randomization was not part of the initial hash code, but was added as
|
||
counter measure against hash collision attacks as explained in oCERT-2011-003
|
||
<a class="reference internal" href="#ocert" id="id8"><span>[ocert]</span></a>. Because FNV is not a cryptographic hash algorithm and the dict
|
||
implementation is not fortified against side channel analysis, the
|
||
randomization secrets can be calculated by a remote attacker. The author of
|
||
this PEP strongly believes that the nature of a non-cryptographic hash
|
||
function makes it impossible to conceal the secrets.</p>
|
||
</section>
|
||
<section id="examined-hashing-algorithms">
|
||
<h2><a class="toc-backref" href="#examined-hashing-algorithms" role="doc-backlink">Examined hashing algorithms</a></h2>
|
||
<p>The author of this PEP has researched several hashing algorithms that are
|
||
considered modern, fast and state-of-the-art.</p>
|
||
<section id="siphash">
|
||
<h3><a class="toc-backref" href="#siphash" role="doc-backlink">SipHash</a></h3>
|
||
<p>SipHash <a class="reference internal" href="#sip" id="id9"><span>[sip]</span></a> is a cryptographic pseudo random function with a 128-bit seed
|
||
and 64-bit output. It was designed by Jean-Philippe Aumasson and Daniel J.
|
||
Bernstein as a fast and secure keyed hash algorithm. It’s used by Ruby, Perl,
|
||
OpenDNS, Rust, Redis, FreeBSD and more. The C reference implementation has
|
||
been released under CC0 license (public domain).</p>
|
||
<p>Quote from SipHash’s site:</p>
|
||
<blockquote>
|
||
<div>SipHash is a family of pseudorandom functions (a.k.a. keyed hash
|
||
functions) optimized for speed on short messages. Target applications
|
||
include network traffic authentication and defense against hash-flooding
|
||
DoS attacks.</div></blockquote>
|
||
<p>siphash24 is the recommend variant with best performance. It uses 2 rounds per
|
||
message block and 4 finalization rounds. Besides the reference implementation
|
||
several other implementations are available. Some are single-shot functions,
|
||
others use a Merkle–Damgård construction-like approach with init, update and
|
||
finalize functions. Marek Majkowski C implementation csiphash <a class="reference internal" href="#csiphash" id="id10"><span>[csiphash]</span></a>
|
||
defines the prototype of the function. (Note: <code class="docutils literal notranslate"><span class="pre">k</span></code> is split up into two
|
||
uint64_t):</p>
|
||
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="n">uint64_t</span> <span class="n">siphash24</span><span class="p">(</span><span class="n">const</span> <span class="n">void</span> <span class="o">*</span><span class="n">src</span><span class="p">,</span> <span class="n">unsigned</span> <span class="n">long</span> <span class="n">src_sz</span><span class="p">,</span> <span class="n">const</span> <span class="n">char</span> <span class="n">k</span><span class="p">[</span><span class="mi">16</span><span class="p">])</span>
|
||
</pre></div>
|
||
</div>
|
||
<p>SipHash requires a 64-bit data type and is not compatible with pure C89
|
||
platforms.</p>
|
||
</section>
|
||
<section id="murmurhash">
|
||
<h3><a class="toc-backref" href="#murmurhash" role="doc-backlink">MurmurHash</a></h3>
|
||
<p>MurmurHash <a class="reference internal" href="#murmur" id="id11"><span>[murmur]</span></a> is a family of non-cryptographic keyed hash function
|
||
developed by Austin Appleby. Murmur3 is the latest and fast variant of
|
||
MurmurHash. The C++ reference implementation has been released into public
|
||
domain. It features 32- or 128-bit output with a 32-bit seed. (Note: The out
|
||
parameter is a buffer with either 1 or 4 bytes.)</p>
|
||
<p>Murmur3’s function prototypes are:</p>
|
||
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="n">void</span> <span class="n">MurmurHash3_x86_32</span><span class="p">(</span><span class="n">const</span> <span class="n">void</span> <span class="o">*</span><span class="n">key</span><span class="p">,</span> <span class="nb">int</span> <span class="nb">len</span><span class="p">,</span> <span class="n">uint32_t</span> <span class="n">seed</span><span class="p">,</span> <span class="n">void</span> <span class="o">*</span><span class="n">out</span><span class="p">)</span>
|
||
|
||
<span class="n">void</span> <span class="n">MurmurHash3_x86_128</span><span class="p">(</span><span class="n">const</span> <span class="n">void</span> <span class="o">*</span><span class="n">key</span><span class="p">,</span> <span class="nb">int</span> <span class="nb">len</span><span class="p">,</span> <span class="n">uint32_t</span> <span class="n">seed</span><span class="p">,</span> <span class="n">void</span> <span class="o">*</span><span class="n">out</span><span class="p">)</span>
|
||
|
||
<span class="n">void</span> <span class="n">MurmurHash3_x64_128</span><span class="p">(</span><span class="n">const</span> <span class="n">void</span> <span class="o">*</span><span class="n">key</span><span class="p">,</span> <span class="nb">int</span> <span class="nb">len</span><span class="p">,</span> <span class="n">uint32_t</span> <span class="n">seed</span><span class="p">,</span> <span class="n">void</span> <span class="o">*</span><span class="n">out</span><span class="p">)</span>
|
||
</pre></div>
|
||
</div>
|
||
<p>The 128-bit variants requires a 64-bit data type and are not compatible with
|
||
pure C89 platforms. The 32-bit variant is fully C89-compatible.</p>
|
||
<p>Aumasson, Bernstein and Boßlet have shown <a class="reference internal" href="#sip" id="id12"><span>[sip]</span></a> <a class="reference internal" href="#ocert-2012-001" id="id13"><span>[ocert-2012-001]</span></a> that
|
||
Murmur3 is not resilient against hash collision attacks. Therefore, Murmur3
|
||
can no longer be considered as secure algorithm. It still may be an
|
||
alternative if hash collision attacks are of no concern.</p>
|
||
</section>
|
||
<section id="cityhash">
|
||
<h3><a class="toc-backref" href="#cityhash" role="doc-backlink">CityHash</a></h3>
|
||
<p>CityHash <a class="reference internal" href="#city" id="id14"><span>[city]</span></a> is a family of non-cryptographic hash function developed by
|
||
Geoff Pike and Jyrki Alakuijala for Google. The C++ reference implementation
|
||
has been released under MIT license. The algorithm is partly based on
|
||
MurmurHash and claims to be faster. It supports 64- and 128-bit output with a
|
||
128-bit seed as well as 32-bit output without seed.</p>
|
||
<p>The relevant function prototype for 64-bit CityHash with 128-bit seed is:</p>
|
||
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="n">uint64</span> <span class="n">CityHash64WithSeeds</span><span class="p">(</span><span class="n">const</span> <span class="n">char</span> <span class="o">*</span><span class="n">buf</span><span class="p">,</span> <span class="n">size_t</span> <span class="nb">len</span><span class="p">,</span> <span class="n">uint64</span> <span class="n">seed0</span><span class="p">,</span>
|
||
<span class="n">uint64</span> <span class="n">seed1</span><span class="p">)</span>
|
||
</pre></div>
|
||
</div>
|
||
<p>CityHash also offers SSE 4.2 optimizations with CRC32 intrinsic for long
|
||
inputs. All variants except CityHash32 require 64-bit data types. CityHash32
|
||
uses only 32-bit data types but it doesn’t support seeding.</p>
|
||
<p>Like MurmurHash Aumasson, Bernstein and Boßlet have shown <a class="reference internal" href="#sip" id="id15"><span>[sip]</span></a> a similar
|
||
weakness in CityHash.</p>
|
||
</section>
|
||
<section id="djbx33a">
|
||
<h3><a class="toc-backref" href="#djbx33a" role="doc-backlink">DJBX33A</a></h3>
|
||
<p>DJBX33A is a very simple multiplication and addition algorithm by Daniel
|
||
J. Bernstein. It is fast and has low setup costs but it’s not secure against
|
||
hash collision attacks. Its properties make it a viable choice for small
|
||
string hashing optimization.</p>
|
||
</section>
|
||
<section id="other">
|
||
<h3><a class="toc-backref" href="#other" role="doc-backlink">Other</a></h3>
|
||
<p>Crypto algorithms such as HMAC, MD5, SHA-1 or SHA-2 are too slow and have
|
||
high setup and finalization costs. For these reasons they are not considered
|
||
fit for this purpose. Modern AMD and Intel CPUs have AES-NI (AES instruction
|
||
set) <a class="reference internal" href="#aes-ni" id="id16"><span>[aes-ni]</span></a> to speed up AES encryption. CMAC with AES-NI might be a viable
|
||
option but it’s probably too slow for daily operation. (testing required)</p>
|
||
</section>
|
||
<section id="conclusion">
|
||
<h3><a class="toc-backref" href="#conclusion" role="doc-backlink">Conclusion</a></h3>
|
||
<p>SipHash provides the best combination of speed and security. Developers of
|
||
other prominent projects have came to the same conclusion.</p>
|
||
</section>
|
||
</section>
|
||
<section id="small-string-optimization">
|
||
<h2><a class="toc-backref" href="#small-string-optimization" role="doc-backlink">Small string optimization</a></h2>
|
||
<p>Hash functions like SipHash24 have a costly initialization and finalization
|
||
code that can dominate speed of the algorithm for very short strings. On the
|
||
other hand, Python calculates the hash value of short strings quite often. A
|
||
simple and fast function for especially for hashing of small strings can make
|
||
a measurable impact on performance. For example, these measurements were taken
|
||
during a run of Python’s regression tests. Additional measurements of other
|
||
code have shown a similar distribution.</p>
|
||
<table class="docutils align-default">
|
||
<thead>
|
||
<tr class="row-odd"><th class="head">bytes</th>
|
||
<th class="head">hash() calls</th>
|
||
<th class="head">portion</th>
|
||
</tr>
|
||
</thead>
|
||
<tbody>
|
||
<tr class="row-even"><td>1</td>
|
||
<td>18709</td>
|
||
<td>0.2%</td>
|
||
</tr>
|
||
<tr class="row-odd"><td>2</td>
|
||
<td>737480</td>
|
||
<td>9.5%</td>
|
||
</tr>
|
||
<tr class="row-even"><td>3</td>
|
||
<td>636178</td>
|
||
<td>17.6%</td>
|
||
</tr>
|
||
<tr class="row-odd"><td>4</td>
|
||
<td>1518313</td>
|
||
<td>36.7%</td>
|
||
</tr>
|
||
<tr class="row-even"><td>5</td>
|
||
<td>643022</td>
|
||
<td>44.9%</td>
|
||
</tr>
|
||
<tr class="row-odd"><td>6</td>
|
||
<td>770478</td>
|
||
<td>54.6%</td>
|
||
</tr>
|
||
<tr class="row-even"><td>7</td>
|
||
<td>525150</td>
|
||
<td>61.2%</td>
|
||
</tr>
|
||
<tr class="row-odd"><td>8</td>
|
||
<td>304873</td>
|
||
<td>65.1%</td>
|
||
</tr>
|
||
<tr class="row-even"><td>9</td>
|
||
<td>297272</td>
|
||
<td>68.8%</td>
|
||
</tr>
|
||
<tr class="row-odd"><td>10</td>
|
||
<td>68191</td>
|
||
<td>69.7%</td>
|
||
</tr>
|
||
<tr class="row-even"><td>11</td>
|
||
<td>1388484</td>
|
||
<td>87.2%</td>
|
||
</tr>
|
||
<tr class="row-odd"><td>12</td>
|
||
<td>480786</td>
|
||
<td>93.3%</td>
|
||
</tr>
|
||
<tr class="row-even"><td>13</td>
|
||
<td>52730</td>
|
||
<td>93.9%</td>
|
||
</tr>
|
||
<tr class="row-odd"><td>14</td>
|
||
<td>65309</td>
|
||
<td>94.8%</td>
|
||
</tr>
|
||
<tr class="row-even"><td>15</td>
|
||
<td>44245</td>
|
||
<td>95.3%</td>
|
||
</tr>
|
||
<tr class="row-odd"><td>16</td>
|
||
<td>85643</td>
|
||
<td>96.4%</td>
|
||
</tr>
|
||
<tr class="row-even"><td>Total</td>
|
||
<td>7921678</td>
|
||
<td></td>
|
||
</tr>
|
||
</tbody>
|
||
</table>
|
||
<p>However a fast function like DJBX33A is not as secure as SipHash24. A cutoff
|
||
at about 5 to 7 bytes should provide a decent safety margin and speed up at
|
||
the same time. The PEP’s reference implementation provides such a cutoff with
|
||
<code class="docutils literal notranslate"><span class="pre">Py_HASH_CUTOFF</span></code>. The optimization is disabled by default for several
|
||
reasons. For one the security implications are unclear yet and should be
|
||
thoroughly studied before the optimization is enabled by default. Secondly
|
||
the performance benefits vary. On 64 bit Linux system with Intel Core i7
|
||
multiple runs of Python’s benchmark suite <a class="reference internal" href="#pybench" id="id17"><span>[pybench]</span></a> show an average speedups
|
||
between 3% and 5% for benchmarks such as django_v2, mako and etree with a
|
||
cutoff of 7. Benchmarks with X86 binaries and Windows X86_64 builds on the
|
||
same machine are a bit slower with small string optimization.</p>
|
||
<p>The state of small string optimization will be assessed during the beta phase
|
||
of Python 3.4. The feature will either be enabled with appropriate values
|
||
or the code will be removed before beta 2 is released.</p>
|
||
</section>
|
||
<section id="c-api-additions">
|
||
<h2><a class="toc-backref" href="#c-api-additions" role="doc-backlink">C API additions</a></h2>
|
||
<p>All C API extension modifications are not part of the stable API.</p>
|
||
<section id="hash-secret">
|
||
<h3><a class="toc-backref" href="#hash-secret" role="doc-backlink">hash secret</a></h3>
|
||
<p>The <code class="docutils literal notranslate"><span class="pre">_Py_HashSecret_t</span></code> type of Python 2.6 to 3.3 has two members with either
|
||
32- or 64-bit length each. SipHash requires two 64-bit unsigned integers as
|
||
keys. The typedef will be changed to a union with a guaranteed size of 24
|
||
bytes on all architectures. The union provides a 128 bit random key for
|
||
SipHash24 and FNV as well as an additional value of 64 bit for the optional
|
||
small string optimization and pyexpat seed. The additional 64 bit seed ensures
|
||
that pyexpat or small string optimization cannot reveal bits of the SipHash24
|
||
seed.</p>
|
||
<p>memory layout on 64 bit systems:</p>
|
||
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="n">cccccccc</span> <span class="n">cccccccc</span> <span class="n">cccccccc</span> <span class="n">uc</span> <span class="o">--</span> <span class="n">unsigned</span> <span class="n">char</span><span class="p">[</span><span class="mi">24</span><span class="p">]</span>
|
||
<span class="n">pppppppp</span> <span class="n">ssssssss</span> <span class="o">........</span> <span class="n">fnv</span> <span class="o">--</span> <span class="n">two</span> <span class="n">Py_hash_t</span>
|
||
<span class="n">k0k0k0k0</span> <span class="n">k1k1k1k1</span> <span class="o">........</span> <span class="n">siphash</span> <span class="o">--</span> <span class="n">two</span> <span class="n">PY_UINT64_T</span>
|
||
<span class="o">........</span> <span class="o">........</span> <span class="n">ssssssss</span> <span class="n">djbx33a</span> <span class="o">--</span> <span class="mi">16</span> <span class="nb">bytes</span> <span class="n">padding</span> <span class="o">+</span> <span class="n">one</span> <span class="n">Py_hash_t</span>
|
||
<span class="o">........</span> <span class="o">........</span> <span class="n">eeeeeeee</span> <span class="n">pyexpat</span> <span class="n">XML</span> <span class="nb">hash</span> <span class="n">salt</span>
|
||
</pre></div>
|
||
</div>
|
||
<p>memory layout on 32 bit systems:</p>
|
||
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="n">cccccccc</span> <span class="n">cccccccc</span> <span class="n">cccccccc</span> <span class="n">uc</span> <span class="o">--</span> <span class="n">unsigned</span> <span class="n">char</span><span class="p">[</span><span class="mi">24</span><span class="p">]</span>
|
||
<span class="n">ppppssss</span> <span class="o">........</span> <span class="o">........</span> <span class="n">fnv</span> <span class="o">--</span> <span class="n">two</span> <span class="n">Py_hash_t</span>
|
||
<span class="n">k0k0k0k0</span> <span class="n">k1k1k1k1</span> <span class="o">........</span> <span class="n">siphash</span> <span class="o">--</span> <span class="n">two</span> <span class="n">PY_UINT64_T</span> <span class="p">(</span><span class="k">if</span> <span class="n">available</span><span class="p">)</span>
|
||
<span class="o">........</span> <span class="o">........</span> <span class="n">ssss</span><span class="o">....</span> <span class="n">djbx33a</span> <span class="o">--</span> <span class="mi">16</span> <span class="nb">bytes</span> <span class="n">padding</span> <span class="o">+</span> <span class="n">one</span> <span class="n">Py_hash_t</span>
|
||
<span class="o">........</span> <span class="o">........</span> <span class="n">eeee</span><span class="o">....</span> <span class="n">pyexpat</span> <span class="n">XML</span> <span class="nb">hash</span> <span class="n">salt</span>
|
||
</pre></div>
|
||
</div>
|
||
<p>new type definition:</p>
|
||
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span>typedef union {
|
||
/* ensure 24 bytes */
|
||
unsigned char uc[24];
|
||
/* two Py_hash_t for FNV */
|
||
struct {
|
||
Py_hash_t prefix;
|
||
Py_hash_t suffix;
|
||
} fnv;
|
||
#ifdef PY_UINT64_T
|
||
/* two uint64 for SipHash24 */
|
||
struct {
|
||
PY_UINT64_T k0;
|
||
PY_UINT64_T k1;
|
||
} siphash;
|
||
#endif
|
||
/* a different (!) Py_hash_t for small string optimization */
|
||
struct {
|
||
unsigned char padding[16];
|
||
Py_hash_t suffix;
|
||
} djbx33a;
|
||
struct {
|
||
unsigned char padding[16];
|
||
Py_hash_t hashsalt;
|
||
} expat;
|
||
} _Py_HashSecret_t;
|
||
PyAPI_DATA(_Py_HashSecret_t) _Py_HashSecret;
|
||
</pre></div>
|
||
</div>
|
||
<p><code class="docutils literal notranslate"><span class="pre">_Py_HashSecret_t</span></code> is initialized in <code class="docutils literal notranslate"><span class="pre">Python/random.c:_PyRandom_Init()</span></code>
|
||
exactly once at startup.</p>
|
||
</section>
|
||
<section id="hash-function-definition">
|
||
<h3><a class="toc-backref" href="#hash-function-definition" role="doc-backlink">hash function definition</a></h3>
|
||
<p>Implementation:</p>
|
||
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="n">typedef</span> <span class="n">struct</span> <span class="p">{</span>
|
||
<span class="o">/*</span> <span class="n">function</span> <span class="n">pointer</span> <span class="n">to</span> <span class="nb">hash</span> <span class="n">function</span><span class="p">,</span> <span class="n">e</span><span class="o">.</span><span class="n">g</span><span class="o">.</span> <span class="n">fnv</span> <span class="ow">or</span> <span class="n">siphash24</span> <span class="o">*/</span>
|
||
<span class="n">Py_hash_t</span> <span class="p">(</span><span class="o">*</span><span class="n">const</span> <span class="nb">hash</span><span class="p">)(</span><span class="n">const</span> <span class="n">void</span> <span class="o">*</span><span class="p">,</span> <span class="n">Py_ssize_t</span><span class="p">);</span>
|
||
<span class="n">const</span> <span class="n">char</span> <span class="o">*</span><span class="n">name</span><span class="p">;</span> <span class="o">/*</span> <span class="n">name</span> <span class="n">of</span> <span class="n">the</span> <span class="nb">hash</span> <span class="n">algorithm</span> <span class="ow">and</span> <span class="n">variant</span> <span class="o">*/</span>
|
||
<span class="n">const</span> <span class="nb">int</span> <span class="n">hash_bits</span><span class="p">;</span> <span class="o">/*</span> <span class="n">internal</span> <span class="n">size</span> <span class="n">of</span> <span class="nb">hash</span> <span class="n">value</span> <span class="o">*/</span>
|
||
<span class="n">const</span> <span class="nb">int</span> <span class="n">seed_bits</span><span class="p">;</span> <span class="o">/*</span> <span class="n">size</span> <span class="n">of</span> <span class="n">seed</span> <span class="nb">input</span> <span class="o">*/</span>
|
||
<span class="p">}</span> <span class="n">PyHash_FuncDef</span><span class="p">;</span>
|
||
|
||
<span class="n">PyAPI_FUNC</span><span class="p">(</span><span class="n">PyHash_FuncDef</span><span class="o">*</span><span class="p">)</span> <span class="n">PyHash_GetFuncDef</span><span class="p">(</span><span class="n">void</span><span class="p">);</span>
|
||
</pre></div>
|
||
</div>
|
||
</section>
|
||
<section id="autoconf">
|
||
<h3><a class="toc-backref" href="#autoconf" role="doc-backlink">autoconf</a></h3>
|
||
<p>A new test is added to the configure script. The test sets
|
||
<code class="docutils literal notranslate"><span class="pre">HAVE_ALIGNED_REQUIRED</span></code>, when it detects a platform, that requires aligned
|
||
memory access for integers. Must current platforms such as X86, X86_64 and
|
||
modern ARM don’t need aligned data.</p>
|
||
<p>A new option <code class="docutils literal notranslate"><span class="pre">--with-hash-algorithm</span></code> enables the user to select a hash
|
||
algorithm in the configure step.</p>
|
||
</section>
|
||
<section id="hash-function-selection">
|
||
<h3><a class="toc-backref" href="#hash-function-selection" role="doc-backlink">hash function selection</a></h3>
|
||
<p>The value of the macro <code class="docutils literal notranslate"><span class="pre">Py_HASH_ALGORITHM</span></code> defines which hash algorithm is
|
||
used internally. It may be set to any of the three values <code class="docutils literal notranslate"><span class="pre">Py_HASH_SIPHASH24</span></code>,
|
||
<code class="docutils literal notranslate"><span class="pre">Py_HASH_FNV</span></code> or <code class="docutils literal notranslate"><span class="pre">Py_HASH_EXTERNAL</span></code>. If <code class="docutils literal notranslate"><span class="pre">Py_HASH_ALGORITHM</span></code> is not
|
||
defined at all, then the best available algorithm is selected. On platforms
|
||
which don’t require aligned memory access (<code class="docutils literal notranslate"><span class="pre">HAVE_ALIGNED_REQUIRED</span></code> not
|
||
defined) and an unsigned 64 bit integer type <code class="docutils literal notranslate"><span class="pre">PY_UINT64_T</span></code>, SipHash24 is
|
||
used. On strict C89 platforms without a 64 bit data type, or architectures such
|
||
as SPARC, FNV is selected as fallback. A hash algorithm can be selected with
|
||
an autoconf option, for example <code class="docutils literal notranslate"><span class="pre">./configure</span> <span class="pre">--with-hash-algorithm=fnv</span></code>.</p>
|
||
<p>The value <code class="docutils literal notranslate"><span class="pre">Py_HASH_EXTERNAL</span></code> allows 3rd parties to provide their own
|
||
implementation at compile time.</p>
|
||
<p>Implementation:</p>
|
||
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="c1">#if Py_HASH_ALGORITHM == Py_HASH_EXTERNAL</span>
|
||
<span class="n">extern</span> <span class="n">PyHash_FuncDef</span> <span class="n">PyHash_Func</span><span class="p">;</span>
|
||
<span class="c1">#elif Py_HASH_ALGORITHM == Py_HASH_SIPHASH24</span>
|
||
<span class="n">static</span> <span class="n">PyHash_FuncDef</span> <span class="n">PyHash_Func</span> <span class="o">=</span> <span class="p">{</span><span class="n">siphash24</span><span class="p">,</span> <span class="s2">"siphash24"</span><span class="p">,</span> <span class="mi">64</span><span class="p">,</span> <span class="mi">128</span><span class="p">};</span>
|
||
<span class="c1">#elif Py_HASH_ALGORITHM == Py_HASH_FNV</span>
|
||
<span class="n">static</span> <span class="n">PyHash_FuncDef</span> <span class="n">PyHash_Func</span> <span class="o">=</span> <span class="p">{</span><span class="n">fnv</span><span class="p">,</span> <span class="s2">"fnv"</span><span class="p">,</span> <span class="mi">8</span> <span class="o">*</span> <span class="n">sizeof</span><span class="p">(</span><span class="n">Py_hash_t</span><span class="p">),</span>
|
||
<span class="mi">16</span> <span class="o">*</span> <span class="n">sizeof</span><span class="p">(</span><span class="n">Py_hash_t</span><span class="p">)};</span>
|
||
<span class="c1">#endif</span>
|
||
</pre></div>
|
||
</div>
|
||
</section>
|
||
</section>
|
||
<section id="python-api-addition">
|
||
<h2><a class="toc-backref" href="#python-api-addition" role="doc-backlink">Python API addition</a></h2>
|
||
<section id="sys-module">
|
||
<h3><a class="toc-backref" href="#sys-module" role="doc-backlink">sys module</a></h3>
|
||
<p>The sys module already has a hash_info struct sequence. More fields are added
|
||
to the object to reflect the active hash algorithm and its properties.</p>
|
||
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="n">sys</span><span class="o">.</span><span class="n">hash_info</span><span class="p">(</span><span class="n">width</span><span class="o">=</span><span class="mi">64</span><span class="p">,</span>
|
||
<span class="n">modulus</span><span class="o">=</span><span class="mi">2305843009213693951</span><span class="p">,</span>
|
||
<span class="n">inf</span><span class="o">=</span><span class="mi">314159</span><span class="p">,</span>
|
||
<span class="n">nan</span><span class="o">=</span><span class="mi">0</span><span class="p">,</span>
|
||
<span class="n">imag</span><span class="o">=</span><span class="mi">1000003</span><span class="p">,</span>
|
||
<span class="c1"># new fields:</span>
|
||
<span class="n">algorithm</span><span class="o">=</span><span class="s1">'siphash24'</span><span class="p">,</span>
|
||
<span class="n">hash_bits</span><span class="o">=</span><span class="mi">64</span><span class="p">,</span>
|
||
<span class="n">seed_bits</span><span class="o">=</span><span class="mi">128</span><span class="p">,</span>
|
||
<span class="n">cutoff</span><span class="o">=</span><span class="mi">0</span><span class="p">)</span>
|
||
</pre></div>
|
||
</div>
|
||
</section>
|
||
</section>
|
||
<section id="necessary-modifications-to-c-code">
|
||
<h2><a class="toc-backref" href="#necessary-modifications-to-c-code" role="doc-backlink">Necessary modifications to C code</a></h2>
|
||
<section id="py-hashbytes-objects-object-c">
|
||
<h3><a class="toc-backref" href="#py-hashbytes-objects-object-c" role="doc-backlink">_Py_HashBytes() (Objects/object.c)</a></h3>
|
||
<p><code class="docutils literal notranslate"><span class="pre">_Py_HashBytes</span></code> is an internal helper function that provides the hashing
|
||
code for bytes, memoryview and datetime classes. It currently implements FNV
|
||
for <code class="docutils literal notranslate"><span class="pre">unsigned</span> <span class="pre">char</span> <span class="pre">*</span></code>.</p>
|
||
<p>The function is moved to Python/pyhash.c and modified to use the hash function
|
||
through PyHash_Func.hash(). The function signature is altered to take
|
||
a <code class="docutils literal notranslate"><span class="pre">const</span> <span class="pre">void</span> <span class="pre">*</span></code> as first argument. <code class="docutils literal notranslate"><span class="pre">_Py_HashBytes</span></code> also takes care of
|
||
special cases: it maps zero length input to <code class="docutils literal notranslate"><span class="pre">0</span></code> and return value of <code class="docutils literal notranslate"><span class="pre">-1</span></code>
|
||
to <code class="docutils literal notranslate"><span class="pre">-2</span></code>.</p>
|
||
</section>
|
||
<section id="bytes-hash-objects-bytesobject-c">
|
||
<h3><a class="toc-backref" href="#bytes-hash-objects-bytesobject-c" role="doc-backlink">bytes_hash() (Objects/bytesobject.c)</a></h3>
|
||
<p><code class="docutils literal notranslate"><span class="pre">bytes_hash</span></code> uses <code class="docutils literal notranslate"><span class="pre">_Py_HashBytes</span></code> to provide the tp_hash slot function
|
||
for bytes objects. The function will continue to use <code class="docutils literal notranslate"><span class="pre">_Py_HashBytes</span></code>
|
||
but without a type cast.</p>
|
||
</section>
|
||
<section id="memory-hash-objects-memoryobject-c">
|
||
<h3><a class="toc-backref" href="#memory-hash-objects-memoryobject-c" role="doc-backlink">memory_hash() (Objects/memoryobject.c)</a></h3>
|
||
<p><code class="docutils literal notranslate"><span class="pre">memory_hash</span></code> provides the tp_hash slot function for read-only memory
|
||
views if the original object is hashable, too. It’s the only function that
|
||
has to support hashing of unaligned memory segments in the future. The
|
||
function will continue to use <code class="docutils literal notranslate"><span class="pre">_Py_HashBytes</span></code> but without a type cast.</p>
|
||
</section>
|
||
<section id="unicode-hash-objects-unicodeobject-c">
|
||
<h3><a class="toc-backref" href="#unicode-hash-objects-unicodeobject-c" role="doc-backlink">unicode_hash() (Objects/unicodeobject.c)</a></h3>
|
||
<p><code class="docutils literal notranslate"><span class="pre">unicode_hash</span></code> provides the tp_hash slot function for unicode. Right now it
|
||
implements the FNV algorithm three times for <code class="docutils literal notranslate"><span class="pre">unsigned</span> <span class="pre">char*</span></code>, <code class="docutils literal notranslate"><span class="pre">Py_UCS2</span></code>
|
||
and <code class="docutils literal notranslate"><span class="pre">Py_UCS4</span></code>. A reimplementation of the function must take care to use the
|
||
correct length. Since the macro <code class="docutils literal notranslate"><span class="pre">PyUnicode_GET_LENGTH</span></code> returns the length
|
||
of the unicode string and not its size in octets, the length must be
|
||
multiplied with the size of the internal unicode kind:</p>
|
||
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="k">if</span> <span class="p">(</span><span class="n">PyUnicode_READY</span><span class="p">(</span><span class="n">u</span><span class="p">)</span> <span class="o">==</span> <span class="o">-</span><span class="mi">1</span><span class="p">)</span>
|
||
<span class="k">return</span> <span class="o">-</span><span class="mi">1</span><span class="p">;</span>
|
||
<span class="n">x</span> <span class="o">=</span> <span class="n">_Py_HashBytes</span><span class="p">(</span><span class="n">PyUnicode_DATA</span><span class="p">(</span><span class="n">u</span><span class="p">),</span>
|
||
<span class="n">PyUnicode_GET_LENGTH</span><span class="p">(</span><span class="n">u</span><span class="p">)</span> <span class="o">*</span> <span class="n">PyUnicode_KIND</span><span class="p">(</span><span class="n">u</span><span class="p">));</span>
|
||
</pre></div>
|
||
</div>
|
||
</section>
|
||
<section id="generic-hash-modules-datetimemodule-c">
|
||
<h3><a class="toc-backref" href="#generic-hash-modules-datetimemodule-c" role="doc-backlink">generic_hash() (Modules/_datetimemodule.c)</a></h3>
|
||
<p><code class="docutils literal notranslate"><span class="pre">generic_hash</span></code> acts as a wrapper around <code class="docutils literal notranslate"><span class="pre">_Py_HashBytes</span></code> for the tp_hash
|
||
slots of date, time and datetime types. timedelta objects are hashed by their
|
||
state (days, seconds, microseconds) and tzinfo objects are not hashable. The
|
||
data members of date, time and datetime types’ struct are not <code class="docutils literal notranslate"><span class="pre">void*</span></code> aligned.
|
||
This can easily by fixed with memcpy()ing four to ten bytes to an aligned
|
||
buffer.</p>
|
||
</section>
|
||
</section>
|
||
<section id="performance">
|
||
<h2><a class="toc-backref" href="#performance" role="doc-backlink">Performance</a></h2>
|
||
<p>In general the <a class="pep reference internal" href="../pep-0456/" title="PEP 456 – Secure and interchangeable hash algorithm">PEP 456</a> code with SipHash24 is about as fast as the old code
|
||
with FNV. SipHash24 seems to make better use of modern compilers, CPUs and
|
||
large L1 cache. Several benchmarks show a small speed improvement on 64 bit
|
||
CPUs such as Intel Core i5 and Intel Core i7 processes. 32 bit builds and
|
||
benchmarks on older CPUs such as an AMD Athlon X2 are slightly slower with
|
||
SipHash24. The performance increase or decrease are so small that they should
|
||
not affect any application code.</p>
|
||
<p>The benchmarks were conducted on CPython default branch revision b08868fd5994
|
||
and the PEP repository <a class="reference internal" href="#pep-456-repos" id="id18"><span>[pep-456-repos]</span></a>. All upstream changes were merged
|
||
into the <code class="docutils literal notranslate"><span class="pre">pep-456</span></code> branch. The “performance” CPU governor was configured and
|
||
almost all programs were stopped so the benchmarks were able to utilize
|
||
TurboBoost and the CPU caches as much as possible. The raw benchmark results
|
||
of multiple machines and platforms are made available at <a class="reference internal" href="#benchmarks" id="id19"><span>[benchmarks]</span></a>.</p>
|
||
<section id="hash-value-distribution">
|
||
<h3><a class="toc-backref" href="#hash-value-distribution" role="doc-backlink">Hash value distribution</a></h3>
|
||
<p>A good distribution of hash values is important for dict and set performance.
|
||
Both SipHash24 and FNV take the length of the input into account, so that
|
||
strings made up entirely of NULL bytes don’t have the same hash value. The
|
||
last bytes of the input tend to affect the least significant bits of the hash
|
||
value, too. That attribute reduces the amount of hash collisions for strings
|
||
with a common prefix.</p>
|
||
</section>
|
||
<section id="typical-length">
|
||
<h3><a class="toc-backref" href="#typical-length" role="doc-backlink">Typical length</a></h3>
|
||
<p>Serhiy Storchaka has shown in <a class="reference internal" href="#issue16427" id="id20"><span>[issue16427]</span></a> that a modified FNV
|
||
implementation with 64 bits per cycle is able to process long strings several
|
||
times faster than the current FNV implementation.</p>
|
||
<p>However, according to statistics <a class="reference internal" href="#issue19183" id="id21"><span>[issue19183]</span></a> a typical Python program as
|
||
well as the Python test suite have a hash ratio of about 50% small strings
|
||
between 1 and 6 bytes. Only 5% of the strings are larger than 16 bytes.</p>
|
||
</section>
|
||
<section id="grand-unified-python-benchmark-suite">
|
||
<h3><a class="toc-backref" href="#grand-unified-python-benchmark-suite" role="doc-backlink">Grand Unified Python Benchmark Suite</a></h3>
|
||
<p>Initial tests with an experimental implementation and the Grand Unified Python
|
||
Benchmark Suite have shown minimal deviations. The summarized total runtime
|
||
of the benchmark is within 1% of the runtime of an unmodified Python 3.4
|
||
binary. The tests were run on an Intel i7-2860QM machine with a 64-bit Linux
|
||
installation. The interpreter was compiled with GCC 4.7 for 64- and 32-bit.</p>
|
||
<p>More benchmarks will be conducted.</p>
|
||
</section>
|
||
</section>
|
||
<section id="backwards-compatibility">
|
||
<h2><a class="toc-backref" href="#backwards-compatibility" role="doc-backlink">Backwards Compatibility</a></h2>
|
||
<p>The modifications don’t alter any existing API.</p>
|
||
<p>The output of <code class="docutils literal notranslate"><span class="pre">hash()</span></code> for strings and bytes are going to be different. The
|
||
hash values for ASCII Unicode and ASCII bytes will stay equal.</p>
|
||
</section>
|
||
<section id="alternative-counter-measures-against-hash-collision-dos">
|
||
<h2><a class="toc-backref" href="#alternative-counter-measures-against-hash-collision-dos" role="doc-backlink">Alternative counter measures against hash collision DoS</a></h2>
|
||
<p>Three alternative countermeasures against hash collisions were discussed in
|
||
the past, but are not subject of this PEP.</p>
|
||
<ol class="arabic simple">
|
||
<li>Marc-Andre Lemburg has suggested that dicts shall count hash collisions. In
|
||
case an insert operation causes too many collisions an exception shall be
|
||
raised.</li>
|
||
<li>Some applications (e.g. PHP) limit the amount of keys for GET and POST
|
||
HTTP requests. The approach effectively leverages the impact of a hash
|
||
collision attack. (XXX citation needed)</li>
|
||
<li>Hash maps have a worst case of O(n) for insertion and lookup of keys. This
|
||
results in a quadratic runtime during a hash collision attack. The
|
||
introduction of a new and additional data structure with O(log n)
|
||
worst case behavior would eliminate the root cause. A data structures like
|
||
red-black-tree or prefix trees (trie <a class="reference internal" href="#trie" id="id22"><span>[trie]</span></a>) would have other benefits,
|
||
too. Prefix trees with stringed keyed can reduce memory usage as common
|
||
prefixes are stored within the tree structure.</li>
|
||
</ol>
|
||
</section>
|
||
<section id="discussion">
|
||
<h2><a class="toc-backref" href="#discussion" role="doc-backlink">Discussion</a></h2>
|
||
<section id="pluggable">
|
||
<h3><a class="toc-backref" href="#pluggable" role="doc-backlink">Pluggable</a></h3>
|
||
<p>The first draft of this PEP made the hash algorithm pluggable at runtime. It
|
||
supported multiple hash algorithms in one binary to give the user the
|
||
possibility to select a hash algorithm at startup. The approach was considered
|
||
an unnecessary complication by several core committers <a class="reference internal" href="#id26" id="id23"><span>[pluggable]</span></a>. Subsequent
|
||
versions of the PEP aim for compile time configuration.</p>
|
||
</section>
|
||
<section id="non-aligned-memory-access">
|
||
<h3><a class="toc-backref" href="#non-aligned-memory-access" role="doc-backlink">Non-aligned memory access</a></h3>
|
||
<p>The implementation of SipHash24 were criticized because it ignores the issue
|
||
of non-aligned memory and therefore doesn’t work on architectures that
|
||
requires alignment of integer types. The PEP deliberately neglects this
|
||
special case and doesn’t support SipHash24 on such platforms. It’s simply
|
||
not considered worth the trouble until proven otherwise. All major platforms
|
||
like X86, X86_64 and ARMv6+ can handle unaligned memory with minimal or even
|
||
no speed impact. <a class="reference internal" href="#alignmentmyth" id="id24"><span>[alignmentmyth]</span></a></p>
|
||
<p>Almost every block is properly aligned anyway. At present bytes’ and str’s
|
||
data are always aligned. Only memoryviews can point to unaligned blocks
|
||
under rare circumstances. The PEP implementation is optimized and simplified
|
||
for the common case.</p>
|
||
</section>
|
||
<section id="ascii-str-bytes-hash-collision">
|
||
<h3><a class="toc-backref" href="#ascii-str-bytes-hash-collision" role="doc-backlink">ASCII str / bytes hash collision</a></h3>
|
||
<p>Since the implementation of <a class="pep reference internal" href="../pep-0393/" title="PEP 393 – Flexible String Representation">PEP 393</a>, bytes and ASCII text have the same
|
||
memory layout. Because of this the new hashing API will keep the invariant:</p>
|
||
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="nb">hash</span><span class="p">(</span><span class="s2">"ascii string"</span><span class="p">)</span> <span class="o">==</span> <span class="nb">hash</span><span class="p">(</span><span class="sa">b</span><span class="s2">"ascii string"</span><span class="p">)</span>
|
||
</pre></div>
|
||
</div>
|
||
<p>for ASCII string and ASCII bytes. Equal hash values result in a hash collision
|
||
and therefore cause a minor speed penalty for dicts and sets with mixed keys.
|
||
The cause of the collision could be removed by e.g. subtracting <code class="docutils literal notranslate"><span class="pre">2</span></code> from
|
||
the hash value of bytes. <code class="docutils literal notranslate"><span class="pre">-2</span></code> because <code class="docutils literal notranslate"><span class="pre">hash(b"")</span> <span class="pre">==</span> <span class="pre">0</span></code> and <code class="docutils literal notranslate"><span class="pre">-1</span></code> is
|
||
reserved. The PEP doesn’t change the hash value.</p>
|
||
</section>
|
||
</section>
|
||
<section id="references">
|
||
<h2><a class="toc-backref" href="#references" role="doc-backlink">References</a></h2>
|
||
<ul class="simple">
|
||
<li>Issue 19183 <a class="reference internal" href="#issue19183" id="id25"><span>[issue19183]</span></a> contains a reference implementation.</li>
|
||
</ul>
|
||
<div role="list" class="citation-list">
|
||
<div class="citation" id="c3" role="doc-biblioentry">
|
||
<dt class="label" id="c3">[<a href="#id2">29c3</a>]</dt>
|
||
<dd><a class="reference external" href="http://events.ccc.de/congress/2012/Fahrplan/events/5152.en.html">http://events.ccc.de/congress/2012/Fahrplan/events/5152.en.html</a></div>
|
||
<div class="citation" id="fnv" role="doc-biblioentry">
|
||
<dt class="label" id="fnv">[<a href="#id6">fnv</a>]</dt>
|
||
<dd><a class="reference external" href="http://en.wikipedia.org/wiki/Fowler-Noll-Vo_hash_function">http://en.wikipedia.org/wiki/Fowler-Noll-Vo_hash_function</a></div>
|
||
<div class="citation" id="sip" role="doc-biblioentry">
|
||
<dt class="label" id="sip">[sip]<em> (<a href='#id5'>1</a>, <a href='#id9'>2</a>, <a href='#id12'>3</a>, <a href='#id15'>4</a>) </em></dt>
|
||
<dd><a class="reference external" href="https://131002.net/siphash/">https://131002.net/siphash/</a></div>
|
||
<div class="citation" id="ocert" role="doc-biblioentry">
|
||
<dt class="label" id="ocert">[<a href="#id8">ocert</a>]</dt>
|
||
<dd><a class="reference external" href="http://www.nruns.com/_downloads/advisory28122011.pdf">http://www.nruns.com/_downloads/advisory28122011.pdf</a></div>
|
||
<div class="citation" id="ocert-2012-001" role="doc-biblioentry">
|
||
<dt class="label" id="ocert-2012-001">[<a href="#id13">ocert-2012-001</a>]</dt>
|
||
<dd><a class="reference external" href="http://www.ocert.org/advisories/ocert-2012-001.html">http://www.ocert.org/advisories/ocert-2012-001.html</a></div>
|
||
<div class="citation" id="poc" role="doc-biblioentry">
|
||
<dt class="label" id="poc">[<a href="#id4">poc</a>]</dt>
|
||
<dd><a class="reference external" href="https://131002.net/siphash/poc.py">https://131002.net/siphash/poc.py</a></div>
|
||
<div class="citation" id="issue13703" role="doc-biblioentry">
|
||
<dt class="label" id="issue13703">[issue13703]<em> (<a href='#id1'>1</a>, <a href='#id7'>2</a>) </em></dt>
|
||
<dd><a class="reference external" href="http://bugs.python.org/issue13703">http://bugs.python.org/issue13703</a></div>
|
||
<div class="citation" id="issue14621" role="doc-biblioentry">
|
||
<dt class="label" id="issue14621">[<a href="#id3">issue14621</a>]</dt>
|
||
<dd><a class="reference external" href="http://bugs.python.org/issue14621">http://bugs.python.org/issue14621</a></div>
|
||
<div class="citation" id="issue16427" role="doc-biblioentry">
|
||
<dt class="label" id="issue16427">[<a href="#id20">issue16427</a>]</dt>
|
||
<dd><a class="reference external" href="http://bugs.python.org/issue16427">http://bugs.python.org/issue16427</a></div>
|
||
<div class="citation" id="issue19183" role="doc-biblioentry">
|
||
<dt class="label" id="issue19183">[issue19183]<em> (<a href='#id21'>1</a>, <a href='#id25'>2</a>) </em></dt>
|
||
<dd><a class="reference external" href="http://bugs.python.org/issue19183">http://bugs.python.org/issue19183</a></div>
|
||
<div class="citation" id="trie" role="doc-biblioentry">
|
||
<dt class="label" id="trie">[<a href="#id22">trie</a>]</dt>
|
||
<dd><a class="reference external" href="http://en.wikipedia.org/wiki/Trie">http://en.wikipedia.org/wiki/Trie</a></div>
|
||
<div class="citation" id="city" role="doc-biblioentry">
|
||
<dt class="label" id="city">[<a href="#id14">city</a>]</dt>
|
||
<dd><a class="reference external" href="http://code.google.com/p/cityhash/">http://code.google.com/p/cityhash/</a></div>
|
||
<div class="citation" id="murmur" role="doc-biblioentry">
|
||
<dt class="label" id="murmur">[<a href="#id11">murmur</a>]</dt>
|
||
<dd><a class="reference external" href="http://code.google.com/p/smhasher/">http://code.google.com/p/smhasher/</a></div>
|
||
<div class="citation" id="csiphash" role="doc-biblioentry">
|
||
<dt class="label" id="csiphash">[<a href="#id10">csiphash</a>]</dt>
|
||
<dd><a class="reference external" href="https://github.com/majek/csiphash/">https://github.com/majek/csiphash/</a></div>
|
||
<div class="citation" id="aes-ni" role="doc-biblioentry">
|
||
<dt class="label" id="aes-ni">[<a href="#id16">aes-ni</a>]</dt>
|
||
<dd><a class="reference external" href="http://en.wikipedia.org/wiki/AES_instruction_set">http://en.wikipedia.org/wiki/AES_instruction_set</a></div>
|
||
<div class="citation" id="id26" role="doc-biblioentry">
|
||
<dt class="label" id="id26">[<a href="#id23">pluggable</a>]</dt>
|
||
<dd><a class="reference external" href="https://mail.python.org/pipermail/python-dev/2013-October/129138.html">https://mail.python.org/pipermail/python-dev/2013-October/129138.html</a></div>
|
||
<div class="citation" id="alignmentmyth" role="doc-biblioentry">
|
||
<dt class="label" id="alignmentmyth">[<a href="#id24">alignmentmyth</a>]</dt>
|
||
<dd><a class="reference external" href="http://lemire.me/blog/archives/2012/05/31/data-alignment-for-speed-myth-or-reality/">http://lemire.me/blog/archives/2012/05/31/data-alignment-for-speed-myth-or-reality/</a></div>
|
||
<div class="citation" id="pybench" role="doc-biblioentry">
|
||
<dt class="label" id="pybench">[<a href="#id17">pybench</a>]</dt>
|
||
<dd><a class="reference external" href="http://hg.python.org/benchmarks/">http://hg.python.org/benchmarks/</a></div>
|
||
<div class="citation" id="benchmarks" role="doc-biblioentry">
|
||
<dt class="label" id="benchmarks">[<a href="#id19">benchmarks</a>]</dt>
|
||
<dd><a class="reference external" href="https://bitbucket.org/tiran/pep-456-benchmarks/src">https://bitbucket.org/tiran/pep-456-benchmarks/src</a></div>
|
||
<div class="citation" id="pep-456-repos" role="doc-biblioentry">
|
||
<dt class="label" id="pep-456-repos">[<a href="#id18">pep-456-repos</a>]</dt>
|
||
<dd><a class="reference external" href="http://hg.python.org/features/pep-456">http://hg.python.org/features/pep-456</a></div>
|
||
</div>
|
||
</section>
|
||
<section id="copyright">
|
||
<h2><a class="toc-backref" href="#copyright" role="doc-backlink">Copyright</a></h2>
|
||
<p>This document has been placed in the public domain.</p>
|
||
</section>
|
||
</section>
|
||
<hr class="docutils" />
|
||
<p>Source: <a class="reference external" href="https://github.com/python/peps/blob/main/peps/pep-0456.rst">https://github.com/python/peps/blob/main/peps/pep-0456.rst</a></p>
|
||
<p>Last modified: <a class="reference external" href="https://github.com/python/peps/commits/main/peps/pep-0456.rst">2023-10-11 12:05:51 GMT</a></p>
|
||
|
||
</article>
|
||
<nav id="pep-sidebar">
|
||
<h2>Contents</h2>
|
||
<ul>
|
||
<li><a class="reference internal" href="#abstract">Abstract</a></li>
|
||
<li><a class="reference internal" href="#rationale">Rationale</a></li>
|
||
<li><a class="reference internal" href="#requirements-for-a-hash-function">Requirements for a hash function</a></li>
|
||
<li><a class="reference internal" href="#current-implementation-with-modified-fnv">Current implementation with modified FNV</a></li>
|
||
<li><a class="reference internal" href="#examined-hashing-algorithms">Examined hashing algorithms</a><ul>
|
||
<li><a class="reference internal" href="#siphash">SipHash</a></li>
|
||
<li><a class="reference internal" href="#murmurhash">MurmurHash</a></li>
|
||
<li><a class="reference internal" href="#cityhash">CityHash</a></li>
|
||
<li><a class="reference internal" href="#djbx33a">DJBX33A</a></li>
|
||
<li><a class="reference internal" href="#other">Other</a></li>
|
||
<li><a class="reference internal" href="#conclusion">Conclusion</a></li>
|
||
</ul>
|
||
</li>
|
||
<li><a class="reference internal" href="#small-string-optimization">Small string optimization</a></li>
|
||
<li><a class="reference internal" href="#c-api-additions">C API additions</a><ul>
|
||
<li><a class="reference internal" href="#hash-secret">hash secret</a></li>
|
||
<li><a class="reference internal" href="#hash-function-definition">hash function definition</a></li>
|
||
<li><a class="reference internal" href="#autoconf">autoconf</a></li>
|
||
<li><a class="reference internal" href="#hash-function-selection">hash function selection</a></li>
|
||
</ul>
|
||
</li>
|
||
<li><a class="reference internal" href="#python-api-addition">Python API addition</a><ul>
|
||
<li><a class="reference internal" href="#sys-module">sys module</a></li>
|
||
</ul>
|
||
</li>
|
||
<li><a class="reference internal" href="#necessary-modifications-to-c-code">Necessary modifications to C code</a><ul>
|
||
<li><a class="reference internal" href="#py-hashbytes-objects-object-c">_Py_HashBytes() (Objects/object.c)</a></li>
|
||
<li><a class="reference internal" href="#bytes-hash-objects-bytesobject-c">bytes_hash() (Objects/bytesobject.c)</a></li>
|
||
<li><a class="reference internal" href="#memory-hash-objects-memoryobject-c">memory_hash() (Objects/memoryobject.c)</a></li>
|
||
<li><a class="reference internal" href="#unicode-hash-objects-unicodeobject-c">unicode_hash() (Objects/unicodeobject.c)</a></li>
|
||
<li><a class="reference internal" href="#generic-hash-modules-datetimemodule-c">generic_hash() (Modules/_datetimemodule.c)</a></li>
|
||
</ul>
|
||
</li>
|
||
<li><a class="reference internal" href="#performance">Performance</a><ul>
|
||
<li><a class="reference internal" href="#hash-value-distribution">Hash value distribution</a></li>
|
||
<li><a class="reference internal" href="#typical-length">Typical length</a></li>
|
||
<li><a class="reference internal" href="#grand-unified-python-benchmark-suite">Grand Unified Python Benchmark Suite</a></li>
|
||
</ul>
|
||
</li>
|
||
<li><a class="reference internal" href="#backwards-compatibility">Backwards Compatibility</a></li>
|
||
<li><a class="reference internal" href="#alternative-counter-measures-against-hash-collision-dos">Alternative counter measures against hash collision DoS</a></li>
|
||
<li><a class="reference internal" href="#discussion">Discussion</a><ul>
|
||
<li><a class="reference internal" href="#pluggable">Pluggable</a></li>
|
||
<li><a class="reference internal" href="#non-aligned-memory-access">Non-aligned memory access</a></li>
|
||
<li><a class="reference internal" href="#ascii-str-bytes-hash-collision">ASCII str / bytes hash collision</a></li>
|
||
</ul>
|
||
</li>
|
||
<li><a class="reference internal" href="#references">References</a></li>
|
||
<li><a class="reference internal" href="#copyright">Copyright</a></li>
|
||
</ul>
|
||
|
||
<br>
|
||
<a id="source" href="https://github.com/python/peps/blob/main/peps/pep-0456.rst">Page Source (GitHub)</a>
|
||
</nav>
|
||
</section>
|
||
<script src="../_static/colour_scheme.js"></script>
|
||
<script src="../_static/wrap_tables.js"></script>
|
||
<script src="../_static/sticky_banner.js"></script>
|
||
</body>
|
||
</html> |