From 781581bd26d1c9ebddf27207e303df464abcc58f Mon Sep 17 00:00:00 2001 From: Christian Heimes Date: Sun, 6 Oct 2013 14:28:39 +0200 Subject: [PATCH] PEP 456: drop pluggable and go for compile time configuration of the hash algorithm. --- pep-0456.txt | 81 ++++++++++++++++++++++------------------------------ 1 file changed, 34 insertions(+), 47 deletions(-) diff --git a/pep-0456.txt b/pep-0456.txt index 0b37dd763..297d897db 100644 --- a/pep-0456.txt +++ b/pep-0456.txt @@ -1,5 +1,5 @@ PEP: 456 -Title: Pluggable and secure hash algorithm +Title: Secure and interchangeable hash algorithm Version: $Revision$ Last-Modified: $Date$ Author: Christian Heimes @@ -8,16 +8,16 @@ Type: Standards Track Content-Type: text/x-rst Created: 27-Sep-2013 Python-Version: 3.4 -Post-History: +Post-History: 06-Oct-2013 Abstract ======== This PEP proposes SipHash as default string and bytes hash algorithm to properly -fix hash randomization once and for all. It also proposes an addition to -Python's C API in order to make the hash code pluggable. The new API allows to -select the algorithm on startup as well as the addition of more hash algorithms. +fix hash randomization once and for all. It also proposes modifications to +Python's C code in order to unify the hash code and to make it easily +interchangeable. Rationale @@ -57,10 +57,8 @@ This PEP proposes three major changes to the hash code for strings and bytes: ``Objects/object.c`` and ``Objects/unicodeobject.c``. The function takes a void pointer plus length and returns the hash for it. -* The algorithm can be selected by the user with an environment variable, - command line argument or with an API function (for embedders). FNV is - guaranteed to exist on all platforms. SipHash is available on the majority - of modern systems. +* The algorithm can be selected at compile time. FNV is guaranteed to exist + on all platforms. SipHash is available on the majority of modern systems. Requirements for a hash function @@ -321,50 +319,25 @@ hash function table type definition:: typedef struct { - PyHash_Func hashfunc; /* function pointer */ + PyHash_Func hash; /* function pointer */ char *name; /* name of the hash algorithm and variant */ int hash_bits; /* internal size of hash value */ int seed_bits; /* size of seed input */ - int precedence; /* ranking for auto-selection */ - } PyHash_FuncDef; + } _PyHash_FuncDef; - PyAPI_DATA(PyHash_FuncDef *) PyHash_FuncTable; + PyAPI_DATA(_PyHash_FuncDef *) _PyHash_Func; Implementation:: - PyHash_FuncDef hash_func_table[] = { - {fnv, "fnv", 64, 128, 10}, + #ifndef PY_HASH_FUNC #ifdef PY_UINT64_T - {siphash24, "sip24", sizeof(Py_hash_t)*8, sizeof(Py_hash_t)*8, 20}, + _PyHash_Func = {siphash24, "sip24", 64, 128} + #else + _PyHash_Func = {fnv, "fnv", 8 * sizeof(Py_hash_t), 16 * sizeof(Py_hash_t)} + #endif #endif - {NULL, NULL}, - }; - PyHash_FuncDef *PyHash_FuncTable = hash_func_table; - - -hash function API ------------------ - -function proto types:: - - PyAPI_FUNC(int) PyHash_SetHashAlgorithm(char *name); - - PyAPI_FUNC(PyHash_FuncDef *) PyHash_GetHashAlgorithm(void); - - PyAPI_DATA(PyHash_FuncDef *) _PyHash_Func; - -``PyHash_SetHashAlgorithm(NULL)`` selects the hash algorithm with the highest -precedence. ``PyHash_SetHashAlgorithm("sip24")`` selects siphash24 as hash -algorithm. The function returns ``0`` on success. In case the algorithm is -not supported or a hash algorithm is already set it returns ``-1``. -(XXX use enum?) - -``PyHash_GetHashAlgorithm()`` returns a pointer to current hash function -definition or `NULL`. - -``_PyHash_Func`` holds the set hash function definition. It can't be modified -or reset once a hash algorithm is set. +TODO: select hash algorithm with autoconf variable Python API addition @@ -379,9 +352,8 @@ algorithm as well as all available algorithms. :: sys.hash_info(algorithm='siphash24', - available_algorithms=('siphash24', 'fnv'), hash_bits=64, - hash_output=64, # sizeof(Py_hash_t)*8 + hash_output=64, # 8 * sizeof(Py_hash_t) seed_bits=128) @@ -439,8 +411,8 @@ multiplied with the size of the internal unicode kind:: if (PyUnicode_READY(u) == -1) return -1; - x = _PyHash_Func->hashfunc(PyUnicode_DATA(u), - PyUnicode_GET_LENGTH(u) * PyUnicode_KIND(u)); + x = _PyHash_Func->hash(PyUnicode_DATA(u), + PyUnicode_GET_LENGTH(u) * PyUnicode_KIND(u)); generic_hash (Modules/_datetimemodule.c) @@ -534,6 +506,19 @@ the past, but are not subject of this PEP. prefixes are stored within the tree structure. +Discussion +========== + +Pluggable +--------- + +The first draft of this PEP made the hash algorithm pluggable at runtime. It +supported multiple hash algorithms in one binary to give the user the +possibility to select a hash algorithm at startup. The approach was considered +an unnecessary complication by several core committers [pluggable]_. Subsequent +versions of the PEP aim for compile time configuration. + + Reference ========= @@ -567,6 +552,8 @@ Reference .. [aes-ni] http://en.wikipedia.org/wiki/AES_instruction_set +.. [pluggable] https://mail.python.org/pipermail/python-dev/2013-October/129138.html + Copyright =========