diff --git a/pep-0426.txt b/pep-0426.txt index 4c45609b7..eb9091e73 100644 --- a/pep-0426.txt +++ b/pep-0426.txt @@ -44,8 +44,9 @@ followed by a blank line and a payload containing a description of the distribution. This format is parseable by the ``email`` module with an appropriate -``email.policy.Policy()``. When ``metadata`` is a Unicode string, -```email.parser.Parser().parsestr(metadata)`` is a serviceable parser. +``email.policy.Policy()`` (see `Appendix A`_). When ``metadata`` is a +Unicode string, ``email.parser.Parser().parsestr(metadata)`` is a +serviceable parser. There are three standard locations for these metadata files: @@ -1358,25 +1359,41 @@ the release component. Finally, as the version scheme in use is dependent on the metadata version, it was deemed simpler to merge the scheme definition directly into -this PEP rather than continuing to maintain it as a separate PEP. This will -also allow all of the distutils-specific elements of PEP 386 to finally be -formally rejected. +this PEP rather than continuing to maintain it as a separate PEP. -The following statistics provide an analysis of the compatibility of existing -projects on PyPI with the specified versioning scheme (as of 16th February, -2013). +`Appendix B`_ shows detailed results of an analysis of PyPI distribution +version information, as collected on 19th February, 2013. This analysis +compares the behaviour of the explicitly ordered version schemes defined in +this PEP and PEP 386 with the de facto standard defined by the behaviour +of setuptools. These metrics are useful, as the intent of both PEPs is to +follow existing setuptools behaviour as closely as is feasible, while +still throwing exceptions for unorderable versions (rather than trying +to guess an appropriate order as setuptools does). 
-* Total number of distributions analysed: 28088 -* Distributions with no releases: 248 / 28088 (0.88 %) -* Fully compatible distributions: 24142 / 28088 (85.95 %) -* Compatible distributions after translation: 2830 / 28088 (10.08 %) -* Compatible distributions after filtering: 511 / 28088 (1.82 %) -* Distributions sorted differently after translation: 38 / 28088 (0.14 %) -* Distributions sorted differently without translation: 2 / 28088 (0.01 %) -* Distributions with no compatible releases: 317 / 28088 (1.13 %) +Overall, the percentage of compatible distributions improves from 97.7% +with PEP 386 to 98.7% with this PEP. While the number of projects affected +in practice was small, some of the affected projects are in widespread use +(such as Pinax and selenium). The surprising ordering discrepancy also +concerned developers and acted as an unnecessary barrier to adoption of +the new metadata standard. + +The data also shows that the pre-release sorting discrepancies are seen +only when analysing *all* versions from PyPI, rather than when analysing +public versions. This is largely due to the fact that PyPI normally reports +only the most recent version for each project (unless the maintainers +explicitly configure it to display additional versions). However, +installers that need to satisfy detailed version constraints often need +to look at all available versions, as they may need to retrieve an older +release. + +Even this PEP doesn't completely eliminate the sorting differences relative +to setuptools: + +* Sorts differently (after translations): 38 / 28194 (0.13 %) +* Sorts differently (no translations): 2 / 28194 (0.01 %) The two remaining sort order discrepancies picked up by the analysis are due -to a pair of projects which have published releases ending with a carriage +to a pair of projects which have PyPI releases ending with a carriage return, alongside releases with the same version number, only *without* the trailing carriage return. 
@@ -1390,26 +1407,6 @@ pkg_resources scheme will sort "-dev-N" pre-releases differently from standard scheme will normalize both representations to ".devN" and sort them by the numeric component. -For comparison, here are the corresponding analysis results for PEP 386: - -* Total number of distributions analysed: 28088 -* Distributions with no releases: 248 / 28088 (0.88 %) -* Fully compatible distributions: 23874 / 28088 (85.00 %) -* Compatible distributions after translation: 2786 / 28088 (9.92 %) -* Compatible distributions after filtering: 527 / 28088 (1.88 %) -* Distributions sorted differently after translation: 96 / 28088 (0.34 %) -* Distributions sorted differently without translation: 14 / 28088 (0.05 %) -* Distributions with no compatible releases: 543 / 28088 (1.93 %) - -These figures make it clear that only a relatively small number of current -projects are affected by these changes. However, some of the affected -projects are in widespread use (such as Pinax and selenium). The -changes also serve to bring the standard scheme more into line with -developer's expectations, which is an important element in encouraging -adoption of the new metadata version. - -The script used for the above analysis is available at [3]_. - A more opinionated description of the versioning scheme ------------------------------------------------------- @@ -1550,8 +1547,10 @@ justifications for needing such a standard can be found in PEP 386. .. [3] Version compatibility analysis script: http://hg.python.org/peps/file/default/pep-0426/pepsort.py -Appendix -======== +Appendix A +========== + +The script used for this analysis is available at [3]_. 
Parsing and generating the Metadata 2.0 serialization format using Python 3.3:: @@ -1610,6 +1609,74 @@ Python 3.3:: # Correct if sys.stdout.encoding == 'UTF-8': Generator(sys.stdout, maxheaderlen=0).flatten(m) +Appendix B +========== + +Metadata v2.0 guidelines versus setuptools:: + + $ ./pepsort.py + Comparing PEP 426 version sort to setuptools. + + Analysing release versions + Compatible: 24477 / 28194 (86.82 %) + Compatible with translation: 247 / 28194 (0.88 %) + Compatible with filtering: 84 / 28194 (0.30 %) + No compatible versions: 420 / 28194 (1.49 %) + Sorts differently (after translations): 0 / 28194 (0.00 %) + Sorts differently (no translations): 0 / 28194 (0.00 %) + No applicable versions: 2966 / 28194 (10.52 %) + + Analysing public versions + Compatible: 25600 / 28194 (90.80 %) + Compatible with translation: 1505 / 28194 (5.34 %) + Compatible with filtering: 13 / 28194 (0.05 %) + No compatible versions: 420 / 28194 (1.49 %) + Sorts differently (after translations): 0 / 28194 (0.00 %) + Sorts differently (no translations): 0 / 28194 (0.00 %) + No applicable versions: 656 / 28194 (2.33 %) + + Analysing all versions + Compatible: 24239 / 28194 (85.97 %) + Compatible with translation: 2833 / 28194 (10.05 %) + Compatible with filtering: 513 / 28194 (1.82 %) + No compatible versions: 320 / 28194 (1.13 %) + Sorts differently (after translations): 38 / 28194 (0.13 %) + Sorts differently (no translations): 2 / 28194 (0.01 %) + No applicable versions: 249 / 28194 (0.88 %) + +Metadata v1.2 guidelines versus setuptools:: + + $ ./pepsort.py 386 + Comparing PEP 386 version sort to setuptools. 
+ + Analysing release versions + Compatible: 24244 / 28194 (85.99 %) + Compatible with translation: 247 / 28194 (0.88 %) + Compatible with filtering: 84 / 28194 (0.30 %) + No compatible versions: 648 / 28194 (2.30 %) + Sorts differently (after translations): 0 / 28194 (0.00 %) + Sorts differently (no translations): 0 / 28194 (0.00 %) + No applicable versions: 2971 / 28194 (10.54 %) + + Analysing public versions + Compatible: 25371 / 28194 (89.99 %) + Compatible with translation: 1507 / 28194 (5.35 %) + Compatible with filtering: 12 / 28194 (0.04 %) + No compatible versions: 648 / 28194 (2.30 %) + Sorts differently (after translations): 0 / 28194 (0.00 %) + Sorts differently (no translations): 0 / 28194 (0.00 %) + No applicable versions: 656 / 28194 (2.33 %) + + Analysing all versions + Compatible: 23969 / 28194 (85.01 %) + Compatible with translation: 2789 / 28194 (9.89 %) + Compatible with filtering: 530 / 28194 (1.88 %) + No compatible versions: 547 / 28194 (1.94 %) + Sorts differently (after translations): 96 / 28194 (0.34 %) + Sorts differently (no translations): 14 / 28194 (0.05 %) + No applicable versions: 249 / 28194 (0.88 %) + + Copyright ========= diff --git a/pep-0426/pepsort.py b/pep-0426/pepsort.py index 349e7088e..536248e25 100755 --- a/pep-0426/pepsort.py +++ b/pep-0426/pepsort.py @@ -20,6 +20,8 @@ logger = logging.getLogger(__name__) PEP426_VERSION_RE = re.compile('^(\d+(\.\d+)*)((a|b|c|rc)(\d+))?' 
'(\.(post)(\d+))?(\.(dev)(\d+))?$') +PEP426_PRERELEASE_RE = re.compile('(a|b|c|rc|dev)\d+') + def pep426_key(s): s = s.strip() m = PEP426_VERSION_RE.match(s) @@ -60,23 +62,28 @@ def pep426_key(s): return nums, pre, post, dev +def is_release_version(s): + return not bool(PEP426_PRERELEASE_RE.search(s)) + def cache_projects(cache_name): logger.info("Retrieving package data from PyPI") client = xmlrpclib.ServerProxy('http://python.org/pypi') projects = dict.fromkeys(client.list_packages()) + public = projects.copy() failed = [] for pname in projects: - time.sleep(0.1) + time.sleep(0.01) logger.debug("Retrieving versions for %s", pname) try: projects[pname] = list(client.package_releases(pname, True)) + public[pname] = list(client.package_releases(pname)) except: failed.append(pname) logger.warn("Error retrieving versions for %s", failed) with open(cache_name, 'w') as f: - json.dump(projects, f, sort_keys=True, + json.dump([projects, public], f, sort_keys=True, indent=2, separators=(',', ': ')) - return projects + return projects, public def get_projects(cache_name): try: @@ -84,11 +91,11 @@ def get_projects(cache_name): except IOError as exc: if exc.errno != errno.ENOENT: raise - projects = cache_projects(cache_name); + projects, public = cache_projects(cache_name); else: with f: - projects = json.load(f) - return projects + projects, public = json.load(f) + return projects, public VERSION_CACHE = "pepsort_cache.json" @@ -112,109 +119,139 @@ SORT_KEYS = { "426": pep426_key, } -def main(pepno = '426'): - sort_key = SORT_KEYS[pepno] - print('Comparing PEP %s version sort to setuptools.' 
% pepno) +class Analysis: - projects = get_projects(VERSION_CACHE) - num_projects = len(projects) + def __init__(self, title, projects, releases_only=False): + self.title = title + self.projects = projects - null_projects = Category("No releases", num_projects) - compatible_projects = Category("Compatible", num_projects) - translated_projects = Category("Compatible with translation", num_projects) - filtered_projects = Category("Compatible with filtering", num_projects) - sort_error_translated_projects = Category("Translations sort differently", num_projects) - sort_error_compatible_projects = Category("Incompatible due to sorting errors", num_projects) - incompatible_projects = Category("Incompatible", num_projects) + num_projects = len(projects) - categories = [ - null_projects, - compatible_projects, - translated_projects, - filtered_projects, - sort_error_translated_projects, - sort_error_compatible_projects, - incompatible_projects, - ] + compatible_projects = Category("Compatible", num_projects) + translated_projects = Category("Compatible with translation", num_projects) + filtered_projects = Category("Compatible with filtering", num_projects) + incompatible_projects = Category("No compatible versions", num_projects) + sort_error_translated_projects = Category("Sorts differently (after translations)", num_projects) + sort_error_compatible_projects = Category("Sorts differently (no translations)", num_projects) + null_projects = Category("No applicable versions", num_projects) - sort_failures = 0 - for i, (pname, versions) in enumerate(projects.items()): - if i % 100 == 0: - sys.stderr.write('%s / %s\r' % (i, num_projects)) - sys.stderr.flush() - if not versions: - logger.debug('%-15.15s has no releases', pname) - null_projects.add(pname) - continue - # list_legacy and list_pep will contain 2-tuples - # comprising a sortable representation according to either - # the setuptools (legacy) algorithm or the PEP algorithm. 
- # followed by the original version string - list_legacy = [(legacy_key(v), v) for v in versions] - # Go through the PEP 386/426 stuff one by one, since - # we might get failures - list_pep = [] - excluded_versions = set() - translated_versions = set() - for v in versions: - try: - k = sort_key(v) - except Exception: - s = suggest_normalized_version(v) - if not s: - good = False - logger.debug('%-15.15s failed for %r, no suggestions', pname, v) - excluded_versions.add(v) - continue - else: - try: - k = sort_key(s) - except ValueError: - logger.error('%-15.15s failed for %r, with suggestion %r', - pname, v, s) + self.categories = [ + compatible_projects, + translated_projects, + filtered_projects, + incompatible_projects, + sort_error_translated_projects, + sort_error_compatible_projects, + null_projects, + ] + + sort_key = SORT_KEYS[pepno] + sort_failures = 0 + for i, (pname, versions) in enumerate(projects.items()): + if i % 100 == 0: + sys.stderr.write('%s / %s\r' % (i, num_projects)) + sys.stderr.flush() + if not versions: + logger.debug('%-15.15s has no versions', pname) + null_projects.add(pname) + continue + # list_legacy and list_pep will contain 2-tuples + # comprising a sortable representation according to either + # the setuptools (legacy) algorithm or the PEP algorithm. 
+ # followed by the original version string + # Go through the PEP 386/426 stuff one by one, since + # we might get failures + list_pep = [] + release_versions = set() + prerelease_versions = set() + excluded_versions = set() + translated_versions = set() + for v in versions: + s = v + try: + k = sort_key(v) + except Exception: + s = suggest_normalized_version(v) + if not s: + good = False + logger.debug('%-15.15s failed for %r, no suggestions', pname, v) excluded_versions.add(v) continue - logger.debug('%-15.15s translated %r to %r', pname, v, s) - translated_versions.add(v) - list_pep.append((k, v)) - if not list_pep: - logger.debug('%-15.15s has no compatible releases', pname) - incompatible_projects.add(pname) - continue - # Now check the versions sort as expected - if excluded_versions: - list_legacy = [(k, v) for k, v in list_legacy - if v not in excluded_versions] - assert len(list_legacy) == len(list_pep) - sorted_legacy = sorted(list_legacy) - sorted_pep = sorted(list_pep) - sv_legacy = [t[1] for t in sorted_legacy] - sv_pep = [t[1] for t in sorted_pep] - if sv_legacy != sv_pep: + else: + try: + k = sort_key(s) + except ValueError: + logger.error('%-15.15s failed for %r, with suggestion %r', + pname, v, s) + excluded_versions.add(v) + continue + logger.debug('%-15.15s translated %r to %r', pname, v, s) + translated_versions.add(v) + if is_release_version(s): + release_versions.add(v) + else: + prerelease_versions.add(v) + if releases_only: + logger.debug('%-15.15s ignoring pre-release %r', pname, s) + continue + list_pep.append((k, v)) + if releases_only and prerelease_versions and not release_versions: + logger.debug('%-15.15s has no release versions', pname) + null_projects.add(pname) + continue + if not list_pep: + logger.debug('%-15.15s has no compatible versions', pname) + incompatible_projects.add(pname) + continue + # The legacy approach doesn't refuse the temptation to guess, + # so it *always* gives some kind of answer + if releases_only: + 
excluded_versions |= prerelease_versions + accepted_versions = set(versions) - excluded_versions + list_legacy = [(legacy_key(v), v) for v in accepted_versions] + assert len(list_legacy) == len(list_pep) + sorted_legacy = sorted(list_legacy) + sorted_pep = sorted(list_pep) + sv_legacy = [t[1] for t in sorted_legacy] + sv_pep = [t[1] for t in sorted_pep] + if sv_legacy != sv_pep: + if translated_versions: + logger.debug('%-15.15s translation creates sort differences', pname) + sort_error_translated_projects.add(pname) + else: + logger.debug('%-15.15s incompatible due to sort errors', pname) + sort_error_compatible_projects.add(pname) + logger.debug('%-15.15s unequal: legacy: %s', pname, sv_legacy) + logger.debug('%-15.15s unequal: pep%s: %s', pname, pepno, sv_pep) + continue + # The project is compatible to some degree, + if excluded_versions: + logger.debug('%-15.15s has some compatible versions', pname) + filtered_projects.add(pname) + continue if translated_versions: - logger.debug('%-15.15s translation creates sort differences', pname) - sort_error_translated_projects.add(pname) - else: - logger.debug('%-15.15s incompatible due to sort errors', pname) - sort_error_compatible_projects.add(pname) - logger.debug('%-15.15s unequal: legacy: %s', pname, sv_legacy) - logger.debug('%-15.15s unequal: pep%s: %s', pname, pepno, sv_pep) - continue - # The project is compatible to some degree, - if excluded_versions: - logger.debug('%-15.15s has some compatible releases', pname) - filtered_projects.add(pname) - continue - if translated_versions: - logger.debug('%-15.15s is compatible after translation', pname) - translated_projects.add(pname) - continue - logger.debug('%-15.15s is fully compatible', pname) - compatible_projects.add(pname) + logger.debug('%-15.15s is compatible after translation', pname) + translated_projects.add(pname) + continue + logger.debug('%-15.15s is fully compatible', pname) + compatible_projects.add(pname) - for category in categories: - 
print(category) + def print_report(self): + print("Analysing {}".format(self.title)) + for category in self.categories: + print(" ", category) + +def main(pepno = '426'): + print('Comparing PEP %s version sort to setuptools.' % pepno) + + projects, public = get_projects(VERSION_CACHE) + print() + Analysis("release versions", public, releases_only=True).print_report() + print() + Analysis("public versions", public).print_report() + print() + Analysis("all versions", projects).print_report() # Uncomment the line below to explore differences in details # import pdb; pdb.set_trace() # Grepping the log files is also informative