python-peps/pep-0426/pepsort.py

234 lines
7.8 KiB
Python
Executable File

#!/usr/bin/env python3
# Distribution sorting comparisons
# between pkg_resources, PEP 386 and PEP 426
#
# Requires distlib, original script written by Vinay Sajip
import logging
import re
import sys
import json
import errno
import time
from distlib.compat import xmlrpclib
from distlib.version import suggest_normalized_version, legacy_key, normalized_key
logger = logging.getLogger(__name__)

# Pattern for the version strings accepted by pep426_key():
#   N(.N)*  optionally followed by a pre-release tag (a|b|c|rc)N,
#   then an optional ".postN" and an optional ".devN".
# Raw strings are used so that "\d" and "\." reach the regex engine without
# relying on Python's handling of unrecognised string escapes.
PEP426_VERSION_RE = re.compile(r'^(\d+(\.\d+)*)((a|b|c|rc)(\d+))?'
                               r'(\.(post)(\d+))?(\.(dev)(\d+))?$')
def pep426_key(s):
    """Return a sortable key for the version string *s*.

    The key is the 4-tuple ``(nums, pre, post, dev)``:

    * ``nums`` -- the dotted release numbers as a tuple of ints, with
      trailing zero components removed (at least one component is kept).
    * ``pre``  -- ``(tag, n)`` for a pre-release, otherwise a placeholder.
    * ``post`` -- ``('post', n)`` or a placeholder.
    * ``dev``  -- ``('dev', n)`` or a placeholder.

    Raises ValueError when the stripped string does not match
    PEP426_VERSION_RE.
    """
    text = s.strip()
    match = PEP426_VERSION_RE.match(text)
    if match is None:
        raise ValueError('Not a valid version: %s' % text)
    parts = match.groups()

    # Release segment: drop trailing zeros but never the last component.
    release = [int(piece) for piece in parts[0].split('.')]
    while len(release) > 1 and release[-1] == 0:
        release.pop()
    nums = tuple(release)

    def _segment(tag, number):
        # An optional segment that did not match yields (None, None).
        if (tag, number) == (None, None):
            return ()
        return tag, int(number)

    pre = _segment(*parts[3:5])
    post = _segment(*parts[6:8])
    dev = _segment(*parts[9:11])

    if not pre:
        if dev and not post:
            # A bare dev release: must sort before a0.
            pre = ('a', -1)
        else:
            # Final (or post) release: must sort after all pre-releases.
            pre = ('z',)

    # Placeholders for the missing post and dev segments.
    if not post:
        post = ('a',)
    if not dev:
        dev = ('final',)
    return nums, pre, post, dev
def cache_projects(cache_name):
    """Fetch the release list of every project and cache it as JSON.

    Queries the package index over XML-RPC for the list of project names,
    then for the releases of each one, writes the resulting mapping to the
    file *cache_name* and returns it.

    Returns a dict mapping project name to a list of version strings.
    Projects whose lookup failed keep the value None.
    """
    logger.info("Retrieving package data from PyPI")
    # NOTE(review): endpoint URL kept exactly as in the original script --
    # confirm it is still the right XML-RPC address before relying on it.
    client = xmlrpclib.ServerProxy('http://python.org/pypi')
    projects = dict.fromkeys(client.list_packages())
    failed = []
    for pname in projects:
        # Short pause between requests (presumably to go easy on the server).
        time.sleep(0.1)
        logger.debug("Retrieving versions for %s", pname)
        try:
            projects[pname] = list(client.package_releases(pname, True))
        except Exception:
            # Best effort: remember the failure and carry on.  Catching
            # Exception (not a bare except) lets Ctrl-C abort the long run.
            failed.append(pname)
    if failed:
        logger.warning("Error retrieving versions for %s", failed)
    with open(cache_name, 'w') as f:
        json.dump(projects, f, sort_keys=True,
                  indent=2, separators=(',', ': '))
    return projects
def get_projects(cache_name):
    """Return the project -> versions mapping, using the cache when present.

    If the file *cache_name* exists its JSON content is loaded and returned.
    If it does not exist, cache_projects() is called to build it.  Any other
    error raised while opening the file is propagated.
    """
    try:
        cache_file = open(cache_name)
    except IOError as exc:
        # Only a missing cache file triggers a rebuild.
        if exc.errno == errno.ENOENT:
            return cache_projects(cache_name)
        raise
    with cache_file:
        return json.load(cache_file)
# File name of the JSON cache of project versions; main() passes it to
# get_projects().
VERSION_CACHE = "pepsort_cache.json"
class Category(set):
    """A set of project names that share one classification.

    *title* is the label used when the category is printed and
    *num_projects* is the total number of projects, used as the
    denominator of the reported percentage.
    """

    def __init__(self, title, num_projects):
        super().__init__()
        self.title = title
        self.num_projects = num_projects

    def __str__(self):
        """Return ``"<title>: <members> / <total> (<pct> %)"``."""
        num_projects = self.num_projects
        num_in_category = len(self)
        # With no projects at all there is nothing to take a share of;
        # report 0 % instead of failing with ZeroDivisionError.
        if num_projects:
            pct = (100.0 * num_in_category) / num_projects
        else:
            pct = 0.0
        return "{}: {:d} / {:d} ({:.2f} %)".format(
            self.title, num_in_category, num_projects, pct)
# Maps the PEP number selected by the caller to the function that turns a
# version string into a sortable key; main() looks the function up by pepno.
SORT_KEYS = {
    "386": normalized_key,
    "426": pep426_key,
}
def main(pepno='426'):
    """Compare the selected PEP's version ordering with the legacy ordering.

    *pepno* is a key of SORT_KEYS ('386' or '426') and selects the key
    function under test.  Every project from the version cache is placed in
    exactly one Category:

    * no releases at all;
    * fully compatible;
    * compatible once some versions are translated with
      suggest_normalized_version();
    * compatible once unparseable versions are filtered out;
    * sorts differently from the legacy order (with or without translation);
    * incompatible (no version could be parsed).

    Progress is written to stderr, details go to the logger and the
    per-category summary is printed to stdout.
    """
    sort_key = SORT_KEYS[pepno]
    print('Comparing PEP %s version sort to setuptools.' % pepno)

    projects = get_projects(VERSION_CACHE)
    num_projects = len(projects)

    null_projects = Category("No releases", num_projects)
    compatible_projects = Category("Compatible", num_projects)
    translated_projects = Category("Compatible with translation", num_projects)
    filtered_projects = Category("Compatible with filtering", num_projects)
    sort_error_translated_projects = Category("Translations sort differently", num_projects)
    sort_error_compatible_projects = Category("Incompatible due to sorting errors", num_projects)
    incompatible_projects = Category("Incompatible", num_projects)

    # Order in which the summary lines are printed.
    categories = [
        null_projects,
        compatible_projects,
        translated_projects,
        filtered_projects,
        sort_error_translated_projects,
        sort_error_compatible_projects,
        incompatible_projects,
    ]

    for i, (pname, versions) in enumerate(projects.items()):
        # Progress indicator, rewritten in place every 100 projects.
        if i % 100 == 0:
            sys.stderr.write('%s / %s\r' % (i, num_projects))
            sys.stderr.flush()

        if not versions:
            logger.debug('%-15.15s has no releases', pname)
            null_projects.add(pname)
            continue

        # list_legacy and list_pep will contain 2-tuples
        # comprising a sortable representation according to either
        # the setuptools (legacy) algorithm or the PEP algorithm.
        # followed by the original version string
        list_legacy = [(legacy_key(v), v) for v in versions]

        # Go through the PEP 386/426 stuff one by one, since
        # we might get failures
        list_pep = []
        excluded_versions = set()
        translated_versions = set()
        for v in versions:
            try:
                k = sort_key(v)
            except Exception:
                # Not parseable as-is: try the suggested normalised form.
                s = suggest_normalized_version(v)
                if not s:
                    logger.debug('%-15.15s failed for %r, no suggestions', pname, v)
                    excluded_versions.add(v)
                    continue
                try:
                    k = sort_key(s)
                except ValueError:
                    logger.error('%-15.15s failed for %r, with suggestion %r',
                                 pname, v, s)
                    excluded_versions.add(v)
                    continue
                logger.debug('%-15.15s translated %r to %r', pname, v, s)
                translated_versions.add(v)
            # The key may come from the translation, but the original
            # version string is what gets compared between the two orders.
            list_pep.append((k, v))

        if not list_pep:
            logger.debug('%-15.15s has no compatible releases', pname)
            incompatible_projects.add(pname)
            continue

        # Now check the versions sort as expected
        if excluded_versions:
            list_legacy = [(k, v) for k, v in list_legacy
                           if v not in excluded_versions]
        assert len(list_legacy) == len(list_pep)
        sorted_legacy = sorted(list_legacy)
        sorted_pep = sorted(list_pep)
        sv_legacy = [t[1] for t in sorted_legacy]
        sv_pep = [t[1] for t in sorted_pep]
        if sv_legacy != sv_pep:
            if translated_versions:
                logger.debug('%-15.15s translation creates sort differences', pname)
                sort_error_translated_projects.add(pname)
            else:
                logger.debug('%-15.15s incompatible due to sort errors', pname)
                sort_error_compatible_projects.add(pname)
            logger.debug('%-15.15s unequal: legacy: %s', pname, sv_legacy)
            logger.debug('%-15.15s unequal: pep%s: %s', pname, pepno, sv_pep)
            continue

        # The project is compatible to some degree,
        if excluded_versions:
            logger.debug('%-15.15s has some compatible releases', pname)
            filtered_projects.add(pname)
            continue
        if translated_versions:
            logger.debug('%-15.15s is compatible after translation', pname)
            translated_projects.add(pname)
            continue
        logger.debug('%-15.15s is fully compatible', pname)
        compatible_projects.add(pname)

    for category in categories:
        print(category)

    # Uncomment the line below to explore differences in details
    # import pdb; pdb.set_trace()
    # Grepping the log files is also informative
    # e.g. "grep unequal pep426sort.log" for the PEP 426 sort differences
if __name__ == '__main__':
    # An optional first argument of '386' selects the PEP 386 comparison;
    # anything else (or no argument) selects PEP 426.
    pepno = '386' if sys.argv[1:2] == ['386'] else '426'
    # Everything is logged at DEBUG level to a per-PEP file that is
    # overwritten on each run.
    logname = 'pep{}sort.log'.format(pepno)
    logging.basicConfig(
        level=logging.DEBUG,
        filename=logname,
        filemode='w',
        format='%(message)s',
    )
    logger.setLevel(logging.DEBUG)
    main(pepno)