2013-02-17 03:14:42 -05:00
|
|
|
#!/usr/bin/env python3
|
|
|
|
|
|
|
|
# Distribution sorting comparisons
|
|
|
|
# between pkg_resources, PEP 386 and PEP 426
|
|
|
|
#
|
|
|
|
# Requires distlib, original script written by Vinay Sajip
|
|
|
|
|
|
|
|
import logging
|
|
|
|
import re
|
|
|
|
import sys
|
|
|
|
import json
|
|
|
|
import errno
|
|
|
|
import time
|
|
|
|
|
|
|
|
from distlib.compat import xmlrpclib
|
|
|
|
from distlib.version import suggest_normalized_version, legacy_key, normalized_key
|
|
|
|
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
|
2023-03-10 13:48:32 -05:00
|
|
|
# Full-string pattern for the version syntax accepted by pep426_key().
# Capture groups, in order (1-based):
#   1  dotted release number      2  last ".N" component of the release
#   3  pre-release segment        4  pre-release tag (a/b/c/rc)   5  its number
#   6  ".postN" segment           7  the literal "post"           8  its number
#   9  ".devN" segment           10  the literal "dev"           11  its number
PEP426_VERSION_RE = re.compile(
    r'^(\d+(\.\d+)*)((a|b|c|rc)(\d+))?'
    r'(\.(post)(\d+))?(\.(dev)(\d+))?$'
)

# Unanchored pattern: matches anywhere a pre-release or development tag is
# directly followed by digits.
PEP426_PRERELEASE_RE = re.compile(r'(a|b|c|rc|dev)\d+')
|
2013-02-21 08:37:00 -05:00
|
|
|
|
2013-02-17 03:14:42 -05:00
|
|
|
def pep426_key(s):
    """Return a sortable key for the version string *s*.

    The string is stripped of surrounding whitespace and matched against
    PEP426_VERSION_RE.  The result is a 4-tuple ``(nums, pre, post, dev)``:

    * ``nums`` -- the release numbers as ints, with trailing zero
      components removed (at least one component is always kept);
    * ``pre``  -- ``(tag, number)`` for a pre-release, ``('a', -1)`` for a
      dev-only version with no post segment, otherwise ``('z',)``;
    * ``post`` -- ``('post', number)`` or the placeholder ``('a',)``;
    * ``dev``  -- ``('dev', number)`` or the placeholder ``('final',)``.

    Raises:
        ValueError: if the stripped string does not match the pattern.
    """
    text = s.strip()
    match = PEP426_VERSION_RE.match(text)
    if match is None:
        raise ValueError('Not a valid version: %s' % text)
    parts = match.groups()

    # Release segment: drop trailing zeros so that e.g. "1.0" and "1" agree.
    release = [int(piece) for piece in parts[0].split('.')]
    while len(release) > 1 and release[-1] == 0:
        release.pop()
    nums = tuple(release)

    def segment(pair):
        # An absent optional segment shows up as two unmatched (None) groups.
        if pair == (None, None):
            return ()
        return pair[0], int(pair[1])

    pre = segment(parts[3:5])
    post = segment(parts[6:8])
    dev = segment(parts[9:11])

    if not pre:
        # Either a development version preceding every pre-release, or a
        # final/post release following all of them.
        if dev and not post:
            pre = ('a', -1)  # to sort before a0
        else:
            pre = ('z',)  # to sort after all pre-releases

    # Placeholders keep the tuples comparable when a segment is missing.
    post = post or ('a',)
    dev = dev or ('final',)

    return nums, pre, post, dev
|
|
|
|
|
2013-02-21 08:37:00 -05:00
|
|
|
def is_release_version(s):
    """Return True when *s* contains no pre-release or dev marker.

    A marker is anything PEP426_PRERELEASE_RE finds: one of ``a``, ``b``,
    ``c``, ``rc`` or ``dev`` immediately followed by digits, anywhere in
    the string.
    """
    marker = PEP426_PRERELEASE_RE.search(s)
    return marker is None
|
|
|
|
|
2013-02-17 03:14:42 -05:00
|
|
|
def cache_projects(cache_name):
    """Download version lists for every PyPI project and cache them as JSON.

    Two mappings of project name to a list of version strings are built:
    ``projects`` from ``package_releases(name, True)`` and ``public`` from
    ``package_releases(name)``.  Projects whose lookup fails keep whatever
    value they had at that point (initially ``None``) and are reported in a
    single warning after the loop.

    Args:
        cache_name: path of the JSON file to write; it receives the
            two-element list ``[projects, public]``.

    Returns:
        The tuple ``(projects, public)``.
    """
    logger.info("Retrieving package data from PyPI")
    # NOTE(review): this is a plain-HTTP legacy endpoint; confirm it is still
    # served (or redirected) before relying on a fresh download.
    client = xmlrpclib.ServerProxy('http://python.org/pypi')
    projects = dict.fromkeys(client.list_packages())
    public = projects.copy()
    failed = []
    for pname in projects:
        # Small pause between requests so the server is not hammered.
        time.sleep(0.01)
        logger.debug("Retrieving versions for %s", pname)
        try:
            # NOTE(review): the extra True presumably asks for hidden
            # releases as well -- confirm against the PyPI XML-RPC docs.
            projects[pname] = list(client.package_releases(pname, True))
            public[pname] = list(client.package_releases(pname))
        except Exception:
            # Best effort: remember the failure and carry on.  Catching
            # Exception (not a bare except) lets Ctrl-C / SystemExit abort.
            failed.append(pname)
    if failed:
        logger.warning("Error retrieving versions for %s", failed)
    with open(cache_name, 'w') as f:
        json.dump([projects, public], f, sort_keys=True,
                  indent=2, separators=(',', ': '))
    return projects, public
|
2013-02-17 03:14:42 -05:00
|
|
|
|
|
|
|
def get_projects(cache_name):
    """Load ``(projects, public)`` from the JSON cache, creating it if absent.

    When *cache_name* does not exist, cache_projects() is called to build
    the cache and its result is returned.  Any other error raised while
    opening the file propagates to the caller.

    Args:
        cache_name: path of the JSON cache file.

    Returns:
        The tuple ``(projects, public)``.
    """
    try:
        cache_file = open(cache_name)
    except FileNotFoundError:
        # Only a missing file triggers a rebuild; other OS errors propagate.
        projects, public = cache_projects(cache_name)
        return projects, public

    with cache_file:
        projects, public = json.load(cache_file)
    return projects, public
|
2013-02-17 03:14:42 -05:00
|
|
|
|
|
|
|
|
|
|
|
# File name of the JSON cache that main() hands to get_projects().
VERSION_CACHE = "pepsort_cache.json"
|
|
|
|
|
|
|
|
class Category(set):
    """A titled set of project names that can report its share of the total.

    Attributes:
        title: human-readable label used by ``__str__``.
        num_projects: size of the whole project population, used as the
            denominator of the percentage.
    """

    def __init__(self, title, num_projects):
        """Create an empty category labelled *title* out of *num_projects*."""
        super().__init__()
        self.title = title
        self.num_projects = num_projects

    def __str__(self):
        """Return ``"<title>: <members> / <total> (<pct> %)"``."""
        num_projects = self.num_projects
        num_in_category = len(self)
        # With no projects at all there is nothing to take a share of;
        # report 0 % instead of raising ZeroDivisionError.
        if num_projects:
            pct = (100.0 * num_in_category) / num_projects
        else:
            pct = 0.0
        return "{}: {:d} / {:d} ({:.2f} %)".format(
            self.title, num_in_category, num_projects, pct)
|
|
|
|
|
|
|
|
# Sort-key function for each supported PEP, keyed by the PEP number as a
# string.
SORT_KEYS = {"386": normalized_key, "426": pep426_key}
|
|
|
|
|
2013-02-21 08:37:00 -05:00
|
|
|
class Analysis:
    """Sort each project's versions two ways and classify the outcome.

    Every project ends up in exactly one Category, depending on whether its
    versions could be keyed by the selected PEP sort function (directly, or
    after suggest_normalized_version() rewrote them) and whether the
    resulting order matches the order produced by legacy_key().

    Attributes:
        title: label printed at the top of the report.
        projects: the mapping that was analysed.
        categories: the Category objects, in report order.
    """

    def __init__(self, title, projects, releases_only=False, pepno='426'):
        """Run the analysis over *projects*.

        Args:
            title: label for print_report().
            projects: mapping of project name to a collection of version
                strings; a falsy value counts as "no versions".
            releases_only: when true, versions rejected by
                is_release_version() are left out of the comparison.
            pepno: key into SORT_KEYS choosing the PEP sort function.  It
                is an explicit argument so the class does not depend on a
                module-level ``pepno`` that only exists when the file runs
                as a script.

        Raises:
            KeyError: if *pepno* is not a key of SORT_KEYS.
        """
        self.title = title
        self.projects = projects

        num_projects = len(projects)

        compatible_projects = Category("Compatible", num_projects)
        translated_projects = Category("Compatible with translation", num_projects)
        filtered_projects = Category("Compatible with filtering", num_projects)
        incompatible_projects = Category("No compatible versions", num_projects)
        sort_error_translated_projects = Category("Sorts differently (after translations)", num_projects)
        sort_error_compatible_projects = Category("Sorts differently (no translations)", num_projects)
        null_projects = Category("No applicable versions", num_projects)

        self.categories = [
            compatible_projects,
            translated_projects,
            filtered_projects,
            incompatible_projects,
            sort_error_translated_projects,
            sort_error_compatible_projects,
            null_projects,
        ]

        sort_key = SORT_KEYS[pepno]
        sort_failures = 0
        for i, (pname, versions) in enumerate(projects.items()):
            # Progress indicator on stderr, rewritten in place every 100
            # projects.
            if i % 100 == 0:
                sys.stderr.write('%s / %s\r' % (i, num_projects))
                sys.stderr.flush()
            if not versions:
                logger.debug('%-15.15s has no versions', pname)
                null_projects.add(pname)
                continue
            # list_legacy and list_pep will contain 2-tuples
            # comprising a sortable representation according to either
            # the setuptools (legacy) algorithm or the PEP algorithm.
            # followed by the original version string
            # Go through the PEP 386/426 stuff one by one, since
            # we might get failures
            list_pep = []
            release_versions = set()
            prerelease_versions = set()
            excluded_versions = set()
            translated_versions = set()
            for v in versions:
                s = v
                try:
                    k = sort_key(v)
                except Exception:
                    # The raw string is not acceptable; see whether distlib
                    # can suggest a normalized spelling that is.
                    s = suggest_normalized_version(v)
                    if not s:
                        logger.debug('%-15.15s failed for %r, no suggestions', pname, v)
                        excluded_versions.add(v)
                        continue
                    else:
                        try:
                            k = sort_key(s)
                        except ValueError:
                            logger.error('%-15.15s failed for %r, with suggestion %r',
                                         pname, v, s)
                            excluded_versions.add(v)
                            continue
                    logger.debug('%-15.15s translated %r to %r', pname, v, s)
                    translated_versions.add(v)
                if is_release_version(s):
                    release_versions.add(v)
                else:
                    prerelease_versions.add(v)
                    if releases_only:
                        logger.debug('%-15.15s ignoring pre-release %r', pname, s)
                        continue
                list_pep.append((k, v))
            if releases_only and prerelease_versions and not release_versions:
                logger.debug('%-15.15s has no release versions', pname)
                null_projects.add(pname)
                continue
            if not list_pep:
                logger.debug('%-15.15s has no compatible versions', pname)
                incompatible_projects.add(pname)
                continue
            # The legacy approach doesn't refuse the temptation to guess,
            # so it *always* gives some kind of answer
            if releases_only:
                excluded_versions |= prerelease_versions
            accepted_versions = set(versions) - excluded_versions
            list_legacy = [(legacy_key(v), v) for v in accepted_versions]
            # Internal invariant: both lists must describe the same versions.
            assert len(list_legacy) == len(list_pep)
            sorted_legacy = sorted(list_legacy)
            sorted_pep = sorted(list_pep)
            sv_legacy = [t[1] for t in sorted_legacy]
            sv_pep = [t[1] for t in sorted_pep]
            if sv_legacy != sv_pep:
                if translated_versions:
                    logger.debug('%-15.15s translation creates sort differences', pname)
                    sort_error_translated_projects.add(pname)
                else:
                    logger.debug('%-15.15s incompatible due to sort errors', pname)
                    sort_error_compatible_projects.add(pname)
                logger.debug('%-15.15s unequal: legacy: %s', pname, sv_legacy)
                logger.debug('%-15.15s unequal: pep%s: %s', pname, pepno, sv_pep)
                continue
            # The project is compatible to some degree,
            if excluded_versions:
                logger.debug('%-15.15s has some compatible versions', pname)
                filtered_projects.add(pname)
                continue
            if translated_versions:
                logger.debug('%-15.15s is compatible after translation', pname)
                translated_projects.add(pname)
                continue
            logger.debug('%-15.15s is fully compatible', pname)
            compatible_projects.add(pname)

    def print_report(self):
        """Print the title followed by one summary line per category."""
        print("Analysing {}".format(self.title))
        for category in self.categories:
            print(" ", category)


def main(pepno='426'):
    """Print the three comparison reports for the PEP numbered *pepno*.

    Project data comes from get_projects(VERSION_CACHE).  *pepno* is handed
    to every Analysis explicitly, so this function also works when the
    module is imported rather than executed as a script.
    """
    print('Comparing PEP %s version sort to setuptools.' % pepno)

    projects, public = get_projects(VERSION_CACHE)
    print()
    Analysis("release versions", public, releases_only=True,
             pepno=pepno).print_report()
    print()
    Analysis("public versions", public, pepno=pepno).print_report()
    print()
    Analysis("all versions", projects, pepno=pepno).print_report()
    # Uncomment the line below to explore differences in details
    # import pdb; pdb.set_trace()
    # Grepping the log files is also informative
    # e.g. "grep unequal pep426sort.log" for the PEP 426 sort differences
|
|
|
|
|
|
|
|
if __name__ == '__main__':
    # The only recognised argument is "386"; anything else (or nothing)
    # selects PEP 426.
    pepno = '386' if sys.argv[1:2] == ['386'] else '426'
    # All log output goes to a per-PEP file that is truncated on each run.
    logname = 'pep{}sort.log'.format(pepno)
    logging.basicConfig(
        level=logging.DEBUG,
        filename=logname,
        filemode='w',
        format='%(message)s',
    )
    logger.setLevel(logging.DEBUG)
    main(pepno)
|
|
|
|
|