Python:为什么基于生成器的范围比xrange慢X2?

时间:2015-08-10 20:23:15

标签: python performance cpython

只是好奇,我已经用Python编写了3个测试并使用timeit将它们计时:

import timeit

# simple range based on generator
def my_range(start, stop):
    i = start
    while (i < stop):
        yield i
        i += 1

# test regular range
def test_range():
    x = range(1, 100000)
    sum = 0

    for i in x:
        sum += i

# test xrange
def test_xrange():
    x = xrange(1, 100000)
    sum = 0

    for i in x:
        sum += i

# test my range
def test_my_range():
    x = my_range(1, 100000)
    sum = 0

    for i in x:
        sum += i


print timeit.timeit("test_range()", setup = "from __main__ import test_range", number = 100)
print timeit.timeit("test_xrange()", setup = "from __main__ import test_xrange", number = 100)
print timeit.timeit("test_my_range()", setup = "from __main__ import test_my_range", number = 100)

我已经获得了这些基准:

regular range based test - 0.616795163262

xrange based test - 0.537716731096

my_range (generator) based test - **1.27872886337**

我的范围甚至比创建列表的范围慢了X2。为什么呢?

  1. 是否直接使用C实施了xrange() / range()

  2. 它们是否在没有条件检查的情况下实施?

  3. 谢谢!

2 个答案:

答案 0 :(得分:4)

我觉得简单的答案是xrange()内置并用C语言编写。

我在测试中添加了另一个案例(见下文):基于CPython源的xrange() import timeit from collections import Sequence, Iterator from math import ceil # simple range based on generator def my_range(start, stop): i = start while (i < stop): yield i i += 1 # test regular range def test_range(): x = range(1, 100000) sum = 0 for i in x: sum += i # test xrange def test_xrange(): x = xrange(1, 100000) sum = 0 for i in x: sum += i # test my range def test_my_range(): x = my_range(1, 100000) sum = 0 for i in x: sum += i class pure_python_xrange(Sequence): """Pure-Python implementation of an ``xrange`` (aka ``range`` in Python 3) object. See `the CPython documentation <http://docs.python.org/py3k/library/functions.html#range>`_ for details. """ def __init__(self, *args): if len(args) == 1: start, stop, step = 0, args[0], 1 elif len(args) == 2: start, stop, step = args[0], args[1], 1 elif len(args) == 3: start, stop, step = args else: raise TypeError('pure_python_xrange() requires 1-3 int arguments') try: start, stop, step = int(start), int(stop), int(step) except ValueError: raise TypeError('an integer is required') if step == 0: raise ValueError('pure_python_xrange() arg 3 must not be zero') elif step < 0: stop = min(stop, start) else: stop = max(stop, start) self._start = start self._stop = stop self._step = step self._len = (stop - start) // step + bool((stop - start) % step) def __repr__(self): if self._start == 0 and self._step == 1: return 'pure_python_xrange(%d)' % self._stop elif self._step == 1: return 'pure_python_xrange(%d, %d)' % (self._start, self._stop) return 'pure_python_xrange(%d, %d, %d)' % (self._start, self._stop, self._step) def __eq__(self, other): return isinstance(other, xrange) and \ self._start == other._start and \ self._stop == other._stop and \ self._step == other._step def __len__(self): return self._len def index(self, value): """Return the 0-based position of integer `value` in the sequence this xrange represents.""" diff = value - self._start quotient, remainder = divmod(diff, self._step) if remainder == 0 and 0 <= quotient < self._len: return abs(quotient) raise ValueError('%r is not in range' % value) def count(self, value): """Return the number of ocurrences of integer `value` in the sequence this xrange represents.""" # a value can occur exactly zero or one times return int(value in self) def __contains__(self, value): """Return ``True`` if the integer `value` occurs in the sequence this xrange represents.""" try: self.index(value) return True except ValueError: return False def __reversed__(self): """Return an xrange which represents a sequence whose contents are the same as the sequence this xrange represents, but in the opposite order.""" sign = self._step / abs(self._step) last = self._start + ((self._len - 1) * self._step) return pure_python_xrange(last, self._start - sign, -1 * self._step) def __getitem__(self, index): """Return the element at position ``index`` in the sequence this xrange represents, or raise :class:`IndexError` if the position is out of range.""" if isinstance(index, slice): return self.__getitem_slice(index) if index < 0: # negative indexes access from the end index = self._len + index if index < 0 or index >= self._len: raise IndexError('xrange object index out of range') return self._start + index * self._step def __getitem_slice(self, slce): """Return an xrange which represents the requested slce of the sequence represented by this xrange. """ start, stop, step = slce.start, slce.stop, slce.step if step == 0: raise ValueError('slice step cannot be 0') start = start or self._start stop = stop or self._stop if start < 0: start = max(0, start + self._len) if stop < 0: stop = max(start, stop + self._len) if step is None or step > 0: return pure_python_xrange(start, stop, step or 1) else: rv = reversed(self) rv._step = step return rv def __iter__(self): """Return an iterator which enumerates the elements of the sequence this xrange represents.""" return xrangeiterator(self) class xrangeiterator(Iterator): """An iterator for an :class:`xrange`. """ def __init__(self, xrangeobj): self._xrange = xrangeobj # Intialize the "last outputted value" to the value # just before the first value; this simplifies next() self._last = self._xrange._start - self._xrange._step self._count = 0 def __iter__(self): """An iterator is already an iterator, so return ``self``. """ return self def next(self): """Return the next element in the sequence represented by the xrange we are iterating, or raise StopIteration if we have passed the end of the sequence.""" self._last += self._xrange._step self._count += 1 if self._count > self._xrange._len: raise StopIteration() return self._last # test xrange def test_pure_python_xrange(): x = pure_python_xrange(1, 100000) sum = 0 for i in x: sum += i print timeit.timeit("test_range()", setup = "from __main__ import test_range", number = 100) print timeit.timeit("test_xrange()", setup = "from __main__ import test_xrange", number = 100) print timeit.timeit("test_my_range()", setup = "from __main__ import test_my_range", number = 100) print timeit.timeit("test_pure_python_xrange()", setup = "from __main__ import test_pure_python_xrange", number = 100)

xrange()

结果?

  

$ python so.py

     

0.426695823669

     

0.371111869812

     

0.964643001556

     

6.06390094757

这只是解释的Python代码和C之间的区别。此外,正如上面提到的@byels,gcloud-node仅限于短整数,这可能会产生积极的影响。

答案 1 :(得分:2)

这是一个有趣的测试。查看xrange上的python 2 docs,我想到的一个猜测是,xrange可以利用类型限制(仅使用&#34;短&#34;整数)< / p>