
时间:2012-08-30 15:39:23

标签: python numpy


numpy API 中是否有一个函数,只需遍历数据一次就能同时求出最大值和最小值?

12 个答案:

答案 0 :(得分:30)


numpy API 中是否有一个函数,只需遍历数据一次就能同时求出最大值和最小值?

没有。在撰写本文时,还没有这样的函数。(而且没错,如果有这样的函数,它在大数组上的性能会明显优于先后调用 numpy.amin() 和 numpy.amax()。)

答案 1 :(得分:26)


# One pass over `array`; each element is checked against both running extremes.
minval = maxval = array[0]
for i in array:
    minval = i if i < minval else minval
    maxval = i if i > maxval else maxval

虽然这里只有一个循环,但仍有2个检查。 (而不是有2个循环,每个1检查)。真的,你唯一能节省的就是1循环的开销。如果数组真的很大,就像你说的那样,与实际循环的工作负载相比,这种开销很小。 (注意,这都是用C实现的,所以循环或多或少都是免费的。)


这里有一些可以通过f2py编译成python模块的fortran代码(也许Cython guru可以出现并将其与优化的C版本进行比较......):

subroutine minmax1(a,n,amin,amax)
  ! Find the minimum and maximum of a(1:n) with one explicit loop.
  !
  ! a    : input array of n reals (f2py intent(in))
  ! n    : number of elements in a (f2py intent(hidden))
  ! amin : on return, the smallest element of a (f2py intent(out))
  ! amax : on return, the largest element of a (f2py intent(out))
  !
  ! a(1) is read unconditionally, so n must be at least 1.
  implicit none
  !f2py intent(hidden) :: n
  !f2py intent(out) :: amin,amax
  !f2py intent(in) :: a
  integer n
  real a(n),amin,amax
  integer i

  amin = a(1)
  amax = a(1)
  do i=2, n
     ! amin <= amax always holds, so one element can update at most one of them
     if(a(i) > amax)then
        amax = a(i)
     elseif(a(i) < amin) then
        amin = a(i)
     endif
  enddo
end subroutine minmax1

subroutine minmax2(a,n,amin,amax)
  ! Minimum and maximum of a(1:n) via the MINVAL and MAXVAL intrinsics,
  ! i.e. two separate reductions over the array.
  !
  ! a    : input array of n reals
  ! n    : number of elements in a
  ! amin : on return, minval(a)
  ! amax : on return, maxval(a)
  implicit none
  !f2py intent(hidden) :: n
  !f2py intent(out) :: amin,amax
  !f2py intent(in) :: a
  integer :: n
  real :: a(n)
  real :: amin, amax
  amax = maxval(a)
  amin = minval(a)
end subroutine minmax2


f2py -m untitled -c fortran_code.f90


import timeit

# Benchmark NumPy's two separate reductions against the two routines of the
# f2py-built `untitled` module (minmax1 / minmax2).
size = 100000
repeat = 10000

# Every timed statement works on the same float32 ramp of `size` elements.
numpy_setup = 'import numpy as np; a = np.arange(%d, dtype=np.float32)' % size
fortran_setup = 'import numpy as np; import untitled; a = np.arange(%d, dtype=np.float32)' % size

# A single pre-formatted string prints identically under Python 2 and 3.
print("%s  # numpy min/max" % timeit.timeit(
    'np.min(a); np.max(a)',
    setup=numpy_setup,
    number=repeat))

# The statement must name the routine under test: when it is omitted,
# timeit.timeit() measures its default statement 'pass'.
print("%s # minmax1" % timeit.timeit(
    'untitled.minmax1(a)',
    setup=fortran_setup,
    number=repeat))

print("%s # minmax2" % timeit.timeit(
    'untitled.minmax2(a)',
    setup=fortran_setup,
    number=repeat))


8.61869883537 # numpy min/max
1.60417699814 # minmax1
2.30169081688 # minmax2


注:将 size 增大 10**a 倍、同时将 repeat 减小 10**a 倍(保持总工作量不变)确实会改变性能,但变化方式看起来并不一致,这说明内存性能与 Python 函数调用开销之间存在某种相互作用。即使只比较一个简单的 min 实现,Fortran 版本也比 numpy 快大约 2 倍……

答案 2 :(得分:18)

如果对您有用,可以找到名为numpy.ptp的(最大 - 最小)函数:

>>> import numpy
>>> x = numpy.array([1,2,3,4,5,6])
>>> x.ptp()


编辑:ptp 在底层只是分别调用 min 和 max。

答案 3 :(得分:13)


import numpy
import numba

def minmax(x):
    """Return ``(minimum, maximum)`` of a non-empty sequence in one pass.

    The first element seeds both extremes and ``x[1:]`` supplies the rest,
    so ``x`` must support indexing and slicing. An empty ``x`` fails on
    ``x[0]``.
    """
    lowest = highest = x[0]
    for value in x[1:]:
        # An element can beat at most one of the two extremes.
        if value > highest:
            highest = value
        elif value < lowest:
            lowest = value
    return (lowest, highest)

x = numpy.random.rand(1000000)
print(minmax(x) == (x.min(), x.max()))

它也应该比Numpy的min() & max()实施更快。而且无需编写单个C / Fortran代码行。


答案 4 :(得分:9)


import numpy as np

def extrema_np(arr):
    """Return ``(max, min)`` of ``arr`` using two separate NumPy reductions."""
    largest = np.max(arr)
    smallest = np.min(arr)
    return largest, smallest
import numba as nb

def extrema_loop_nb(arr):
    """Return ``(max, min)`` of a non-empty array with one indexed loop.

    ``arr`` must expose ``.size`` and integer indexing; ``arr[0]`` seeds
    both extremes, so an empty array fails there.
    """
    count = arr.size
    hi = lo = arr[0]
    for idx in range(1, count):
        value = arr[idx]
        # An element can displace at most one of the two extremes.
        if value > hi:
            hi = value
        elif value < lo:
            lo = value
    return hi, lo
import numba as nb

def extrema_while_nb(arr):
    """Return ``(max, min)`` of a non-empty array, consuming elements in pairs.

    After the first element seeds both extremes, the remaining elements are
    taken two at a time: the pair is ordered with one comparison, then the
    smaller is compared with the running minimum and the larger with the
    running maximum. When the array length is even, one element is left over
    and is checked against both extremes at the end.
    """
    limit = arr.size
    is_odd = limit % 2
    if not is_odd:
        # Leave the final element for the tail step below.
        limit -= 1
    max_val = min_val = arr[0]
    for pos in range(1, limit, 2):
        small, large = arr[pos], arr[pos + 1]
        if small > large:
            small, large = large, small
        min_val = min(small, min_val)
        max_val = max(large, max_val)
    if not is_odd:
        tail = arr[limit]
        min_val = min(tail, min_val)
        max_val = max(tail, max_val)
    return max_val, min_val
%%cython -c-O3 -c-march=native -a
#cython: language_level=3, boundscheck=False, wraparound=False, initializedcheck=False, cdivision=True, infer_types=True

import numpy as np

# Single-loop max/min over a typed memoryview of C longs.
#   arr    : input values; arr[0] is read unconditionally, so n must be >= 1
#   n      : number of elements of arr to scan
#   result : output buffer; result[0] receives the max, result[1] the min
cdef void _extrema_loop_cy(
        long[:] arr,
        size_t n,
        long[:] result):
    cdef size_t i
    cdef long item, max_val, min_val
    # Seed both extremes with the first element.
    max_val = arr[0]
    min_val = arr[0]
    for i in range(1, n):
        item = arr[i]
        # An element can displace at most one of the two extremes.
        if item > max_val:
            max_val = item
        elif item < min_val:
            min_val = item
    result[0] = max_val
    result[1] = min_val

def extrema_loop_cy(arr):
    """Return ``(max, min)`` of ``arr`` via the compiled single-loop helper."""
    # Two-slot buffer of the input's dtype; the helper writes max then min.
    out = np.zeros(2, dtype=arr.dtype)
    _extrema_loop_cy(arr, arr.size, out)
    return out[0], out[1]
%%cython -c-O3 -c-march=native -a
#cython: language_level=3, boundscheck=False, wraparound=False, initializedcheck=False, cdivision=True, infer_types=True

import numpy as np

# Pairwise max/min over a typed memoryview of C longs.
#   arr    : input values; arr[0] is read unconditionally, so n must be >= 1
#   n      : number of elements of arr to scan
#   result : output buffer; result[0] receives the max, result[1] the min
cdef void _extrema_while_cy(
        long[:] arr,
        size_t n,
        long[:] result):
    cdef size_t i, odd
    cdef long x, y, max_val, min_val
    max_val = arr[0]
    min_val = arr[0]
    odd = n % 2
    if not odd:
        # Even length: hold back the last element for the tail step below.
        n -= 1
    # Repeats the two seed assignments made above.
    max_val = min_val = arr[0]
    i = 1
    while i < n:
        # Order the pair first, then compare the smaller with the running
        # minimum and the larger with the running maximum.
        x = arr[i]
        y = arr[i + 1]
        if x > y:
            x, y = y, x
        min_val = min(x, min_val)
        max_val = max(y, max_val)
        i += 2
    if not odd:
        # The element held back above is checked against both extremes.
        x = arr[n]
        min_val = min(x, min_val)
        max_val = max(x, max_val)
    result[0] = max_val
    result[1] = min_val

def extrema_while_cy(arr):
    """Return ``(max, min)`` of ``arr`` via the compiled pairwise helper."""
    # Two-slot buffer of the input's dtype; the helper writes max then min.
    out = np.zeros(2, dtype=arr.dtype)
    _extrema_while_cy(arr, arr.size, out)
    return out[0], out[1]





最后,请注意,这些方法都不如 np.min() / np.max() 灵活(就 n 维支持、axis 参数等而言)。


答案 5 :(得分:7)




# The "obvious" single-pass scan: two comparisons per element.
_max = ar[0]
_min = ar[0]
for value in ar:
    # Raise the running maximum / lower the running minimum when beaten.
    if value > _max:
        _max = value
    if value < _min:
        _min = value


## for an even-sized array
_max = ar[0]
_min = ar[0]
# `range` with a step runs on both Python 2 and Python 3.
for ii in range(0, len(ar), 2):  ## iterate over every other value in the array
    f1 = ar[ii]
    f2 = ar[ii+1]
    # Order the pair with one comparison; then only the smaller can lower
    # _min and only the larger can raise _max (3 comparisons per pair).
    if (f1 < f2):
        if f1 < _min: _min = f1
        if f2 > _max: _max = f2
    else:
        if f2 < _min: _min = f2
        if f1 > _max: _max = f1

这里的代码是用 Python 写的;要追求速度,显然应该改用 C、Fortran 或 Cython。但按这种方式,每次迭代做 3 次比较,共 len(ar)/2 次迭代,总计 3/2 * len(ar) 次比较。相比之下,“显而易见”的做法每次迭代做 2 次比较,总计 2 * len(ar) 次比较。这样可以节省 25% 的比较次数。


答案 6 :(得分:6)



import numba as nb
import numpy as np

def minmax(array):
    """Return ``(minimum, maximum)`` of ``array``, or ``None`` if it is empty.

    The array is flattened with ``ravel()``. After the first element seeds
    both extremes, elements are consumed in pairs: one comparison orders the
    pair, then the smaller is compared with the running minimum and the
    larger with the running maximum.

    NOTE(review): ``numba`` is imported as ``nb`` next to this function but
    never applied here; presumably a JIT decorator was lost - confirm.
    """
    # Ravel the array and return early if it's empty
    array = array.ravel()
    length = array.size
    if not length:
        return None

    # We want to process two elements at once so we need
    # an even sized array, but we preprocess the first and
    # start with the second element, so we want it "odd"
    odd = length % 2
    if not odd:
        length -= 1

    # Initialize min and max with the first item
    minimum = maximum = array[0]

    i = 1
    while i < length:
        # Get the next two items and swap them if necessary
        x = array[i]
        y = array[i+1]
        if x > y:
            x, y = y, x
        # Compare the min with the smaller one and the max
        # with the bigger one
        minimum = min(x, minimum)
        maximum = max(y, maximum)
        i += 2

    # If we had an even sized array we need to compare the
    # one remaining item too.
    if not odd:
        x = array[length]
        minimum = min(x, minimum)
        maximum = max(x, maximum)

    return minimum, maximum


arr = np.random.random(3000000)
assert minmax(arr) == minmax_peque(arr)  # warmup and making sure they are identical 
%timeit minmax(arr)            # 100 loops, best of 3: 2.1 ms per loop
%timeit minmax_peque(arr)      # 100 loops, best of 3: 2.75 ms per loop

正如预期的那样,新的 minmax 实现所花的时间大约只有朴素实现的 3/4(2.1 / 2.75 = 0.7636363636363637)。

答案 7 :(得分:5)

乍一看,numpy.histogram 似乎可以做到这一点:

count, (amin, amax) = numpy.histogram(a, bins=1)

……但如果查看该函数的源码(source),就会发现它只是分别调用了 a.min() 和 a.max(),因此无法避免本问题所讨论的性能问题。:-(


答案 8 :(得分:5)

没人提到 numpy.percentile,所以我来补充一下。如果请求 [0, 100] 百分位数,它会返回一个包含两个元素的数组:最小值(第 0 百分位数)和最大值(第 100 百分位数)。


In [1]: import numpy

In [2]: a = numpy.random.normal(0, 1, 1000000)

In [3]: %%timeit
   ...: lo, hi = numpy.amin(a), numpy.amax(a)
100 loops, best of 3: 4.08 ms per loop

In [4]: %%timeit
   ...: lo, hi = numpy.percentile(a, [0, 100])
100 loops, best of 3: 17.2 ms per loop

In [5]: numpy.__version__
Out[5]: '1.14.4'

如果仅请求[0, 100],Numpy的未来版本可能会在特殊情况下跳过正常的百分位数计算。在没有向界面添加任何内容的情况下,有一种方法可以在一次调用中询问Numpy的最小值和最大值(与接受的答案中的内容相反),但是库的标准实现并没有利用这种情况使它值得。

答案 9 :(得分:1)

无论如何,这对我来说都是值得的,所以我将为可能感兴趣的人提出最困难,最不优雅的解决方案。我的解决方案是在C ++中以一次通过算法实现多线程min-max,然后使用它创建一个Python扩展模块。这项工作需要学习一些如何使用Python和NumPy C / C ++ API的开销,在这里,我将展示代码,并为希望沿这条路走的人提供一些小的解释和参考。


这里没有什么太有趣的。数组分为大小为length / workers的块。为future中的每个块计算最小值/最大值,然后对其进行扫描以获取全局最小值/最大值。

    // multi-threaded min/max algorithm

    #include <algorithm>
    #include <future>
    #include <vector>

    namespace mt_np {

    /*
     * Get {min,max} in interval [begin,end)
     *
     * Precondition: the interval is non-empty (*begin is read unconditionally).
     */
    template <typename T> std::pair<T, T> min_max(T *begin, T *end) {
      T min{*begin};
      T max{*begin};
      while (++begin < end) {
        // min <= max always holds, so an element can update at most one of them
        if (*begin < min) {
          min = *begin;
        } else if (*begin > max) {
          max = *begin;
        }
      }
      return {min, max};
    }

    /*
     * get {min,max} in interval [begin,end) using #workers for concurrency
     *
     * The interval is cut into chunks of (end - begin) / workers elements (at
     * least one element each); every chunk is handed to std::async and the
     * per-chunk results are then folded into the overall min/max.
     *
     * Precondition: the interval is non-empty. A workers value below 1 is
     * treated as 1.
     */
    template <typename T>
    std::pair<T, T> min_max_mt(T *begin, T *end, int workers) {
      // Guard the division below against a zero or negative worker count.
      if (workers < 1) {
        workers = 1;
      }
      // Use the pointer-difference type itself so std::max sees one type.
      using diff_t = decltype(end - begin);
      const diff_t chunk_size = std::max<diff_t>((end - begin) / workers, 1);
      std::vector<std::future<std::pair<T, T>>> min_maxes;
      // fire up the workers
      while (begin < end) {
        // Clamp before adding so the pointer never moves beyond `end`.
        T *next = (end - begin > chunk_size) ? begin + chunk_size : end;
        min_maxes.push_back(std::async(min_max<T>, begin, next));
        begin = next;
      }
      // retrieve the results
      auto min_max_it = min_maxes.begin();
      auto v{min_max_it->get()};
      T min{v.first};
      T max{v.second};
      while (++min_max_it != min_maxes.end()) {
        v = min_max_it->get();
        min = std::min(min, v.first);
        max = std::max(max, v.second);
      }
      return {min, max};
    }

    } // namespace mt_np


这是开始变得丑陋的地方。在Python中使用C ++代码的一种方法是实现扩展模块。可以使用distutils.core标准模块来构建和安装该模块。 Python文档中涵盖了对此的完整描述。 注意:当然还有其他获得类似结果的方法,引用


本指南仅涵盖创建扩展的基本工具,该扩展是此版本CPython的一部分。 Cython,cffi,SWIG和Numba等第三方工具为创建Python的C和C ++扩展提供了更简单,更复杂的方法。

从本质上讲,这条路线可能更偏学术而非实用。话虽如此,我接下来所做的是,严格遵循本教程,创建一个模块文件。这实际上只是让 distutils 知道如何处理代码并从中创建 Python 模块的样板代码。在执行上述任何操作之前,最好先创建一个 Python 虚拟环境,以免污染系统软件包。


// C++ module implementation for multi-threaded min/max for np


#include <python3.6/numpy/arrayobject.h>

#include "mt_np.h"

#include <cstdint>
#include <iostream>

using namespace std;

 * check:
 *  shape
 *  stride
 *  data_type
 *  byteorder
 *  alignment
static bool check_array(PyArrayObject *arr) {
  if (PyArray_NDIM(arr) != 1) {
    PyErr_SetString(PyExc_RuntimeError, "Wrong shape, require (1,n)");
    return false;
  if (PyArray_STRIDES(arr)[0] != 8) {
    PyErr_SetString(PyExc_RuntimeError, "Expected stride of 8");
    return false;
  PyArray_Descr *descr = PyArray_DESCR(arr);
  if (descr->type != NPY_LONGLTR && descr->type != NPY_DOUBLELTR) {
    PyErr_SetString(PyExc_RuntimeError, "Wrong type, require l or d");
    return false;
  if (descr->byteorder != '=') {
    PyErr_SetString(PyExc_RuntimeError, "Expected native byteorder");
    return false;
  if (descr->alignment != 8) {
    cerr << "alignment: " << descr->alignment << endl;
    PyErr_SetString(PyExc_RuntimeError, "Require proper alignement");
    return false;
  return true;

// Build a Python 2-tuple of ints from an integer min/max pair.
static PyObject *mt_np_build_pair(int64_t lo, int64_t hi) {
  // "L" consumes a long long, so convert explicitly.
  return Py_BuildValue("(L,L)", static_cast<long long>(lo),
                       static_cast<long long>(hi));
}

// Build a Python 2-tuple of floats from a floating-point min/max pair.
static PyObject *mt_np_build_pair(double lo, double hi) {
  return Py_BuildValue("(d,d)", lo, hi);
}

/*
 * Run the multi-threaded min/max over the data of a 1-D array whose elements
 * are of type T and return the result as a Python tuple (min, max).
 *
 * Returns NULL with a Python ValueError set when the array has no elements.
 */
template <typename T>
static PyObject *mt_np_minmax_dispatch(PyArrayObject *arr) {
  npy_intp size = PyArray_SHAPE(arr)[0];
  if (size < 1) {
    PyErr_SetString(PyExc_ValueError, "zero-size array has no min/max");
    return NULL;
  }
  T *begin = (T *)PyArray_DATA(arr);
  // hardware_concurrency() may report 0; always use at least one worker.
  const unsigned hw = thread::hardware_concurrency();
  const int workers = hw > 0 ? static_cast<int>(hw) : 1;
  auto minmax = mt_np::min_max_mt(begin, begin + size, workers);
  // Overload resolution picks the format string that matches T.
  return mt_np_build_pair(minmax.first, minmax.second);
}

/*
 * Python entry point: mt_np_minmax(array) -> (min, max).
 *
 * Parses one positional argument, validates it with check_array() and
 * dispatches on the dtype character of the array. Returns NULL with a Python
 * exception set on failure.
 *
 * NOTE(review): the "O" format stores the object without a type check, so a
 * non-ndarray argument reaches check_array() unverified - confirm whether an
 * explicit array check is wanted here.
 */
static PyObject *mt_np_minmax(PyObject *self, PyObject *args) {
  PyArrayObject *arr;
  if (!PyArg_ParseTuple(args, "O", &arr))
    return NULL;
  if (!check_array(arr))
    return NULL;
  switch (PyArray_DESCR(arr)->type) {
  case NPY_LONGLTR: {
    return mt_np_minmax_dispatch<int64_t>(arr);
  } break;
  case NPY_DOUBLELTR: {
    return mt_np_minmax_dispatch<double>(arr);
  } break;
  default: {
    PyErr_SetString(PyExc_RuntimeError, "Unknown error");
    return NULL;
  }
  }
}

// Python entry point: return thread::hardware_concurrency() as a Python int
// (the "I" format unit takes an unsigned int).
static PyObject *get_concurrency(PyObject *self, PyObject *args) {
  return Py_BuildValue("I", thread::hardware_concurrency());
}

// Method table of the module: maps each Python-visible name to its C
// function and docstring; the all-NULL entry terminates the table.
static PyMethodDef mt_np_Methods[] = {
    {"mt_np_minmax", mt_np_minmax, METH_VARARGS, "multi-threaded np min/max"},
    {"get_concurrency", get_concurrency, METH_VARARGS,
     "retrieve thread::hardware_concurrency()"},
    {NULL, NULL, 0, NULL} /* sentinel */
};

// Module definition: name "mt_np", no module docstring (NULL), size -1,
// and mt_np_Methods as the method table.
static struct PyModuleDef mt_np_module = {PyModuleDef_HEAD_INIT, "mt_np", NULL,
                                          -1, mt_np_Methods};

// Module initialisation function, looked up by the interpreter as
// PyInit_<module name> when `mt_np` is imported.
PyMODINIT_FUNC PyInit_mt_np() {
  // Initialise the NumPy C-API before any PyArray_* use; on failure the macro
  // sets a Python exception and returns NULL from this function.
  import_array();
  return PyModule_Create(&mt_np_module);
}

此文件大量使用了 Python 和 NumPy 的 C API;更多信息请参见 Python C API 文档以及 NumPy C API 文档。




from distutils.core import setup,Extension

# Extension description for the `mt_np` C++ module.
# NOTE(review): the source list holds a single empty string - presumably the
# C++ module file name was lost; confirm the intended path.
module = Extension('mt_np', sources=[''])

# Package metadata plus the extension to build.
setup(
    name='mt_np',
    version='1.0',
    description='multi-threaded min/max for np arrays',
    ext_modules=[module],
)

要最终安装该模块,请在您的虚拟环境中执行 python3 setup.py install。


最后,我们可以测试一下C ++实现是否真的胜过了天真的使用NumPy。为此,这是一个简单的测试脚本:

# compare numpy min/max vs multi-threaded min/max

import numpy as np
import mt_np
import timeit

def normal_min_max(X):
  """Return ``(min, max)`` of ``X`` using two separate NumPy reductions."""
  smallest = np.min(X)
  largest = np.max(X)
  return (smallest, largest)


# Time both implementations on normally distributed samples whose size grows
# from 10**3 to 10**8 (two samples per size).
for ssize in np.logspace(3,8,6):
  size = int(ssize)
  print('sample size:', size)
  samples = np.random.normal(0,50,(2,size))
  for sample in samples:
    # globals=globals() lets the timed statements resolve `sample`,
    # `normal_min_max` and `mt_np` from this module.
    # NOTE(review): the original repetition count was lost; number=10 is an
    # assumption - confirm.
    print('np:', timeit.timeit('normal_min_max(sample)',
                               globals=globals(), number=10))
    print('mt:', timeit.timeit('mt_np.mt_np_minmax(sample)',
                               globals=globals(), number=10))


sample size: 1000  
np: 0.00012079699808964506  
mt: 0.002468645994667895  
np: 0.00011947099847020581  
mt: 0.0020772050047526136  
sample size: 10000  
np: 0.00024697799381101504  
mt: 0.002037393998762127  
np: 0.0002713389985729009  
mt: 0.0020942929986631498  
sample size: 100000  
np: 0.0007130410012905486  
mt: 0.0019842900001094677  
np: 0.0007540129954577424  
mt: 0.0029724110063398257  
sample size: 1000000  
np: 0.0094779249993735  
mt: 0.007134920000680722  
np: 0.009129883001151029  
mt: 0.012836456997320056  
sample size: 10000000  
np: 0.09471094200125663  
mt: 0.0453535050037317  
np: 0.09436299200024223  
mt: 0.04188535599678289  
sample size: 100000000  
np: 0.9537652180006262  
mt: 0.3957935369980987  
np: 0.9624398809974082  
mt: 0.4019058070043684  

这些结果远不如本帖中较早的结果那样令人鼓舞——后者显示大约 3.5 倍的加速,而且还没有使用多线程。我得到的结果在某种程度上是合理的:我预计在数组变得非常大之前,线程开销会占据大部分时间;此后性能提升才会开始接近 std::thread::hardware_concurrency 倍。



答案 10 :(得分:1)

受 previous answer 的启发,我编写了一个 numba 实现,从二维数组返回 axis=0 的 minmax。它比调用 numpy 的 min/max 快约 5 倍。也许有人会觉得它有用。

from numba import jit

def minmax(x):
    """Compute column-wise minima and maxima of a 2D array in one sweep.

    Returns a pair ``(mi, ma)`` of 1-D arrays created with ``np.empty``,
    each holding one entry per column of ``x``.
    """
    n_rows, n_cols = len(x), len(x[0])
    mi = np.empty(n_cols)
    ma = np.empty(n_cols)
    # The first row seeds both result arrays.
    first_row = x[0]
    mi[:] = first_row
    ma[:] = first_row
    for i in range(1, n_rows):
        for j in range(n_cols):
            # A value can displace at most one of the two extremes.
            if x[i, j] > ma[j]:
                ma[j] = x[i, j]
            elif x[i, j] < mi[j]:
                mi[j] = x[i, j]
    return mi, ma

x = np.random.normal(size=(256, 11))
mi, ma = minmax(x)

np.all(mi == x.min(axis=0)), np.all(ma == x.max(axis=0))
# (True, True)

%timeit x.min(axis=0), x.max(axis=0) 
# 15.9 µs ± 9.4 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)
%timeit minmax(x) 
# 2.62 µs ± 31.3 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)

答案 11 :(得分:0)


mn, mx = np.sort(ar)[[0, -1]]



mn, mx = np.percentile(ar, [0, 100])
