快速选择C算法比C Qsort更快

时间:2018-08-25 10:36:34

标签: algorithm cython quicksort quickselect

我已尝试实现本文(3 way quicksort (C implementation))中所述的C QuickSelect算法。 但是,我得到的性能比默认的qsort慢5到10倍(即使使用初始混洗)。 我试图深入研究此处提供的原始qsort源代码(https://github.com/lattera/glibc/blob/master/stdlib/qsort.c),但是它太复杂了。 有没有人有一个更简单、更好的算法? 欢迎任何想法。 谢谢。 注意:我最初的问题是尝试将数组的前K个最小值放到前K个索引位置,所以我计划调用K次quickselect。 编辑1:这是从上面的链接复制并改编而成的Cython代码

cdef void qswap(void* a, void* b, const size_t size) nogil:
    # Exchange the `size` bytes stored at `a` with the `size` bytes stored at `b`.
    #
    # The exchange is done one byte at a time, so no scratch buffer whose
    # length depends on `size` is needed, and the call stays well defined when
    # `a` and `b` are the same address (qshuffle can pick j == i).
    cdef char* left = <char*>a    # char is used as the generic byte type
    cdef char* right = <char*>b
    cdef char held
    cdef size_t n

    for n in range(size):
        held = left[n]
        left[n] = right[n]
        right[n] = held

cdef void qshuffle(void* base, size_t num, size_t size) nogil:
    # Fisher-Yates shuffle of `num` elements of `size` bytes each, in place.
    #
    # base: start of the array; num: element count; size: bytes per element.
    # Random numbers come from c_rand(), which is declared elsewhere in the
    # module (presumably libc rand() -- confirm against the cimport).
    cdef char* items = <char*>base  # byte pointer: well-defined arithmetic
    cdef size_t i, j

    # Nothing to shuffle; this also keeps `num - 1` from wrapping when num == 0.
    if num < 2:
        return

    i = num - 1
    while i > 0:
        # pick j in [0, i] and exchange elements i and j
        j = (<size_t>c_rand()) % (i + 1)
        qswap(items + i*size, items + j*size, size)
        i -= 1

cdef void partition3(void* base,
                      size_t *low, size_t *high, size_t size,
                      QComparator compar) nogil:
    # Three-way partition of the elements low[0]..high[0] (inclusive) of `base`.
    #
    # The element initially at index low[0] is the pivot. On return low[0] and
    # high[0] delimit the run of elements that compare equal to the pivot:
    #   base := [ < pivot | low[0] .. == pivot .. high[0] | > pivot ]
    cdef void *items = base
    cdef size_t eq_first = low[0]       # the pivot always sits at eq_first
    cdef size_t eq_last = high[0]
    cdef size_t cursor = eq_first + 1   # start after the pivot itself
    cdef int order = 0

    while cursor <= eq_last:
        order = compar(items + cursor * size, items + eq_first * size)
        if order == 0:
            # equal to the pivot: extend the middle run
            cursor += 1
        elif order < 0:
            # smaller than the pivot: move it in front of the middle run
            qswap(items + eq_first * size, items + cursor * size, size)
            cursor += 1
            eq_first += 1
        else:
            # larger than the pivot: send it to the back; the element swapped
            # in has not been examined yet, so cursor does not advance
            qswap(items + cursor * size, items + eq_last * size, size)
            eq_last -= 1

    low[0] = eq_first
    high[0] = eq_last


cdef void qselectk3(void* base, size_t lo, size_t hi,
   size_t size, size_t k,
   QComparator compar) nogil:
    # Quickselect over base[lo..hi] (inclusive) using partition3.
    #
    # k is 1-based: the search is for index k - 1, which must lie in [lo, hi].
    # The range is narrowed around index k - 1 until that index falls inside
    # the equal-to-pivot run returned by partition3.
    #
    # Written as a loop instead of tail recursion so the C stack does not grow
    # with the number of partition rounds.
    cdef size_t low
    cdef size_t high
    cdef size_t target

    # An out-of-range k can never land in an equal-to-pivot run; without this
    # guard the search would keep narrowing forever.
    if k == 0 or k - 1 < lo or k - 1 > hi:
        return
    target = k - 1

    while 1:
        low = lo
        high = hi
        partition3(base, &low, &high, size, compar)

        if target < low:
            # target lies in the less-than-pivot partition
            hi = low - 1
        elif target <= high:
            # target lies in the equal-to-pivot partition
            # NOTE(review): with k == 1 (the only use in this file) low is 0
            # here, so this swap is a no-op; for other k it exchanges base[0]
            # with the first pivot-equal element -- confirm that is intended.
            qswap(base, base + size*low, size)
            return
        else:
            # target lies in the greater-than-pivot partition
            lo = high + 1

"""
 A selection algorithm to find the nth smallest elements in an unordered list. 
 these elements ARE placed at the nth positions of the input array                                                                         
"""
cdef void qselect(void* base, size_t num, size_t size,
                              size_t n,
                              QComparator compar) nogil:
    # Move the n smallest elements of the array to its first n positions.
    #
    # base: array of `num` elements of `size` bytes each; compar: ordering.
    # The array is shuffled first. Then, for each position k, qselectk3 is run
    # with k == 1 on the sub-array starting at element k, which brings the
    # minimum of the remaining elements to that position.
    cdef char* items = <char*>base  # byte pointer: well-defined arithmetic
    cdef size_t k

    if num == 0:
        return
    # Asking for more elements than exist would make `num - k - 1` wrap.
    if n > num:
        n = num

    qshuffle(base, num, size)
    for k in range(n):
        qselectk3(items + size*k, 0, num - k - 1, size, 1, compar)

我使用python timeit来获得pyselect(N = 50)和pysort的性能。 像这样

def testPySelect():
    """Build 10000 random int32 values in [0, 16) and run pyselect on them with N = 50."""
    values = np.random.randint(16, size=10000, dtype=np.int32)
    pyselect(values, 50)
timeit.timeit(testPySelect, number=1)

def testPySort():
    """Build 10000 random int32 values in [0, 16) and run pysort on them."""
    values = np.random.randint(16, size=10000, dtype=np.int32)
    pysort(values)
timeit.timeit(testPySort, number=1)

2 个答案:

答案 0 :(得分:1)

@chqrlie的答案是一个很好的最终答案,但为了完成该帖子,我将发布Cython版本以及基准测试结果。 简而言之,在长向量上,所提出的解决方案比qsort快2倍!


    cdef void qswap2(void *aptr, void *bptr, size_t size) nogil:
        # Exchange `size` bytes between the two buffers, one byte at a time.
        cdef uint8_t* left = <uint8_t*>aptr
        cdef uint8_t* right = <uint8_t*>bptr
        cdef uint8_t held
        cdef size_t n
        for n in range(size):
            held = left[n]
            left[n] = right[n]
            right[n] = held

    # One pending sub-range of the array, kept on the explicit stack used by
    # qselect2. Both ends are byte pointers into the array being processed.
    cdef struct qselect2_stack:
        uint8_t *base  # first element of the sub-range
        uint8_t *last  # last element of the sub-range (inclusive)

    # Iterative quicksort that skips sub-ranges starting at or beyond element k.
    #
    # base:   start of the array
    # nmemb:  number of elements
    # size:   size of one element in bytes
    # k:      number of leading elements of interest (values >= nmemb are
    #         treated as nmemb)
    # compar: comparison callback, called with pointers to two elements
    #
    # Pending sub-ranges are kept on a fixed 64-entry stack instead of using
    # recursion.
    # NOTE(review): pushes onto `stack` are not bounds-checked; the code relies
    # on always stacking the larger segment to keep the depth small -- confirm.
    cdef void qselect2(void *base, size_t nmemb, size_t size,
                      size_t k, QComparator compar) nogil:
        cdef qselect2_stack stack[64]
        cdef qselect2_stack *sp = &stack[0]

        cdef uint8_t *lb
        cdef uint8_t*ub
        cdef uint8_t *p
        cdef uint8_t *i
        cdef uint8_t *j
        cdef uint8_t *top

        # size is unsigned, so `size <= 0` only matches size == 0
        if (nmemb < 2 or size <= 0):
            return

        # top = address just past the last element of interest
        top = <uint8_t *>base
        if(k < nmemb): 
            top += k*size 
        else: 
            top += nmemb*size

        # start with the whole array as the only pending sub-range
        sp.base = <uint8_t *>base
        sp.last = <uint8_t *>base + (nmemb - 1) * size
        sp += 1

        cdef size_t offset

        while (sp > stack):
            sp -= 1
            lb = sp.base
            ub = sp.last

            # a range is only refined while it has at least two elements and
            # starts before top
            while (lb < ub and lb < top):
                #select middle element as pivot and exchange with 1st element
                offset = (ub - lb) >> 1
                # round the byte offset down to an element boundary
                p = lb + offset - offset % size
                qswap2(lb, p, size)

                #partition into two segments
                i = lb + size
                j = ub
                while 1:
                    # advance i over elements smaller than the pivot (at lb)
                    while (i < j and compar(lb, i) > 0):
                        i += size
                    # retreat j over elements larger than the pivot
                    while (j >= i and compar(j, lb) > 0):
                        j -= size
                    if (i >= j):
                        break
                    qswap2(i, j, size)
                    i += size
                    j -= size

                # move pivot where it belongs
                qswap2(lb, j, size)

                # keep processing smallest segment, and stack largest
                if (j - lb <= ub - j):
                    sp.base = j + size
                    sp.last = ub
                    sp += 1
                    ub = j - size
                else:
                    sp.base = lb
                    sp.last = j - size
                    sp += 1
                    lb = j + size

    cdef int int_comp(void* a, void* b) nogil:
        # Three-way comparison of the two C ints pointed to by a and b:
        # returns 1 if *a > *b, -1 if *a < *b, and 0 if they are equal.
        cdef int lhs = (<int*>a)[0]
        cdef int rhs = (<int*>b)[0]
        if lhs > rhs:
            return 1
        if lhs < rhs:
            return -1
        return 0

    def pyselect2(numpy.ndarray[int, ndim=1, mode="c"] na, int n):
        """Run qselect2 in place on a C-contiguous 1-D int array.

        na -- the array to reorder in place
        n  -- passed to qselect2 as its k argument
        """
        cdef int* data = <int*>&na[0]
        cdef size_t count = len(na)
        qselect2(data, count, sizeof(int), n, int_comp)

以下是基准测试结果(1,000个测试):

#of elements   K      #qsort (s)                     #qselect2 (s)
1,000          50     0.1261                         0.0895
1,000          100    0.1261                         0.0910

10,000         50     0.8113                         0.4157
10,000         100    0.8113                         0.4367
10,000         1,000  0.8113                         0.4746

100,000        100    7.5428                         3.8259
100,000        1,000  7.5428                         3.8325
100,000        10,000 7.5428                         4.5727

对于那些好奇的人来说,这段代码是使用神经网络进行表面重建领域的一颗明珠。 再次感谢@chqrlie,您的代码在Web上是唯一的。

答案 1 :(得分:0)

这是针对您目的的快速实现:qsort_select 是 qsort 的一个简单实现,可自动修剪不必要的范围。

如果去掉 && lb < top,它的行为与常规qsort类似,只是在病态情况下,更高级的qsort版本具有更好的启发式策略。这个额外的测试可以避免对目标范围 0 ..(k-1) 之外的区间进行完整排序。该函数选出k个最小值并将它们排好序,数组的其余部分保留剩余的值,顺序不确定。

#include <stdio.h>
#include <stdint.h>

/* Swap the `size` bytes starting at `ac` with the `size` bytes starting at `bc`. */
static void exchange_bytes(uint8_t *ac, uint8_t *bc, size_t size) {
    while (size-- > 0) { uint8_t t = *ac; *ac++ = *bc; *bc++ = t; }
}

/**
 * Select and sort the k smallest elements from an array.
 *
 * On return the first min(k, nmemb) elements are the smallest ones, in
 * ascending order according to `compar`; the remaining elements are left in
 * an unspecified order.
 *
 * @param base   start of the array
 * @param nmemb  number of elements in the array
 * @param size   size of one element in bytes
 * @param compar qsort-style comparison callback
 * @param k      number of leading elements to select and sort
 *
 * Sub-ranges are tracked as half-open element index pairs [lo, hi) rather
 * than first/last byte pointers, so no pointer outside the array is ever
 * formed (an inclusive `last` would end up one element before the array
 * whenever the pivot lands on the first element).
 */
void qsort_select(void *base, size_t nmemb, size_t size,
                  int (*compar)(const void *a, const void *b), size_t k)
{
    /* The larger segment is always stacked and the smaller one processed
     * first, so the depth stays below log2(nmemb). */
    struct { size_t lo, hi; } stack[64], *sp = stack;
    uint8_t *const items = base;

    if (nmemb < 2 || size == 0 || k == 0)
        return;

    /* ranges that start at or beyond this index are never refined */
    const size_t top = k < nmemb ? k : nmemb;

    sp->lo = 0;
    sp->hi = nmemb;
    sp++;
    while (sp > stack) {
        --sp;
        size_t lo = sp->lo;
        size_t hi = sp->hi;
        while (hi - lo >= 2 && lo < top) {
            /* select middle element as pivot and exchange with 1st element */
            uint8_t *const pivot = items + lo * size;
            const size_t mid = lo + (hi - lo - 1) / 2;
            exchange_bytes(pivot, items + mid * size, size);

            /* partition into two segments; j never drops below lo */
            size_t i = lo + 1;
            size_t j = hi - 1;
            for (;;) {
                while (i < j && compar(pivot, items + i * size) > 0)
                    i++;
                while (j >= i && compar(items + j * size, pivot) > 0)
                    j--;
                if (i >= j)
                    break;
                exchange_bytes(items + i * size, items + j * size, size);
                i++;
                j--;
            }
            /* move pivot where it belongs */
            exchange_bytes(pivot, items + j * size, size);

            /* keep processing smallest segment, and stack largest; a segment
             * is only stacked if it could still be refined once popped */
            if (j - lo <= hi - (j + 1)) {
                if (hi - (j + 1) >= 2 && j + 1 < top) {
                    sp->lo = j + 1;
                    sp->hi = hi;
                    sp++;
                }
                hi = j;
            } else {
                if (j - lo >= 2) {
                    sp->lo = lo;
                    sp->hi = j;
                    sp++;
                }
                lo = j + 1;
            }
        }
    }
}

/* qsort-style comparator for int: returns -1, 0 or 1 without risking the
 * overflow a plain subtraction could cause. */
int int_cmp(const void *a, const void *b) {
    const int *lhs = a;
    const int *rhs = b;

    if (*lhs < *rhs) {
        return -1;
    }
    if (*lhs > *rhs) {
        return 1;
    }
    return 0;
}

#define ARRAY_SIZE  50000

int array[ARRAY_SIZE];

/* Demo: fill the array in descending order (ARRAY_SIZE down to 1), select
 * the 50 smallest values with qsort_select, and print them comma-separated
 * on one line. */
int main(void) {
    int idx = 0;

    while (idx < ARRAY_SIZE) {
        array[idx] = ARRAY_SIZE - idx;
        idx++;
    }

    qsort_select(array, ARRAY_SIZE, sizeof(*array), int_cmp, 50);

    for (idx = 0; idx < 50; idx++) {
        /* the last value ends the line; every other one is followed by a comma */
        char sep = (idx + 1 == 50) ? '\n' : ',';
        printf("%d%c", array[idx], sep);
    }
    return 0;
}
相关问题