这种排序算法已经存在吗?

时间:2014-12-29 15:45:47

标签: java algorithm sorting

我一直在阅读一些关于算法的旧书,学习各种排序方法。看起来所有最快的排序算法的运行时间都在 O(n log n) 左右,这让我开始思考:为什么这就是我们能做到的最好结果?我写了另一种算法,它在某些情况下似乎运行得更好(除非我遗漏了什么),但在其他情况下却非常糟糕。这是一个已经在使用的算法吗?我只是在重新发明轮子吗?

public class Main {

public static void main(String[] args) {
    // array sort looks like it performs best in this example.
    // this is because N is pretty close in value to (max - min) in the array
    int[] arr = { 5, 26, 3, 32, 27, 9, 24, 29, 6, 37, 16, 10, 12, 28, 31, 22, 8, 20, 18, 2, 35, 14, 36, 7, 4, 15, 21};
    arraySort(arr);
    for (int i = 0; i < arr.length; i++) {
        System.out.print(arr[i] + " ");
    }

    // array sort does very poorly here.
    // this is because N is 4 which is very far from the value (max - min = 999) in the array
    int[] arr2 = {1, 1000, 100, 10};
    arraySort(arr2);
    for (int i = 0; i < arr2.length; i++) {
        System.out.print(arr2[i] + " ");
    }

    // I think an algorithm could first check if N and maxDifference are close, then it could
    // make sure that maxDifference is not so big that we start to care about size constraints.
    // If it meets those criteria, we can use arraySort, and if not we can use quicksort.
}

/**
 * Sorts in O(N) + O(maxDifference), where maxDifference is the difference between
 * the maximum and minimum values in the array.  Spatial complexity is an array of
 * size maxDifference.
 */
private static void arraySort(int[] arr) {
    if (arr==null || arr.length ==1){//no need to sort
        return;
    }
    int loopCount = 0;  // used for computing the algorithm's complexity
    int min = arr[0];
    int max = arr[0];
    // get the max and min values
    for (int i = 0; i < arr.length; i++) {
        loopCount++;
        int element = arr[i];
        if (element < min) {
            min = element;
        } else if (element > max) {
            max = element;
        }
    }
    int maxDifference = max - min;
    // create a boolean array of size maxDifference.
    // spatial complexity can get pretty bad when 
    // there is a huge maxDifference
    boolean[] positions = new boolean[maxDifference + 1];
    for (int i = 0; i < arr.length; i++) {
        loopCount++;
        int element = arr[i];
        // flag this position as true for later traversal
        positions[element - min] = true;
    }

    int count = 0;
    // traverse the array
    for (int i = 0; i < positions.length; i++) {
        loopCount++;
        boolean element = positions[i];
        if (element) {
            // insert the number into the sorted array
            arr[count++] = i + min;
        }
    }
    int qsortComplexity = (int) (arr.length * Math.log(arr.length)/Math.log(2));
    double isortComplexity = Math.pow(arr.length, 2);
    System.out.println("N = " + arr.length);
    System.out.println("spatial complexity = " + maxDifference);
    System.out.println("complexity = " + loopCount);
    System.out.println("qsortComplexity~= " + qsortComplexity + " isortComplexity~= " + isortComplexity);
}

}

编辑:如果有人感兴趣,我进一步修改了代码以支持重复元素,因此它现在更像是计数排序(counting sort)。

/**
 * Demo of counting sort on int arrays, including an input with duplicate values.
 *
 * <p>{@link #main(String[])} runs the sort on one input where it does well (value range
 * close to the element count), one where it does poorly (value range much larger than
 * the element count), and one containing duplicates.
 */
public class Main {

    public static void main(String[] args) {
        // array sort looks like it performs best in this example.
        // this is because N is pretty close in value to (max - min) in the array
        int[] arr = { 5, 26, 3, 32, 27, 9, 24, 29, 6, 37, 16, 10, 12, 28, 31, 22, 8, 20, 18, 2, 35, 14, 36, 7, 4, 15, 21};
        countingSort(arr);
        printArray(arr);

        // array sort does very poorly here.
        // this is because N is 4 which is very far from the value (max - min = 999) in the array
        int[] arr2 = {1, 1000, 100, 10};
        countingSort(arr2);
        printArray(arr2);

        // testing duplicates
        int[] arr3 = {10, 10, 9, 5, 6, 6, 4, 3, 7, 4, 10, 5, 3, 8, 2, 9};
        countingSort(arr3);
        printArray(arr3);

    }

    /** Prints every element followed by a single space, without a trailing newline. */
    private static void printArray(int[] values) {
        for (int value : values) {
            System.out.print(value + " ");
        }
    }

    /**
     * Sorts {@code arr} in place in O(N) + O(maxDifference), where maxDifference is the
     * difference between the maximum and minimum values in the array. Extra space is an
     * int array of size maxDifference + 1 holding one occurrence count per value, so
     * duplicate values are preserved.
     *
     * <p>{@code null}, empty and single-element arrays are returned untouched and print
     * nothing. Otherwise the method prints the element count, the value range, the number
     * of loop iterations performed and rough quicksort / insertion-sort estimates.
     *
     * @param arr the array to sort in place; may be {@code null}
     * @throws IllegalArgumentException if {@code max - min + 1} does not fit in an
     *     {@code int}, i.e. the count array could not be allocated
     */
    private static void countingSort(int[] arr) {
        // <= 1 (not == 1) so that an empty array does not reach arr[0] below.
        if (arr == null || arr.length <= 1) {
            return;
        }
        int loopCount = 0;  // used for computing the algorithm's complexity
        int min = arr[0];
        int max = arr[0];
        // get the max and min values
        for (int element : arr) {
            loopCount++;
            if (element < min) {
                min = element;
            } else if (element > max) {
                max = element;
            }
        }

        // Compute the range in long: max - min overflows int for widely spread values.
        long range = (long) max - min + 1;
        if (range > Integer.MAX_VALUE) {
            throw new IllegalArgumentException(
                    "value range too large to sort: min=" + min + ", max=" + max);
        }
        int maxDifference = max - min;

        // one counter per possible value in [min, max]
        int[] positionCounts = new int[maxDifference + 1];
        for (int element : arr) {
            loopCount++;
            // add to the count at that position
            positionCounts[element - min] += 1;
        }

        int count = 0;
        // walk the counters in ascending order and write each value back as often as it occurred
        for (int i = 0; i < positionCounts.length; i++) {
            int occurrences = positionCounts[i];
            if (occurrences == 0) {
                // an empty slot still costs one step of the traversal
                loopCount++;
            }
            for (int j = 0; j < occurrences; j++) {
                // insert the number into the sorted array
                arr[count++] = i + min;
                loopCount++;
            }
        }

        int qsortComplexity = (int) (arr.length * Math.log(arr.length)/Math.log(2));
        double isortComplexity = Math.pow(arr.length, 2);
        System.out.println("N = " + arr.length);
        System.out.println("spatial complexity = " + maxDifference);
        System.out.println("complexity = " + loopCount);
        System.out.println("qsortComplexity~= " + qsortComplexity + " isortComplexity~= " + isortComplexity);
    }

}

2 个答案:

答案 0 :(得分:5)

您重新发明了计数排序(counting sort)的一个变体[*]。

这不是比较排序(comparison sort)算法,因此最坏情况下比较次数的 Ω(n log n) 下限并不适用:只要满足某些条件,此算法确实可以用更少的操作完成排序:

  • 主要条件是值的取值范围有限(取值范围是该算法时间复杂度表达式中的一项)。
  • 对于您的算法,另一个条件是元素互不相同。

计数排序和其他相关算法(例如桶排序 bucket sort、基数排序 radix sort 等)是您工具箱中的有用工具。它们不像快速排序(quicksort)那样普遍适用,但在合适的情况下可能正是正确的工具。请参阅维基百科上桶排序与其他算法的比较(comparison of bucket sort with other algorithms)。

[*] 顾名思义,经典的计数排序会统计每个值出现的次数,而不是使用布尔标志,因此更通用。您的算法无法正确处理重复元素:每组重复的值只会保留一个,其余的都会丢失。

答案 1 :(得分:0)

这似乎是计数排序(counting sort)。正如您所注意到的,当一组元素的键为整数、且键的取值范围不明显大于元素数量时,它是一个很好的解决方案。

计数排序是存储桶排序的一种变体,使用大小为1的存储桶。这会增加排序的内存需求,但会降低时间复杂度。