
时间:2014-06-30 08:18:30

标签: java arrays parallel-processing fork-join


我该如何使用 Java 的 Fork/Join 框架(或一般的并行手段)来完成"将两个二维数组逐元素相加"这样的任务?对这个问题使用并行是否有意义?


public class SumArray extends RecursiveTask<int[][]> {

    private static final int ROW_CUTOFF = 10;
    private static final int COL_CUTOFF = 10;

    int[][] left_;
    int[][] right_;
    int rowLo_;
    int rowHi_;
    int colLo_;
    int colHi_;

    SumArray(int[][] left, int[][] right, int rowLo, int rowHi, int colLo, int colHi) {
        left_ = left;
        right_ = right;
        rowLo_ = rowLo;
        rowHi_ = rowHi;
        colLo_ = colLo;
        colHi_ = colHi;

    protected int[][] compute() {
        if (rowHi_ - rowLo_ <= ROW_CUTOFF && colHi_ - colLo_ <= COL_CUTOFF) {
            for (int i = rowLo_; i < rowHi_; i++) {
                for (int j = colLo_; j < colHi_; j++) {
                    left_[i][j] += right_[i][j];
            return left_;
        int rowMid = rowLo_ + ((rowHi_ - rowLo_) / 2); 
        int colMid = colLo_ + ((colHi_ - colLo_) / 2);
        SumArray topLeft = new SumArray(left_, right_, rowLo_, rowMid, colLo_, colMid);
            SumArray topRight = new SumArray(left_, right_, rowMid, rowHi_, colLo_, colMid);
            int[][] topRightSummed = topRight.compute();
            int[][] topLeftSummed = topLeft.join();
            // ???


2 个答案:

答案 0 :(得分:4)

在为这个问题投入多线程之前,请先优化单核上的执行效率。在这种场景下,CPU 缓存未命中会带来可测量的差异。例如,下面的示例代码对同一个数组求和:一种情况按 array[i][j] 的顺序访问,另一种情况按 array[j][i] 的顺序访问。其中一种方式的 CPU 缓存未命中更少,因此比另一种快得多。以下代码可用于演示这一行为。

/**
 * Micro-benchmark that sums the same 2D grid in two traversal orders
 * ({@code data[x][y]} versus {@code data[y][x]}) and prints how long each
 * takes, to show the effect of memory access order on a single core.
 */
public class Sum2D {

    /**
     * Runs each traversal 100000 times over a 100x100 grid and prints the two
     * durations (in seconds) followed by the accumulated sum.
     *
     * <p>NOTE(review): there is no warm-up phase, so the first measurement also
     * includes JIT compilation time; treat the numbers as indicative only.
     */
    public static void main( String[] args ) {
        int[][] data = createGrid(100);

        // The running total is printed at the end so the summing work is
        // observable and cannot be discarded as dead code.
        long sum = 0;

        long start1 = System.currentTimeMillis();
        for ( int i=0; i<100000; i++ ) {
            sum += sumAcrossFirst(data);
        }
        long end1 = System.currentTimeMillis();

        long start2 = System.currentTimeMillis();
        for ( int i=0; i<100000; i++ ) {
            sum += sumAcrossSecond(data);
        }
        long end2 = System.currentTimeMillis();

        double duration1 = (end1-start1)/1000.0;
        double duration2 = (end2-start2)/1000.0;
        System.out.println("duration1 = " + duration1);
        System.out.println("duration2 = " + duration2);
        System.out.println("sum = " + sum);
    }

    /**
     * Creates a {@code size x size} grid with every cell set to 1.
     *
     * @param size number of rows and columns
     * @return the newly allocated grid
     */
    private static int[][] createGrid(int size) {
        int[][] data = new int[size][size];

        for ( int x=0; x<size; x++ ) {
            for ( int y=0; y<size; y++ ) {
                data[x][y] = 1;
            }
        }

        return data;
    }

    /**
     * Sums all cells row by row, i.e. in {@code data[x][y]} order with the
     * second index varying fastest.
     *
     * @param data grid to sum
     * @return the sum of all cells
     */
    private static long sumAcrossFirst(int[][] data) {
        long sum = 0;

        for ( int x=0; x<data.length; x++ ) {
            int[] row = data[x];
            // Bound by the row's own length rather than data.length so that
            // non-square grids are summed completely.
            for ( int y=0; y<row.length; y++ ) {
                sum += row[y];
            }
        }

        return sum;
    }

    /**
     * Sums all cells column by column, i.e. in {@code data[y][x]} order with
     * the first index varying fastest. Assumes a rectangular grid.
     *
     * @param data grid to sum
     * @return the sum of all cells
     */
    private static long sumAcrossSecond(int[][] data) {
        long sum = 0;

        int rows = data.length;
        int cols = rows == 0 ? 0 : data[0].length;
        for ( int x=0; x<cols; x++ ) {
            for ( int y=0; y<rows; y++ ) {
                sum += data[y][x];
            }
        }

        return sum;
    }
}


另一个优化是把 int[][] 展平为一维的 int[]:这样需要追踪的指针更少,现代 CPU 的预取器也能发挥作用,提前把数组的下一部分载入缓存。


对数组求和最快的方法是使用 SIMD 指令,遗憾的是,如果不借助 JNI 或类似手段,就无法在 Java 中直接使用它们。Fork/Join 是一个出色的框架,但它在带来加速之前会先引入一些开销。这意味着并行方案要追平单核方案,所需的整数数量(即收支平衡的阈值)会更高。


作为起点,下面给出一种方法,你可以在此基础上随意修改。它演示了如何使用 Java 的 Executor,即位于 Fork/Join 框架之下的线程池。

private static Executor pool = Executors.newFixedThreadPool( Runtime.getRuntime().availableProcessors() );

private static int[][] sumParallel( int[][] a, int[][] b ) throws InterruptedException {
    int[][] result = createGrid(a.length);
    CountDownLatch latch = new CountDownLatch(a.length);

    for ( int i=0; i<a.length; i++ ) {
        pool.execute( new SumTask(latch, a,b,i, result) );


    return result;

public static class SumTask implements Runnable {
    private CountDownLatch latch;

    private int[][] a;
    private int[][] b;
    private int     row;
    private int[][] result;

    public SumTask(CountDownLatch latch, int[][] a, int[][] b, int row, int[][] result) {
        this.latch = latch;

        this.a = a;
        this.b = b;
        this.row = row;
        this.result = result;

    public void run() {
        for ( int y=0; y<a.length; y++ ) {
            result[row][y] = a[row][y] + b[row][y];



public class Sum2DFJ {

    public static void main( String[] args ) throws ExecutionException, InterruptedException {
        int[][] data = {{1,2,3},{1,2,3},{1,2,3}};

        SumTask task = new SumTask(data, data);
        ForkJoinPool pool = new ForkJoinPool();


        int[][] result = task.get();

        for ( int x=0; x<data.length; x++ ) {
            for ( int y=0; y<data.length; y++ ) {
                System.out.println("result[x][y] = " + result[x][y]);


class SumTask extends RecursiveTask<int[][]> {

    private int[][] a;
    private int[][] b;

    public SumTask( int[][] a, int[][] b ) {

        this.a = a;
        this.b = b;

    protected int[][] compute() {
        int[][] result = createGrid(a.length);

        List<ForkJoinTask> children = new ArrayList();

        for ( int i=0; i<a.length; i++ ) {
            children.add( new SumChildTask(a,b,i, result) );


        return result;

    private static int[][] createGrid(int size) {
        int[][] data = new int[size][size];

        for ( int x=0; x<size; x++ ) {
            for ( int y=0; y<size; y++ ) {
                data[x][y] = 0;

        return data;

class SumChildTask extends RecursiveAction {

    private int[][] a;
    private int[][] b;
    private int row;
    private int[][] result;

    public SumChildTask(int[][] a, int[][] b, int row, int[][] result) {
        this.a = a;
        this.b = b;
        this.row = row;
        this.result = result;

    protected void compute() {
        for ( int i=0; i<b.length; i++ ) {
            result[row][i] = a[row][i] + b[row][i];

答案 1 :(得分:0)


compute() method when at threshold

// Inputs available to the task (pseudo-declarations from the answer):
//   int[][] A = the original A matrix
//   int[][] B = the original B matrix
//   int[][] C = the newly instantiated result matrix
//
//   int start = starting row of this segment (inclusive)
//   int end   = ending row of this segment (exclusive)

// All rows are taken to have the same number of columns as A's first row.
int columns = A[0].length;

// Do all the rows of A and B that belong to this segment.
for (int i = start; i < end; i++) {

    // Grab the row arrays of A, B and C once; this saves a subscript
    // in the inner loop.
    int[] aSide = A[i];
    int[] bSide = B[i];
    int[] cSide = C[i];

    // Do all the columns of the row.
    for (int j = 0; j < columns; j++) {

        // C(i,j) = A(i,j) + B(i,j)
        cSide[j] = aSide[j] + bSide[j];
    }
}


