c mpi (spawn, scatter) for a variable number of processes

Date: 2019-05-18 07:52:58

Tags: c mpi

I am setting up an example with a variable number of processes, binding them to sockets in a small network whose machines have different architectures and numbers of CPUs.
I compile and run with:

mpiicpc avg_4.c -qopenmp -axSSE4.2,AVX,CORE-AVX2 -O3 -par-affinity=noverbose,granularity=core,compact -o b  
mpiexec.hydra -machinefile f19 -genv I_MPI_PIN=1 -genv I_MPI_PIN_DOMAIN=socket -genv I_MPI_PIN_ORDER=compact -n 1 ./b

The network (master ma plus slave s19) is described by the machinefile f19:
s19:1
ma:1
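
To check where the ranks actually land, Intel MPI can print its pinning decisions, e.g. (assumption: a reasonably recent Intel MPI, where I_MPI_DEBUG=4 includes the pin map in its output):

mpiexec.hydra -machinefile f19 -genv I_MPI_DEBUG=4 -genv I_MPI_PIN=1 -genv I_MPI_PIN_DOMAIN=socket -genv I_MPI_PIN_ORDER=compact -n 1 ./b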

#include <stdio.h>
#include <stdlib.h>
#include <omp.h>
#include <sched.h>
#include <mpi.h>

int *create_mlu(int n_omp, int ws) {
    int *mlu = (int *)calloc(n_omp * ws, sizeof(int));
    for (int i = 0; i < ws; i++)
        for (int j = 0; j < n_omp; j++)
            mlu[j + i*n_omp] = j + 100 * i;
    return mlu;
}

int *C4_Re(int *mal, int n_omp, int wr, int ws) {
    int *rM8 = (int *)malloc(sizeof(int) * n_omp);
    char nod[MPI_MAX_PROCESSOR_NAME];
    int n_l; MPI_Get_processor_name(nod, &n_l);
    #pragma omp parallel for
    for (int i = 0; i < n_omp; i++) {
        rM8[i] = mal[i] + 10 * omp_get_thread_num();
        printf("ws%2d\t\tmpi%2d\t\tmaxTh%2d\t\tmaxPr%2d\t\tomp%2d\t\tcore%3d\t\trM8%4d\t\tnod %s\n", ws, wr, omp_get_num_threads(), omp_get_num_procs(), omp_get_thread_num(), sched_getcpu(), rM8[i], nod);
    }
    return rM8;
}

int main(void) {

    MPI_Init(NULL, NULL);

    int ts[2] = {7, 9}; // number of processes to spawn in each round

    for (int t = 0; t < 2; t++) {

        int ws = ts[t];
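        // note: the spawned children execute this t-loop as well, so ws taken
        // from ts[t] can differ from the actual size of their MPI_COMM_WORLD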
        int errcodes[ws];

        MPI_Comm parentcomm, intercomm;
        MPI_Comm_get_parent(&parentcomm);

        if (parentcomm == MPI_COMM_NULL) {
            MPI_Comm_spawn("./b", MPI_ARGV_NULL, ws, MPI_INFO_NULL, 0, MPI_COMM_WORLD, &intercomm, errcodes);
            //printf("I'm the parent.\n");
        }
        else {
            int wr;  MPI_Comm_rank(MPI_COMM_WORLD, &wr);// printf("wr %d\n", wr);
            //int ps;  MPI_Comm_size(parentcomm, &ps);// printf("ps %d\n", ps);
            //int pr;  MPI_Comm_rank(parentcomm, &pr);// printf("pr %d\n", pr);

            int n_omp = 8, *mlu = NULL;
            if (wr == 0) {
                mlu = create_mlu(n_omp, ws);
                //for (int i = 0; i < n_omp*ws; i++) printf("\tmlu[%2d] = %d\n", i, mlu[i]);
            }

            int *mal = (int *)malloc(n_omp * sizeof(int));
            MPI_Scatter(mlu, n_omp, MPI_INT, mal, n_omp, MPI_INT, 0, MPI_COMM_WORLD);
            //for (int i = 0; i < n_omp; i++) printf("\t\tmal[%2d] = %d\trank %d\n", i, mal[i], wr);

            int *rM8 = NULL;
            rM8 = C4_Re(mal, n_omp, wr, ws);

            int *rS8 = NULL;
            if (wr == 0)
                rS8 = (int *)malloc(sizeof(int) * ws * n_omp);

            MPI_Gather(rM8, n_omp, MPI_INT, rS8, n_omp, MPI_INT, 0, MPI_COMM_WORLD);

            if (wr == 0) {
                //for (int i = 0; i < n_omp * ws; i++) printf("\t\trS8[%2d] = %d\n", i, rS8[i]);
                free(mlu);
                free(rS8);
            }
            free(mal);
            free(rM8);
        }
        //fflush(stdout);
    }
    fflush(stdout);
    MPI_Finalize();
    return 0;
}

I am getting memory corruption and need help finding it.
Some results look like
     ws 7  rM8 -37253944  nod ma  mpi 7  maxTh 6  maxPr 6  omp 4  core 4

but they should look like
     ws 7 rM8 624 nod ma mpi 6 maxTh 6 maxPr 6 omp 2 core 2
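
A likely cause of the corruption: the spawned children execute the for loop over ts as well, so the 9 workers spawned in the second round first run an iteration with ws = 7 even though their MPI_COMM_WORLD holds 9 ranks. MPI_Scatter at the root then reads 9*n_omp ints from an mlu allocated for 7*n_omp, and MPI_Gather writes 9*n_omp ints into an rS8 allocated for 7*n_omp, overflowing the heap (and matching the garbage rM8 printed by ranks 7 and 8 above). A minimal sketch of a child-side round where every count is derived from the communicator itself, so the buffers can never be smaller than what the collectives touch (child_round is a hypothetical helper; it reuses create_mlu(), C4_Re(), and the includes from the listing above):

static void child_round(int n_omp)
{
    int wr, ws;
    MPI_Comm_rank(MPI_COMM_WORLD, &wr);
    MPI_Comm_size(MPI_COMM_WORLD, &ws);   /* real group size: 7 or 9 */

    int *mlu = NULL;
    if (wr == 0)
        mlu = create_mlu(n_omp, ws);      /* root buffer sized for the real group */

    int *mal = (int *)malloc(n_omp * sizeof(int));
    MPI_Scatter(mlu, n_omp, MPI_INT, mal, n_omp, MPI_INT, 0, MPI_COMM_WORLD);

    int *rM8 = C4_Re(mal, n_omp, wr, ws);

    int *rS8 = NULL;
    if (wr == 0)
        rS8 = (int *)malloc(sizeof(int) * ws * n_omp);
    MPI_Gather(rM8, n_omp, MPI_INT, rS8, n_omp, MPI_INT, 0, MPI_COMM_WORLD);

    free(mlu);   /* free(NULL) is a no-op on non-root ranks */
    free(mal);
    free(rM8);
    free(rS8);
}

Each spawned group would call child_round exactly once: only the parent needs the loop over ts, spawning first 7 and then 9 workers.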

Other questions:
1 - Why is it incorrect to use parentcomm for the scatter and gather? I thought parentcomm was the new communicator (see the intercommunicator sketch below).
2 - Should I create different communicators for 7 and for 9 processes?
3 - mpicc gives me wrong results and I don't know why.
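
On question 1: parentcomm is an intercommunicator, not an ordinary intra-communicator. A collective over an intercommunicator moves data between the two groups, so the root argument takes special values: the parent passes MPI_ROOT, and every child passes the parent's rank within the remote group (0 here). A sketch of what a scatter over the spawn intercommunicator would look like (assumption: the parent, not child rank 0, holds the full mlu of ws * n_omp ints):

/* parent side (the process that called MPI_Comm_spawn): */
MPI_Scatter(mlu, n_omp, MPI_INT,   /* the data lives on the parent   */
            NULL, 0, MPI_INT,      /* the parent receives nothing    */
            MPI_ROOT, intercomm);

/* child side (every spawned process): */
MPI_Scatter(NULL, 0, MPI_INT,      /* children send nothing          */
            mal, n_omp, MPI_INT,
            0,                     /* rank of the parent in the remote group */
            parentcomm);

The posted code scatters over MPI_COMM_WORLD instead, which for the spawned processes contains exactly the worker group, so child rank 0 acts as root; both designs are valid, they just place the source data on different processes.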

0 Answers
