1*c4762a1bSJed Brown static char help[] = "Example of extracting an array of MPI submatrices from a given MPI matrix.\n" 2*c4762a1bSJed Brown "This test can only be run in parallel.\n" 3*c4762a1bSJed Brown "\n"; 4*c4762a1bSJed Brown 5*c4762a1bSJed Brown /*T 6*c4762a1bSJed Brown Concepts: Mat^mat submatrix, parallel 7*c4762a1bSJed Brown Processors: n 8*c4762a1bSJed Brown T*/ 9*c4762a1bSJed Brown 10*c4762a1bSJed Brown 11*c4762a1bSJed Brown #include <petscmat.h> 12*c4762a1bSJed Brown 13*c4762a1bSJed Brown int main(int argc, char **args) 14*c4762a1bSJed Brown { 15*c4762a1bSJed Brown Mat A,*submats; 16*c4762a1bSJed Brown MPI_Comm subcomm; 17*c4762a1bSJed Brown PetscMPIInt rank,size,subrank,subsize,color; 18*c4762a1bSJed Brown PetscInt m,n,N,bs,rstart,rend,i,j,k,total_subdomains,hash,nsubdomains=1; 19*c4762a1bSJed Brown PetscInt nis,*cols,gnsubdomains,gsubdomainnums[1],gsubdomainperm[1],s,gs; 20*c4762a1bSJed Brown PetscInt *rowindices,*colindices,idx,rep; 21*c4762a1bSJed Brown PetscScalar *vals; 22*c4762a1bSJed Brown IS rowis[1],colis[1]; 23*c4762a1bSJed Brown PetscViewer viewer; 24*c4762a1bSJed Brown PetscBool permute_indices,flg; 25*c4762a1bSJed Brown PetscErrorCode ierr; 26*c4762a1bSJed Brown 27*c4762a1bSJed Brown ierr = PetscInitialize(&argc,&args,(char*)0,help);if (ierr) return ierr; 28*c4762a1bSJed Brown ierr = MPI_Comm_size(PETSC_COMM_WORLD,&size);CHKERRQ(ierr); 29*c4762a1bSJed Brown ierr = MPI_Comm_rank(PETSC_COMM_WORLD,&rank);CHKERRQ(ierr); 30*c4762a1bSJed Brown 31*c4762a1bSJed Brown ierr = PetscOptionsBegin(PETSC_COMM_WORLD,NULL,"ex183","Mat");CHKERRQ(ierr); 32*c4762a1bSJed Brown m = 5; 33*c4762a1bSJed Brown ierr = PetscOptionsInt("-m","Local matrix size","MatSetSizes",m,&m,&flg);CHKERRQ(ierr); 34*c4762a1bSJed Brown total_subdomains = size-1; 35*c4762a1bSJed Brown ierr = PetscOptionsInt("-total_subdomains","Number of submatrices where 0 < n < comm size","MatCreateSubMatricesMPI",total_subdomains,&total_subdomains,&flg);CHKERRQ(ierr); 36*c4762a1bSJed Brown permute_indices = PETSC_FALSE; 37*c4762a1bSJed Brown ierr = PetscOptionsBool("-permute_indices","Whether to permute indices before breaking them into subdomains","ISCreateGeneral",permute_indices,&permute_indices,&flg);CHKERRQ(ierr); 38*c4762a1bSJed Brown hash = 7; 39*c4762a1bSJed Brown ierr = PetscOptionsInt("-hash","Permutation factor, which has to be relatively prime to M = size*m (total matrix size)","ISCreateGeneral",hash,&hash,&flg);CHKERRQ(ierr); 40*c4762a1bSJed Brown rep = 2; 41*c4762a1bSJed Brown ierr = PetscOptionsInt("-rep","Number of times to carry out submatrix extractions; currently only 1 & 2 are supported",NULL,rep,&rep,&flg);CHKERRQ(ierr); 42*c4762a1bSJed Brown ierr = PetscOptionsEnd();CHKERRQ(ierr); 43*c4762a1bSJed Brown 44*c4762a1bSJed Brown if (total_subdomains > size) SETERRQ2(PETSC_COMM_WORLD,PETSC_ERR_ARG_WRONG,"Number of subdomains %D must not exceed comm size %D",total_subdomains,size); 45*c4762a1bSJed Brown if (total_subdomains < 1 || total_subdomains > size) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"number of subdomains must be > 0 and <= %D (comm size), got total_subdomains = %D",size,total_subdomains); 46*c4762a1bSJed Brown if (rep != 1 && rep != 2) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid number of test repetitions: %D; must be 1 or 2",rep); 47*c4762a1bSJed Brown 48*c4762a1bSJed Brown viewer = PETSC_VIEWER_STDOUT_WORLD; 49*c4762a1bSJed Brown /* Create logically sparse, but effectively dense matrix for easy verification of submatrix extraction correctness. */ 50*c4762a1bSJed Brown ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr); 51*c4762a1bSJed Brown ierr = MatSetSizes(A,m,m,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 52*c4762a1bSJed Brown ierr = MatSetFromOptions(A);CHKERRQ(ierr); 53*c4762a1bSJed Brown ierr = MatSetUp(A);CHKERRQ(ierr); 54*c4762a1bSJed Brown ierr = MatGetSize(A,NULL,&N);CHKERRQ(ierr); 55*c4762a1bSJed Brown ierr = MatGetLocalSize(A,NULL,&n);CHKERRQ(ierr); 56*c4762a1bSJed Brown ierr = MatGetBlockSize(A,&bs);CHKERRQ(ierr); 57*c4762a1bSJed Brown ierr = MatSeqAIJSetPreallocation(A,n,NULL);CHKERRQ(ierr); 58*c4762a1bSJed Brown ierr = MatMPIAIJSetPreallocation(A,n,NULL,N-n,NULL);CHKERRQ(ierr); 59*c4762a1bSJed Brown ierr = MatSeqBAIJSetPreallocation(A,bs,n/bs,NULL);CHKERRQ(ierr); 60*c4762a1bSJed Brown ierr = MatMPIBAIJSetPreallocation(A,bs,n/bs,NULL,(N-n)/bs,NULL);CHKERRQ(ierr); 61*c4762a1bSJed Brown ierr = MatSeqSBAIJSetPreallocation(A,bs,n/bs,NULL);CHKERRQ(ierr); 62*c4762a1bSJed Brown ierr = MatMPISBAIJSetPreallocation(A,bs,n/bs,NULL,(N-n)/bs,NULL);CHKERRQ(ierr); 63*c4762a1bSJed Brown 64*c4762a1bSJed Brown ierr = PetscMalloc2(N,&cols,N,&vals);CHKERRQ(ierr); 65*c4762a1bSJed Brown ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr); 66*c4762a1bSJed Brown for (j = 0; j < N; ++j) cols[j] = j; 67*c4762a1bSJed Brown for (i=rstart; i<rend; i++) { 68*c4762a1bSJed Brown for (j=0;j<N;++j) { 69*c4762a1bSJed Brown vals[j] = i*10000+j; 70*c4762a1bSJed Brown } 71*c4762a1bSJed Brown ierr = MatSetValues(A,1,&i,N,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 72*c4762a1bSJed Brown } 73*c4762a1bSJed Brown ierr = PetscFree2(cols,vals);CHKERRQ(ierr); 74*c4762a1bSJed Brown ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 75*c4762a1bSJed Brown ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 76*c4762a1bSJed Brown 77*c4762a1bSJed Brown ierr = PetscViewerASCIIPrintf(viewer,"Initial matrix:\n");CHKERRQ(ierr); 78*c4762a1bSJed Brown ierr = MatView(A,viewer);CHKERRQ(ierr); 79*c4762a1bSJed Brown 80*c4762a1bSJed Brown 81*c4762a1bSJed Brown /* 82*c4762a1bSJed Brown Create subcomms and ISs so that each rank participates in one IS. 83*c4762a1bSJed Brown The IS either coalesces adjacent rank indices (contiguous), 84*c4762a1bSJed Brown or selects indices by scrambling them using a hash. 85*c4762a1bSJed Brown */ 86*c4762a1bSJed Brown k = size/total_subdomains + (size%total_subdomains>0); /* There are up to k ranks to a color */ 87*c4762a1bSJed Brown color = rank/k; 88*c4762a1bSJed Brown ierr = MPI_Comm_split(PETSC_COMM_WORLD,color,rank,&subcomm);CHKERRQ(ierr); 89*c4762a1bSJed Brown ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr); 90*c4762a1bSJed Brown ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr); 91*c4762a1bSJed Brown ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr); 92*c4762a1bSJed Brown nis = 1; 93*c4762a1bSJed Brown ierr = PetscMalloc2(rend-rstart,&rowindices,rend-rstart,&colindices);CHKERRQ(ierr); 94*c4762a1bSJed Brown 95*c4762a1bSJed Brown for (j = rstart; j < rend; ++j) { 96*c4762a1bSJed Brown if (permute_indices) { 97*c4762a1bSJed Brown idx = (j*hash); 98*c4762a1bSJed Brown } else { 99*c4762a1bSJed Brown idx = j; 100*c4762a1bSJed Brown } 101*c4762a1bSJed Brown rowindices[j-rstart] = idx%N; 102*c4762a1bSJed Brown colindices[j-rstart] = (idx+m)%N; 103*c4762a1bSJed Brown } 104*c4762a1bSJed Brown ierr = ISCreateGeneral(subcomm,rend-rstart,rowindices,PETSC_COPY_VALUES,&rowis[0]);CHKERRQ(ierr); 105*c4762a1bSJed Brown ierr = ISCreateGeneral(subcomm,rend-rstart,colindices,PETSC_COPY_VALUES,&colis[0]);CHKERRQ(ierr); 106*c4762a1bSJed Brown ierr = ISSort(rowis[0]);CHKERRQ(ierr); 107*c4762a1bSJed Brown ierr = ISSort(colis[0]);CHKERRQ(ierr); 108*c4762a1bSJed Brown ierr = PetscFree2(rowindices,colindices);CHKERRQ(ierr); 109*c4762a1bSJed Brown /* 110*c4762a1bSJed Brown Now view the ISs. To avoid deadlock when viewing a list of objects on different subcomms, 111*c4762a1bSJed Brown we need to obtain the global numbers of our local objects and wait for the corresponding global 112*c4762a1bSJed Brown number to be viewed. 113*c4762a1bSJed Brown */ 114*c4762a1bSJed Brown ierr = PetscViewerASCIIPrintf(viewer,"Subdomains");CHKERRQ(ierr); 115*c4762a1bSJed Brown if (permute_indices) { 116*c4762a1bSJed Brown ierr = PetscViewerASCIIPrintf(viewer," (hash=%D)",hash);CHKERRQ(ierr); 117*c4762a1bSJed Brown } 118*c4762a1bSJed Brown ierr = PetscViewerASCIIPrintf(viewer,":\n");CHKERRQ(ierr); 119*c4762a1bSJed Brown ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 120*c4762a1bSJed Brown 121*c4762a1bSJed Brown nsubdomains = 1; 122*c4762a1bSJed Brown for (s = 0; s < nsubdomains; ++s) gsubdomainperm[s] = s; 123*c4762a1bSJed Brown ierr = PetscObjectsListGetGlobalNumbering(PETSC_COMM_WORLD,1,(PetscObject*)rowis,&gnsubdomains,gsubdomainnums);CHKERRQ(ierr); 124*c4762a1bSJed Brown ierr = PetscSortIntWithPermutation(nsubdomains,gsubdomainnums,gsubdomainperm);CHKERRQ(ierr); 125*c4762a1bSJed Brown for (gs=0,s=0; gs < gnsubdomains;++gs) { 126*c4762a1bSJed Brown if (s < nsubdomains) { 127*c4762a1bSJed Brown PetscInt ss; 128*c4762a1bSJed Brown ss = gsubdomainperm[s]; 129*c4762a1bSJed Brown if (gs == gsubdomainnums[ss]) { /* Global subdomain gs being viewed is my subdomain with local number ss. */ 130*c4762a1bSJed Brown PetscViewer subviewer = NULL; 131*c4762a1bSJed Brown ierr = PetscViewerGetSubViewer(viewer,PetscObjectComm((PetscObject)rowis[ss]),&subviewer);CHKERRQ(ierr); 132*c4762a1bSJed Brown ierr = PetscViewerASCIIPrintf(subviewer,"Row IS %D\n",gs);CHKERRQ(ierr); 133*c4762a1bSJed Brown ierr = ISView(rowis[ss],subviewer);CHKERRQ(ierr); 134*c4762a1bSJed Brown ierr = PetscViewerFlush(subviewer);CHKERRQ(ierr); 135*c4762a1bSJed Brown ierr = PetscViewerASCIIPrintf(subviewer,"Col IS %D\n",gs);CHKERRQ(ierr); 136*c4762a1bSJed Brown ierr = ISView(colis[ss],subviewer);CHKERRQ(ierr); 137*c4762a1bSJed Brown ierr = PetscViewerRestoreSubViewer(viewer,PetscObjectComm((PetscObject)rowis[ss]),&subviewer);CHKERRQ(ierr); 138*c4762a1bSJed Brown ++s; 139*c4762a1bSJed Brown } 140*c4762a1bSJed Brown } 141*c4762a1bSJed Brown ierr = MPI_Barrier(PETSC_COMM_WORLD);CHKERRQ(ierr); 142*c4762a1bSJed Brown } 143*c4762a1bSJed Brown ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 144*c4762a1bSJed Brown ierr = ISSort(rowis[0]);CHKERRQ(ierr); 145*c4762a1bSJed Brown ierr = ISSort(colis[0]);CHKERRQ(ierr); 146*c4762a1bSJed Brown nsubdomains = 1; 147*c4762a1bSJed Brown ierr = MatCreateSubMatricesMPI(A,nsubdomains,rowis,colis,MAT_INITIAL_MATRIX,&submats);CHKERRQ(ierr); 148*c4762a1bSJed Brown /* 149*c4762a1bSJed Brown Now view the matrices. To avoid deadlock when viewing a list of objects on different subcomms, 150*c4762a1bSJed Brown we need to obtain the global numbers of our local objects and wait for the corresponding global 151*c4762a1bSJed Brown number to be viewed. 152*c4762a1bSJed Brown */ 153*c4762a1bSJed Brown ierr = PetscViewerASCIIPrintf(viewer,"Submatrices (repetition 1):\n");CHKERRQ(ierr); 154*c4762a1bSJed Brown for (s = 0; s < nsubdomains; ++s) gsubdomainperm[s] = s; 155*c4762a1bSJed Brown ierr = PetscObjectsListGetGlobalNumbering(PETSC_COMM_WORLD,1,(PetscObject*)submats,&gnsubdomains,gsubdomainnums);CHKERRQ(ierr); 156*c4762a1bSJed Brown ierr = PetscSortIntWithPermutation(nsubdomains,gsubdomainnums,gsubdomainperm);CHKERRQ(ierr); 157*c4762a1bSJed Brown for (gs=0,s=0; gs < gnsubdomains;++gs) { 158*c4762a1bSJed Brown if (s < nsubdomains) { 159*c4762a1bSJed Brown PetscInt ss; 160*c4762a1bSJed Brown ss = gsubdomainperm[s]; 161*c4762a1bSJed Brown if (gs == gsubdomainnums[ss]) { /* Global subdomain gs being viewed is my subdomain with local number ss. */ 162*c4762a1bSJed Brown PetscViewer subviewer = NULL; 163*c4762a1bSJed Brown ierr = PetscViewerGetSubViewer(viewer,PetscObjectComm((PetscObject)submats[ss]),&subviewer);CHKERRQ(ierr); 164*c4762a1bSJed Brown ierr = MatView(submats[ss],subviewer);CHKERRQ(ierr); 165*c4762a1bSJed Brown ierr = PetscViewerRestoreSubViewer(viewer,PetscObjectComm((PetscObject)submats[ss]),&subviewer);CHKERRQ(ierr); 166*c4762a1bSJed Brown ++s; 167*c4762a1bSJed Brown } 168*c4762a1bSJed Brown } 169*c4762a1bSJed Brown ierr = MPI_Barrier(PETSC_COMM_WORLD);CHKERRQ(ierr); 170*c4762a1bSJed Brown } 171*c4762a1bSJed Brown ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 172*c4762a1bSJed Brown if (rep == 1) goto cleanup; 173*c4762a1bSJed Brown nsubdomains = 1; 174*c4762a1bSJed Brown ierr = MatCreateSubMatricesMPI(A,nsubdomains,rowis,colis,MAT_REUSE_MATRIX,&submats);CHKERRQ(ierr); 175*c4762a1bSJed Brown /* 176*c4762a1bSJed Brown Now view the matrices. To avoid deadlock when viewing a list of objects on different subcomms, 177*c4762a1bSJed Brown we need to obtain the global numbers of our local objects and wait for the corresponding global 178*c4762a1bSJed Brown number to be viewed. 179*c4762a1bSJed Brown */ 180*c4762a1bSJed Brown ierr = PetscViewerASCIIPrintf(viewer,"Submatrices (repetition 2):\n");CHKERRQ(ierr); 181*c4762a1bSJed Brown for (s = 0; s < nsubdomains; ++s) gsubdomainperm[s] = s; 182*c4762a1bSJed Brown ierr = PetscObjectsListGetGlobalNumbering(PETSC_COMM_WORLD,1,(PetscObject*)submats,&gnsubdomains,gsubdomainnums);CHKERRQ(ierr); 183*c4762a1bSJed Brown ierr = PetscSortIntWithPermutation(nsubdomains,gsubdomainnums,gsubdomainperm);CHKERRQ(ierr); 184*c4762a1bSJed Brown for (gs=0,s=0; gs < gnsubdomains;++gs) { 185*c4762a1bSJed Brown if (s < nsubdomains) { 186*c4762a1bSJed Brown PetscInt ss; 187*c4762a1bSJed Brown ss = gsubdomainperm[s]; 188*c4762a1bSJed Brown if (gs == gsubdomainnums[ss]) { /* Global subdomain gs being viewed is my subdomain with local number ss. */ 189*c4762a1bSJed Brown PetscViewer subviewer = NULL; 190*c4762a1bSJed Brown ierr = PetscViewerGetSubViewer(viewer,PetscObjectComm((PetscObject)submats[ss]),&subviewer);CHKERRQ(ierr); 191*c4762a1bSJed Brown ierr = MatView(submats[ss],subviewer);CHKERRQ(ierr); 192*c4762a1bSJed Brown ierr = PetscViewerRestoreSubViewer(viewer,PetscObjectComm((PetscObject)submats[ss]),&subviewer);CHKERRQ(ierr); 193*c4762a1bSJed Brown ++s; 194*c4762a1bSJed Brown } 195*c4762a1bSJed Brown } 196*c4762a1bSJed Brown ierr = MPI_Barrier(PETSC_COMM_WORLD);CHKERRQ(ierr); 197*c4762a1bSJed Brown } 198*c4762a1bSJed Brown ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 199*c4762a1bSJed Brown cleanup: 200*c4762a1bSJed Brown for (k=0;k<nsubdomains;++k) { 201*c4762a1bSJed Brown ierr = MatDestroy(submats+k);CHKERRQ(ierr); 202*c4762a1bSJed Brown } 203*c4762a1bSJed Brown ierr = PetscFree(submats);CHKERRQ(ierr); 204*c4762a1bSJed Brown for (k=0;k<nis;++k) { 205*c4762a1bSJed Brown ierr = ISDestroy(rowis+k);CHKERRQ(ierr); 206*c4762a1bSJed Brown ierr = ISDestroy(colis+k);CHKERRQ(ierr); 207*c4762a1bSJed Brown } 208*c4762a1bSJed Brown ierr = MatDestroy(&A);CHKERRQ(ierr); 209*c4762a1bSJed Brown ierr = MPI_Comm_free(&subcomm);CHKERRQ(ierr); 210*c4762a1bSJed Brown ierr = PetscFinalize(); 211*c4762a1bSJed Brown return ierr; 212*c4762a1bSJed Brown } 213*c4762a1bSJed Brown 214*c4762a1bSJed Brown 215*c4762a1bSJed Brown /*TEST 216*c4762a1bSJed Brown 217*c4762a1bSJed Brown test: 218*c4762a1bSJed Brown nsize: 2 219*c4762a1bSJed Brown args: -total_subdomains 1 220*c4762a1bSJed Brown output_file: output/ex183_2_1.out 221*c4762a1bSJed Brown 222*c4762a1bSJed Brown test: 223*c4762a1bSJed Brown suffix: 2 224*c4762a1bSJed Brown nsize: 3 225*c4762a1bSJed Brown args: -total_subdomains 2 226*c4762a1bSJed Brown output_file: output/ex183_3_2.out 227*c4762a1bSJed Brown 228*c4762a1bSJed Brown test: 229*c4762a1bSJed Brown suffix: 3 230*c4762a1bSJed Brown nsize: 4 231*c4762a1bSJed Brown args: -total_subdomains 2 232*c4762a1bSJed Brown output_file: output/ex183_4_2.out 233*c4762a1bSJed Brown 234*c4762a1bSJed Brown test: 235*c4762a1bSJed Brown suffix: 4 236*c4762a1bSJed Brown nsize: 6 237*c4762a1bSJed Brown args: -total_subdomains 2 238*c4762a1bSJed Brown output_file: output/ex183_6_2.out 239*c4762a1bSJed Brown 240*c4762a1bSJed Brown TEST*/ 241