1c4762a1bSJed Brown static char help[] = "Example of extracting an array of MPI submatrices from a given MPI matrix.\n" 2c4762a1bSJed Brown "This test can only be run in parallel.\n" 3c4762a1bSJed Brown "\n"; 4c4762a1bSJed Brown 5c4762a1bSJed Brown #include <petscmat.h> 6c4762a1bSJed Brown 7c4762a1bSJed Brown int main(int argc, char **args) 8c4762a1bSJed Brown { 9c4762a1bSJed Brown Mat A,*submats; 10c4762a1bSJed Brown MPI_Comm subcomm; 11c4762a1bSJed Brown PetscMPIInt rank,size,subrank,subsize,color; 12c4762a1bSJed Brown PetscInt m,n,N,bs,rstart,rend,i,j,k,total_subdomains,hash,nsubdomains=1; 13c4762a1bSJed Brown PetscInt nis,*cols,gnsubdomains,gsubdomainnums[1],gsubdomainperm[1],s,gs; 14c4762a1bSJed Brown PetscInt *rowindices,*colindices,idx,rep; 15c4762a1bSJed Brown PetscScalar *vals; 16c4762a1bSJed Brown IS rowis[1],colis[1]; 17c4762a1bSJed Brown PetscViewer viewer; 18c4762a1bSJed Brown PetscBool permute_indices,flg; 19c4762a1bSJed Brown 20*327415f7SBarry Smith PetscFunctionBeginUser; 219566063dSJacob Faibussowitsch PetscCall(PetscInitialize(&argc,&args,(char*)0,help)); 229566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_size(PETSC_COMM_WORLD,&size)); 239566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_rank(PETSC_COMM_WORLD,&rank)); 24c4762a1bSJed Brown 25d0609cedSBarry Smith PetscOptionsBegin(PETSC_COMM_WORLD,NULL,"ex183","Mat"); 26c4762a1bSJed Brown m = 5; 279566063dSJacob Faibussowitsch PetscCall(PetscOptionsInt("-m","Local matrix size","MatSetSizes",m,&m,&flg)); 28c4762a1bSJed Brown total_subdomains = size-1; 299566063dSJacob Faibussowitsch PetscCall(PetscOptionsInt("-total_subdomains","Number of submatrices where 0 < n < comm size","MatCreateSubMatricesMPI",total_subdomains,&total_subdomains,&flg)); 30c4762a1bSJed Brown permute_indices = PETSC_FALSE; 319566063dSJacob Faibussowitsch PetscCall(PetscOptionsBool("-permute_indices","Whether to permute indices before breaking them into subdomains","ISCreateGeneral",permute_indices,&permute_indices,&flg)); 32c4762a1bSJed Brown hash = 7; 339566063dSJacob Faibussowitsch PetscCall(PetscOptionsInt("-hash","Permutation factor, which has to be relatively prime to M = size*m (total matrix size)","ISCreateGeneral",hash,&hash,&flg)); 34c4762a1bSJed Brown rep = 2; 359566063dSJacob Faibussowitsch PetscCall(PetscOptionsInt("-rep","Number of times to carry out submatrix extractions; currently only 1 & 2 are supported",NULL,rep,&rep,&flg)); 36d0609cedSBarry Smith PetscOptionsEnd(); 37c4762a1bSJed Brown 3808401ef6SPierre Jolivet PetscCheck(total_subdomains <= size,PETSC_COMM_WORLD,PETSC_ERR_ARG_WRONG,"Number of subdomains %" PetscInt_FMT " must not exceed comm size %d",total_subdomains,size); 39bc30f867SBarry Smith PetscCheck(total_subdomains >= 1 && total_subdomains <= size,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"number of subdomains must be > 0 and <= %d (comm size), got total_subdomains = %" PetscInt_FMT,size,total_subdomains); 40bc30f867SBarry Smith PetscCheck(rep == 1 || rep == 2,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid number of test repetitions: %" PetscInt_FMT "; must be 1 or 2",rep); 41c4762a1bSJed Brown 42c4762a1bSJed Brown viewer = PETSC_VIEWER_STDOUT_WORLD; 43c4762a1bSJed Brown /* Create logically sparse, but effectively dense matrix for easy verification of submatrix extraction correctness. */ 449566063dSJacob Faibussowitsch PetscCall(MatCreate(PETSC_COMM_WORLD,&A)); 459566063dSJacob Faibussowitsch PetscCall(MatSetSizes(A,m,m,PETSC_DECIDE,PETSC_DECIDE)); 469566063dSJacob Faibussowitsch PetscCall(MatSetFromOptions(A)); 479566063dSJacob Faibussowitsch PetscCall(MatSetUp(A)); 489566063dSJacob Faibussowitsch PetscCall(MatGetSize(A,NULL,&N)); 499566063dSJacob Faibussowitsch PetscCall(MatGetLocalSize(A,NULL,&n)); 509566063dSJacob Faibussowitsch PetscCall(MatGetBlockSize(A,&bs)); 519566063dSJacob Faibussowitsch PetscCall(MatSeqAIJSetPreallocation(A,n,NULL)); 529566063dSJacob Faibussowitsch PetscCall(MatMPIAIJSetPreallocation(A,n,NULL,N-n,NULL)); 539566063dSJacob Faibussowitsch PetscCall(MatSeqBAIJSetPreallocation(A,bs,n/bs,NULL)); 549566063dSJacob Faibussowitsch PetscCall(MatMPIBAIJSetPreallocation(A,bs,n/bs,NULL,(N-n)/bs,NULL)); 559566063dSJacob Faibussowitsch PetscCall(MatSeqSBAIJSetPreallocation(A,bs,n/bs,NULL)); 569566063dSJacob Faibussowitsch PetscCall(MatMPISBAIJSetPreallocation(A,bs,n/bs,NULL,(N-n)/bs,NULL)); 57c4762a1bSJed Brown 589566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(N,&cols,N,&vals)); 599566063dSJacob Faibussowitsch PetscCall(MatGetOwnershipRange(A,&rstart,&rend)); 60c4762a1bSJed Brown for (j = 0; j < N; ++j) cols[j] = j; 61c4762a1bSJed Brown for (i=rstart; i<rend; i++) { 62c4762a1bSJed Brown for (j=0;j<N;++j) { 63c4762a1bSJed Brown vals[j] = i*10000+j; 64c4762a1bSJed Brown } 659566063dSJacob Faibussowitsch PetscCall(MatSetValues(A,1,&i,N,cols,vals,INSERT_VALUES)); 66c4762a1bSJed Brown } 679566063dSJacob Faibussowitsch PetscCall(PetscFree2(cols,vals)); 689566063dSJacob Faibussowitsch PetscCall(MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY)); 699566063dSJacob Faibussowitsch PetscCall(MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY)); 70c4762a1bSJed Brown 719566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer,"Initial matrix:\n")); 729566063dSJacob Faibussowitsch PetscCall(MatView(A,viewer)); 73c4762a1bSJed Brown 74c4762a1bSJed Brown /* 75c4762a1bSJed Brown Create subcomms and ISs so that each rank participates in one IS. 76c4762a1bSJed Brown The IS either coalesces adjacent rank indices (contiguous), 77c4762a1bSJed Brown or selects indices by scrambling them using a hash. 78c4762a1bSJed Brown */ 79c4762a1bSJed Brown k = size/total_subdomains + (size%total_subdomains>0); /* There are up to k ranks to a color */ 80c4762a1bSJed Brown color = rank/k; 819566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_split(PETSC_COMM_WORLD,color,rank,&subcomm)); 829566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_size(subcomm,&subsize)); 839566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_rank(subcomm,&subrank)); 849566063dSJacob Faibussowitsch PetscCall(MatGetOwnershipRange(A,&rstart,&rend)); 85c4762a1bSJed Brown nis = 1; 869566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(rend-rstart,&rowindices,rend-rstart,&colindices)); 87c4762a1bSJed Brown 88c4762a1bSJed Brown for (j = rstart; j < rend; ++j) { 89c4762a1bSJed Brown if (permute_indices) { 90c4762a1bSJed Brown idx = (j*hash); 91c4762a1bSJed Brown } else { 92c4762a1bSJed Brown idx = j; 93c4762a1bSJed Brown } 94c4762a1bSJed Brown rowindices[j-rstart] = idx%N; 95c4762a1bSJed Brown colindices[j-rstart] = (idx+m)%N; 96c4762a1bSJed Brown } 979566063dSJacob Faibussowitsch PetscCall(ISCreateGeneral(subcomm,rend-rstart,rowindices,PETSC_COPY_VALUES,&rowis[0])); 989566063dSJacob Faibussowitsch PetscCall(ISCreateGeneral(subcomm,rend-rstart,colindices,PETSC_COPY_VALUES,&colis[0])); 999566063dSJacob Faibussowitsch PetscCall(ISSort(rowis[0])); 1009566063dSJacob Faibussowitsch PetscCall(ISSort(colis[0])); 1019566063dSJacob Faibussowitsch PetscCall(PetscFree2(rowindices,colindices)); 102c4762a1bSJed Brown /* 103c4762a1bSJed Brown Now view the ISs. To avoid deadlock when viewing a list of objects on different subcomms, 104c4762a1bSJed Brown we need to obtain the global numbers of our local objects and wait for the corresponding global 105c4762a1bSJed Brown number to be viewed. 106c4762a1bSJed Brown */ 1079566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer,"Subdomains")); 108c4762a1bSJed Brown if (permute_indices) { 1099566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer," (hash=%" PetscInt_FMT ")",hash)); 110c4762a1bSJed Brown } 1119566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer,":\n")); 1129566063dSJacob Faibussowitsch PetscCall(PetscViewerFlush(viewer)); 113c4762a1bSJed Brown 114c4762a1bSJed Brown nsubdomains = 1; 115c4762a1bSJed Brown for (s = 0; s < nsubdomains; ++s) gsubdomainperm[s] = s; 1169566063dSJacob Faibussowitsch PetscCall(PetscObjectsListGetGlobalNumbering(PETSC_COMM_WORLD,1,(PetscObject*)rowis,&gnsubdomains,gsubdomainnums)); 1179566063dSJacob Faibussowitsch PetscCall(PetscSortIntWithPermutation(nsubdomains,gsubdomainnums,gsubdomainperm)); 118c4762a1bSJed Brown for (gs=0,s=0; gs < gnsubdomains;++gs) { 119c4762a1bSJed Brown if (s < nsubdomains) { 120c4762a1bSJed Brown PetscInt ss; 121c4762a1bSJed Brown ss = gsubdomainperm[s]; 122c4762a1bSJed Brown if (gs == gsubdomainnums[ss]) { /* Global subdomain gs being viewed is my subdomain with local number ss. */ 123c4762a1bSJed Brown PetscViewer subviewer = NULL; 1249566063dSJacob Faibussowitsch PetscCall(PetscViewerGetSubViewer(viewer,PetscObjectComm((PetscObject)rowis[ss]),&subviewer)); 1259566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(subviewer,"Row IS %" PetscInt_FMT "\n",gs)); 1269566063dSJacob Faibussowitsch PetscCall(ISView(rowis[ss],subviewer)); 1279566063dSJacob Faibussowitsch PetscCall(PetscViewerFlush(subviewer)); 1289566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(subviewer,"Col IS %" PetscInt_FMT "\n",gs)); 1299566063dSJacob Faibussowitsch PetscCall(ISView(colis[ss],subviewer)); 1309566063dSJacob Faibussowitsch PetscCall(PetscViewerRestoreSubViewer(viewer,PetscObjectComm((PetscObject)rowis[ss]),&subviewer)); 131c4762a1bSJed Brown ++s; 132c4762a1bSJed Brown } 133c4762a1bSJed Brown } 1349566063dSJacob Faibussowitsch PetscCallMPI(MPI_Barrier(PETSC_COMM_WORLD)); 135c4762a1bSJed Brown } 1369566063dSJacob Faibussowitsch PetscCall(PetscViewerFlush(viewer)); 1379566063dSJacob Faibussowitsch PetscCall(ISSort(rowis[0])); 1389566063dSJacob Faibussowitsch PetscCall(ISSort(colis[0])); 139c4762a1bSJed Brown nsubdomains = 1; 1409566063dSJacob Faibussowitsch PetscCall(MatCreateSubMatricesMPI(A,nsubdomains,rowis,colis,MAT_INITIAL_MATRIX,&submats)); 141c4762a1bSJed Brown /* 142c4762a1bSJed Brown Now view the matrices. To avoid deadlock when viewing a list of objects on different subcomms, 143c4762a1bSJed Brown we need to obtain the global numbers of our local objects and wait for the corresponding global 144c4762a1bSJed Brown number to be viewed. 145c4762a1bSJed Brown */ 1469566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer,"Submatrices (repetition 1):\n")); 147c4762a1bSJed Brown for (s = 0; s < nsubdomains; ++s) gsubdomainperm[s] = s; 1489566063dSJacob Faibussowitsch PetscCall(PetscObjectsListGetGlobalNumbering(PETSC_COMM_WORLD,1,(PetscObject*)submats,&gnsubdomains,gsubdomainnums)); 1499566063dSJacob Faibussowitsch PetscCall(PetscSortIntWithPermutation(nsubdomains,gsubdomainnums,gsubdomainperm)); 150c4762a1bSJed Brown for (gs=0,s=0; gs < gnsubdomains;++gs) { 151c4762a1bSJed Brown if (s < nsubdomains) { 152c4762a1bSJed Brown PetscInt ss; 153c4762a1bSJed Brown ss = gsubdomainperm[s]; 154c4762a1bSJed Brown if (gs == gsubdomainnums[ss]) { /* Global subdomain gs being viewed is my subdomain with local number ss. */ 155c4762a1bSJed Brown PetscViewer subviewer = NULL; 1569566063dSJacob Faibussowitsch PetscCall(PetscViewerGetSubViewer(viewer,PetscObjectComm((PetscObject)submats[ss]),&subviewer)); 1579566063dSJacob Faibussowitsch PetscCall(MatView(submats[ss],subviewer)); 1589566063dSJacob Faibussowitsch PetscCall(PetscViewerRestoreSubViewer(viewer,PetscObjectComm((PetscObject)submats[ss]),&subviewer)); 159c4762a1bSJed Brown ++s; 160c4762a1bSJed Brown } 161c4762a1bSJed Brown } 1629566063dSJacob Faibussowitsch PetscCallMPI(MPI_Barrier(PETSC_COMM_WORLD)); 163c4762a1bSJed Brown } 1649566063dSJacob Faibussowitsch PetscCall(PetscViewerFlush(viewer)); 165c4762a1bSJed Brown if (rep == 1) goto cleanup; 166c4762a1bSJed Brown nsubdomains = 1; 1679566063dSJacob Faibussowitsch PetscCall(MatCreateSubMatricesMPI(A,nsubdomains,rowis,colis,MAT_REUSE_MATRIX,&submats)); 168c4762a1bSJed Brown /* 169c4762a1bSJed Brown Now view the matrices. To avoid deadlock when viewing a list of objects on different subcomms, 170c4762a1bSJed Brown we need to obtain the global numbers of our local objects and wait for the corresponding global 171c4762a1bSJed Brown number to be viewed. 172c4762a1bSJed Brown */ 1739566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer,"Submatrices (repetition 2):\n")); 174c4762a1bSJed Brown for (s = 0; s < nsubdomains; ++s) gsubdomainperm[s] = s; 1759566063dSJacob Faibussowitsch PetscCall(PetscObjectsListGetGlobalNumbering(PETSC_COMM_WORLD,1,(PetscObject*)submats,&gnsubdomains,gsubdomainnums)); 1769566063dSJacob Faibussowitsch PetscCall(PetscSortIntWithPermutation(nsubdomains,gsubdomainnums,gsubdomainperm)); 177c4762a1bSJed Brown for (gs=0,s=0; gs < gnsubdomains;++gs) { 178c4762a1bSJed Brown if (s < nsubdomains) { 179c4762a1bSJed Brown PetscInt ss; 180c4762a1bSJed Brown ss = gsubdomainperm[s]; 181c4762a1bSJed Brown if (gs == gsubdomainnums[ss]) { /* Global subdomain gs being viewed is my subdomain with local number ss. */ 182c4762a1bSJed Brown PetscViewer subviewer = NULL; 1839566063dSJacob Faibussowitsch PetscCall(PetscViewerGetSubViewer(viewer,PetscObjectComm((PetscObject)submats[ss]),&subviewer)); 1849566063dSJacob Faibussowitsch PetscCall(MatView(submats[ss],subviewer)); 1859566063dSJacob Faibussowitsch PetscCall(PetscViewerRestoreSubViewer(viewer,PetscObjectComm((PetscObject)submats[ss]),&subviewer)); 186c4762a1bSJed Brown ++s; 187c4762a1bSJed Brown } 188c4762a1bSJed Brown } 1899566063dSJacob Faibussowitsch PetscCallMPI(MPI_Barrier(PETSC_COMM_WORLD)); 190c4762a1bSJed Brown } 1919566063dSJacob Faibussowitsch PetscCall(PetscViewerFlush(viewer)); 192c4762a1bSJed Brown cleanup: 193c4762a1bSJed Brown for (k=0;k<nsubdomains;++k) { 1949566063dSJacob Faibussowitsch PetscCall(MatDestroy(submats+k)); 195c4762a1bSJed Brown } 1969566063dSJacob Faibussowitsch PetscCall(PetscFree(submats)); 197c4762a1bSJed Brown for (k=0;k<nis;++k) { 1989566063dSJacob Faibussowitsch PetscCall(ISDestroy(rowis+k)); 1999566063dSJacob Faibussowitsch PetscCall(ISDestroy(colis+k)); 200c4762a1bSJed Brown } 2019566063dSJacob Faibussowitsch PetscCall(MatDestroy(&A)); 2029566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_free(&subcomm)); 2039566063dSJacob Faibussowitsch PetscCall(PetscFinalize()); 204b122ec5aSJacob Faibussowitsch return 0; 205c4762a1bSJed Brown } 206c4762a1bSJed Brown 207c4762a1bSJed Brown /*TEST 208c4762a1bSJed Brown 209c4762a1bSJed Brown test: 210c4762a1bSJed Brown nsize: 2 211c4762a1bSJed Brown args: -total_subdomains 1 212c4762a1bSJed Brown output_file: output/ex183_2_1.out 213c4762a1bSJed Brown 214c4762a1bSJed Brown test: 215c4762a1bSJed Brown suffix: 2 216c4762a1bSJed Brown nsize: 3 217c4762a1bSJed Brown args: -total_subdomains 2 218c4762a1bSJed Brown output_file: output/ex183_3_2.out 219c4762a1bSJed Brown 220c4762a1bSJed Brown test: 221c4762a1bSJed Brown suffix: 3 222c4762a1bSJed Brown nsize: 4 223c4762a1bSJed Brown args: -total_subdomains 2 224c4762a1bSJed Brown output_file: output/ex183_4_2.out 225c4762a1bSJed Brown 226c4762a1bSJed Brown test: 227c4762a1bSJed Brown suffix: 4 228c4762a1bSJed Brown nsize: 6 229c4762a1bSJed Brown args: -total_subdomains 2 230c4762a1bSJed Brown output_file: output/ex183_6_2.out 231c4762a1bSJed Brown 232c4762a1bSJed Brown TEST*/ 233