1c4762a1bSJed Brown static char help[] = "Example of extracting an array of MPI submatrices from a given MPI matrix.\n" 2c4762a1bSJed Brown "This test can only be run in parallel.\n" 3c4762a1bSJed Brown "\n"; 4c4762a1bSJed Brown 5c4762a1bSJed Brown #include <petscmat.h> 6c4762a1bSJed Brown 7c4762a1bSJed Brown int main(int argc, char **args) 8c4762a1bSJed Brown { 9c4762a1bSJed Brown Mat A,*submats; 10c4762a1bSJed Brown MPI_Comm subcomm; 11c4762a1bSJed Brown PetscMPIInt rank,size,subrank,subsize,color; 12c4762a1bSJed Brown PetscInt m,n,N,bs,rstart,rend,i,j,k,total_subdomains,hash,nsubdomains=1; 13c4762a1bSJed Brown PetscInt nis,*cols,gnsubdomains,gsubdomainnums[1],gsubdomainperm[1],s,gs; 14c4762a1bSJed Brown PetscInt *rowindices,*colindices,idx,rep; 15c4762a1bSJed Brown PetscScalar *vals; 16c4762a1bSJed Brown IS rowis[1],colis[1]; 17c4762a1bSJed Brown PetscViewer viewer; 18c4762a1bSJed Brown PetscBool permute_indices,flg; 19c4762a1bSJed Brown 209566063dSJacob Faibussowitsch PetscCall(PetscInitialize(&argc,&args,(char*)0,help)); 219566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_size(PETSC_COMM_WORLD,&size)); 229566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_rank(PETSC_COMM_WORLD,&rank)); 23c4762a1bSJed Brown 24d0609cedSBarry Smith PetscOptionsBegin(PETSC_COMM_WORLD,NULL,"ex183","Mat"); 25c4762a1bSJed Brown m = 5; 269566063dSJacob Faibussowitsch PetscCall(PetscOptionsInt("-m","Local matrix size","MatSetSizes",m,&m,&flg)); 27c4762a1bSJed Brown total_subdomains = size-1; 289566063dSJacob Faibussowitsch PetscCall(PetscOptionsInt("-total_subdomains","Number of submatrices where 0 < n < comm size","MatCreateSubMatricesMPI",total_subdomains,&total_subdomains,&flg)); 29c4762a1bSJed Brown permute_indices = PETSC_FALSE; 309566063dSJacob Faibussowitsch PetscCall(PetscOptionsBool("-permute_indices","Whether to permute indices before breaking them into subdomains","ISCreateGeneral",permute_indices,&permute_indices,&flg)); 31c4762a1bSJed Brown hash = 7; 329566063dSJacob Faibussowitsch PetscCall(PetscOptionsInt("-hash","Permutation factor, which has to be relatively prime to M = size*m (total matrix size)","ISCreateGeneral",hash,&hash,&flg)); 33c4762a1bSJed Brown rep = 2; 349566063dSJacob Faibussowitsch PetscCall(PetscOptionsInt("-rep","Number of times to carry out submatrix extractions; currently only 1 & 2 are supported",NULL,rep,&rep,&flg)); 35d0609cedSBarry Smith PetscOptionsEnd(); 36c4762a1bSJed Brown 3708401ef6SPierre Jolivet PetscCheck(total_subdomains <= size,PETSC_COMM_WORLD,PETSC_ERR_ARG_WRONG,"Number of subdomains %" PetscInt_FMT " must not exceed comm size %d",total_subdomains,size); 38*bc30f867SBarry Smith PetscCheck(total_subdomains >= 1 && total_subdomains <= size,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"number of subdomains must be > 0 and <= %d (comm size), got total_subdomains = %" PetscInt_FMT,size,total_subdomains); 39*bc30f867SBarry Smith PetscCheck(rep == 1 || rep == 2,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid number of test repetitions: %" PetscInt_FMT "; must be 1 or 2",rep); 40c4762a1bSJed Brown 41c4762a1bSJed Brown viewer = PETSC_VIEWER_STDOUT_WORLD; 42c4762a1bSJed Brown /* Create logically sparse, but effectively dense matrix for easy verification of submatrix extraction correctness. */ 439566063dSJacob Faibussowitsch PetscCall(MatCreate(PETSC_COMM_WORLD,&A)); 449566063dSJacob Faibussowitsch PetscCall(MatSetSizes(A,m,m,PETSC_DECIDE,PETSC_DECIDE)); 459566063dSJacob Faibussowitsch PetscCall(MatSetFromOptions(A)); 469566063dSJacob Faibussowitsch PetscCall(MatSetUp(A)); 479566063dSJacob Faibussowitsch PetscCall(MatGetSize(A,NULL,&N)); 489566063dSJacob Faibussowitsch PetscCall(MatGetLocalSize(A,NULL,&n)); 499566063dSJacob Faibussowitsch PetscCall(MatGetBlockSize(A,&bs)); 509566063dSJacob Faibussowitsch PetscCall(MatSeqAIJSetPreallocation(A,n,NULL)); 519566063dSJacob Faibussowitsch PetscCall(MatMPIAIJSetPreallocation(A,n,NULL,N-n,NULL)); 529566063dSJacob Faibussowitsch PetscCall(MatSeqBAIJSetPreallocation(A,bs,n/bs,NULL)); 539566063dSJacob Faibussowitsch PetscCall(MatMPIBAIJSetPreallocation(A,bs,n/bs,NULL,(N-n)/bs,NULL)); 549566063dSJacob Faibussowitsch PetscCall(MatSeqSBAIJSetPreallocation(A,bs,n/bs,NULL)); 559566063dSJacob Faibussowitsch PetscCall(MatMPISBAIJSetPreallocation(A,bs,n/bs,NULL,(N-n)/bs,NULL)); 56c4762a1bSJed Brown 579566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(N,&cols,N,&vals)); 589566063dSJacob Faibussowitsch PetscCall(MatGetOwnershipRange(A,&rstart,&rend)); 59c4762a1bSJed Brown for (j = 0; j < N; ++j) cols[j] = j; 60c4762a1bSJed Brown for (i=rstart; i<rend; i++) { 61c4762a1bSJed Brown for (j=0;j<N;++j) { 62c4762a1bSJed Brown vals[j] = i*10000+j; 63c4762a1bSJed Brown } 649566063dSJacob Faibussowitsch PetscCall(MatSetValues(A,1,&i,N,cols,vals,INSERT_VALUES)); 65c4762a1bSJed Brown } 669566063dSJacob Faibussowitsch PetscCall(PetscFree2(cols,vals)); 679566063dSJacob Faibussowitsch PetscCall(MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY)); 689566063dSJacob Faibussowitsch PetscCall(MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY)); 69c4762a1bSJed Brown 709566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer,"Initial matrix:\n")); 719566063dSJacob Faibussowitsch PetscCall(MatView(A,viewer)); 72c4762a1bSJed Brown 73c4762a1bSJed Brown /* 74c4762a1bSJed Brown Create subcomms and ISs so that each rank participates in one IS. 75c4762a1bSJed Brown The IS either coalesces adjacent rank indices (contiguous), 76c4762a1bSJed Brown or selects indices by scrambling them using a hash. 77c4762a1bSJed Brown */ 78c4762a1bSJed Brown k = size/total_subdomains + (size%total_subdomains>0); /* There are up to k ranks to a color */ 79c4762a1bSJed Brown color = rank/k; 809566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_split(PETSC_COMM_WORLD,color,rank,&subcomm)); 819566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_size(subcomm,&subsize)); 829566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_rank(subcomm,&subrank)); 839566063dSJacob Faibussowitsch PetscCall(MatGetOwnershipRange(A,&rstart,&rend)); 84c4762a1bSJed Brown nis = 1; 859566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(rend-rstart,&rowindices,rend-rstart,&colindices)); 86c4762a1bSJed Brown 87c4762a1bSJed Brown for (j = rstart; j < rend; ++j) { 88c4762a1bSJed Brown if (permute_indices) { 89c4762a1bSJed Brown idx = (j*hash); 90c4762a1bSJed Brown } else { 91c4762a1bSJed Brown idx = j; 92c4762a1bSJed Brown } 93c4762a1bSJed Brown rowindices[j-rstart] = idx%N; 94c4762a1bSJed Brown colindices[j-rstart] = (idx+m)%N; 95c4762a1bSJed Brown } 969566063dSJacob Faibussowitsch PetscCall(ISCreateGeneral(subcomm,rend-rstart,rowindices,PETSC_COPY_VALUES,&rowis[0])); 979566063dSJacob Faibussowitsch PetscCall(ISCreateGeneral(subcomm,rend-rstart,colindices,PETSC_COPY_VALUES,&colis[0])); 989566063dSJacob Faibussowitsch PetscCall(ISSort(rowis[0])); 999566063dSJacob Faibussowitsch PetscCall(ISSort(colis[0])); 1009566063dSJacob Faibussowitsch PetscCall(PetscFree2(rowindices,colindices)); 101c4762a1bSJed Brown /* 102c4762a1bSJed Brown Now view the ISs. To avoid deadlock when viewing a list of objects on different subcomms, 103c4762a1bSJed Brown we need to obtain the global numbers of our local objects and wait for the corresponding global 104c4762a1bSJed Brown number to be viewed. 105c4762a1bSJed Brown */ 1069566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer,"Subdomains")); 107c4762a1bSJed Brown if (permute_indices) { 1089566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer," (hash=%" PetscInt_FMT ")",hash)); 109c4762a1bSJed Brown } 1109566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer,":\n")); 1119566063dSJacob Faibussowitsch PetscCall(PetscViewerFlush(viewer)); 112c4762a1bSJed Brown 113c4762a1bSJed Brown nsubdomains = 1; 114c4762a1bSJed Brown for (s = 0; s < nsubdomains; ++s) gsubdomainperm[s] = s; 1159566063dSJacob Faibussowitsch PetscCall(PetscObjectsListGetGlobalNumbering(PETSC_COMM_WORLD,1,(PetscObject*)rowis,&gnsubdomains,gsubdomainnums)); 1169566063dSJacob Faibussowitsch PetscCall(PetscSortIntWithPermutation(nsubdomains,gsubdomainnums,gsubdomainperm)); 117c4762a1bSJed Brown for (gs=0,s=0; gs < gnsubdomains;++gs) { 118c4762a1bSJed Brown if (s < nsubdomains) { 119c4762a1bSJed Brown PetscInt ss; 120c4762a1bSJed Brown ss = gsubdomainperm[s]; 121c4762a1bSJed Brown if (gs == gsubdomainnums[ss]) { /* Global subdomain gs being viewed is my subdomain with local number ss. */ 122c4762a1bSJed Brown PetscViewer subviewer = NULL; 1239566063dSJacob Faibussowitsch PetscCall(PetscViewerGetSubViewer(viewer,PetscObjectComm((PetscObject)rowis[ss]),&subviewer)); 1249566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(subviewer,"Row IS %" PetscInt_FMT "\n",gs)); 1259566063dSJacob Faibussowitsch PetscCall(ISView(rowis[ss],subviewer)); 1269566063dSJacob Faibussowitsch PetscCall(PetscViewerFlush(subviewer)); 1279566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(subviewer,"Col IS %" PetscInt_FMT "\n",gs)); 1289566063dSJacob Faibussowitsch PetscCall(ISView(colis[ss],subviewer)); 1299566063dSJacob Faibussowitsch PetscCall(PetscViewerRestoreSubViewer(viewer,PetscObjectComm((PetscObject)rowis[ss]),&subviewer)); 130c4762a1bSJed Brown ++s; 131c4762a1bSJed Brown } 132c4762a1bSJed Brown } 1339566063dSJacob Faibussowitsch PetscCallMPI(MPI_Barrier(PETSC_COMM_WORLD)); 134c4762a1bSJed Brown } 1359566063dSJacob Faibussowitsch PetscCall(PetscViewerFlush(viewer)); 1369566063dSJacob Faibussowitsch PetscCall(ISSort(rowis[0])); 1379566063dSJacob Faibussowitsch PetscCall(ISSort(colis[0])); 138c4762a1bSJed Brown nsubdomains = 1; 1399566063dSJacob Faibussowitsch PetscCall(MatCreateSubMatricesMPI(A,nsubdomains,rowis,colis,MAT_INITIAL_MATRIX,&submats)); 140c4762a1bSJed Brown /* 141c4762a1bSJed Brown Now view the matrices. To avoid deadlock when viewing a list of objects on different subcomms, 142c4762a1bSJed Brown we need to obtain the global numbers of our local objects and wait for the corresponding global 143c4762a1bSJed Brown number to be viewed. 144c4762a1bSJed Brown */ 1459566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer,"Submatrices (repetition 1):\n")); 146c4762a1bSJed Brown for (s = 0; s < nsubdomains; ++s) gsubdomainperm[s] = s; 1479566063dSJacob Faibussowitsch PetscCall(PetscObjectsListGetGlobalNumbering(PETSC_COMM_WORLD,1,(PetscObject*)submats,&gnsubdomains,gsubdomainnums)); 1489566063dSJacob Faibussowitsch PetscCall(PetscSortIntWithPermutation(nsubdomains,gsubdomainnums,gsubdomainperm)); 149c4762a1bSJed Brown for (gs=0,s=0; gs < gnsubdomains;++gs) { 150c4762a1bSJed Brown if (s < nsubdomains) { 151c4762a1bSJed Brown PetscInt ss; 152c4762a1bSJed Brown ss = gsubdomainperm[s]; 153c4762a1bSJed Brown if (gs == gsubdomainnums[ss]) { /* Global subdomain gs being viewed is my subdomain with local number ss. */ 154c4762a1bSJed Brown PetscViewer subviewer = NULL; 1559566063dSJacob Faibussowitsch PetscCall(PetscViewerGetSubViewer(viewer,PetscObjectComm((PetscObject)submats[ss]),&subviewer)); 1569566063dSJacob Faibussowitsch PetscCall(MatView(submats[ss],subviewer)); 1579566063dSJacob Faibussowitsch PetscCall(PetscViewerRestoreSubViewer(viewer,PetscObjectComm((PetscObject)submats[ss]),&subviewer)); 158c4762a1bSJed Brown ++s; 159c4762a1bSJed Brown } 160c4762a1bSJed Brown } 1619566063dSJacob Faibussowitsch PetscCallMPI(MPI_Barrier(PETSC_COMM_WORLD)); 162c4762a1bSJed Brown } 1639566063dSJacob Faibussowitsch PetscCall(PetscViewerFlush(viewer)); 164c4762a1bSJed Brown if (rep == 1) goto cleanup; 165c4762a1bSJed Brown nsubdomains = 1; 1669566063dSJacob Faibussowitsch PetscCall(MatCreateSubMatricesMPI(A,nsubdomains,rowis,colis,MAT_REUSE_MATRIX,&submats)); 167c4762a1bSJed Brown /* 168c4762a1bSJed Brown Now view the matrices. To avoid deadlock when viewing a list of objects on different subcomms, 169c4762a1bSJed Brown we need to obtain the global numbers of our local objects and wait for the corresponding global 170c4762a1bSJed Brown number to be viewed. 171c4762a1bSJed Brown */ 1729566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer,"Submatrices (repetition 2):\n")); 173c4762a1bSJed Brown for (s = 0; s < nsubdomains; ++s) gsubdomainperm[s] = s; 1749566063dSJacob Faibussowitsch PetscCall(PetscObjectsListGetGlobalNumbering(PETSC_COMM_WORLD,1,(PetscObject*)submats,&gnsubdomains,gsubdomainnums)); 1759566063dSJacob Faibussowitsch PetscCall(PetscSortIntWithPermutation(nsubdomains,gsubdomainnums,gsubdomainperm)); 176c4762a1bSJed Brown for (gs=0,s=0; gs < gnsubdomains;++gs) { 177c4762a1bSJed Brown if (s < nsubdomains) { 178c4762a1bSJed Brown PetscInt ss; 179c4762a1bSJed Brown ss = gsubdomainperm[s]; 180c4762a1bSJed Brown if (gs == gsubdomainnums[ss]) { /* Global subdomain gs being viewed is my subdomain with local number ss. */ 181c4762a1bSJed Brown PetscViewer subviewer = NULL; 1829566063dSJacob Faibussowitsch PetscCall(PetscViewerGetSubViewer(viewer,PetscObjectComm((PetscObject)submats[ss]),&subviewer)); 1839566063dSJacob Faibussowitsch PetscCall(MatView(submats[ss],subviewer)); 1849566063dSJacob Faibussowitsch PetscCall(PetscViewerRestoreSubViewer(viewer,PetscObjectComm((PetscObject)submats[ss]),&subviewer)); 185c4762a1bSJed Brown ++s; 186c4762a1bSJed Brown } 187c4762a1bSJed Brown } 1889566063dSJacob Faibussowitsch PetscCallMPI(MPI_Barrier(PETSC_COMM_WORLD)); 189c4762a1bSJed Brown } 1909566063dSJacob Faibussowitsch PetscCall(PetscViewerFlush(viewer)); 191c4762a1bSJed Brown cleanup: 192c4762a1bSJed Brown for (k=0;k<nsubdomains;++k) { 1939566063dSJacob Faibussowitsch PetscCall(MatDestroy(submats+k)); 194c4762a1bSJed Brown } 1959566063dSJacob Faibussowitsch PetscCall(PetscFree(submats)); 196c4762a1bSJed Brown for (k=0;k<nis;++k) { 1979566063dSJacob Faibussowitsch PetscCall(ISDestroy(rowis+k)); 1989566063dSJacob Faibussowitsch PetscCall(ISDestroy(colis+k)); 199c4762a1bSJed Brown } 2009566063dSJacob Faibussowitsch PetscCall(MatDestroy(&A)); 2019566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_free(&subcomm)); 2029566063dSJacob Faibussowitsch PetscCall(PetscFinalize()); 203b122ec5aSJacob Faibussowitsch return 0; 204c4762a1bSJed Brown } 205c4762a1bSJed Brown 206c4762a1bSJed Brown /*TEST 207c4762a1bSJed Brown 208c4762a1bSJed Brown test: 209c4762a1bSJed Brown nsize: 2 210c4762a1bSJed Brown args: -total_subdomains 1 211c4762a1bSJed Brown output_file: output/ex183_2_1.out 212c4762a1bSJed Brown 213c4762a1bSJed Brown test: 214c4762a1bSJed Brown suffix: 2 215c4762a1bSJed Brown nsize: 3 216c4762a1bSJed Brown args: -total_subdomains 2 217c4762a1bSJed Brown output_file: output/ex183_3_2.out 218c4762a1bSJed Brown 219c4762a1bSJed Brown test: 220c4762a1bSJed Brown suffix: 3 221c4762a1bSJed Brown nsize: 4 222c4762a1bSJed Brown args: -total_subdomains 2 223c4762a1bSJed Brown output_file: output/ex183_4_2.out 224c4762a1bSJed Brown 225c4762a1bSJed Brown test: 226c4762a1bSJed Brown suffix: 4 227c4762a1bSJed Brown nsize: 6 228c4762a1bSJed Brown args: -total_subdomains 2 229c4762a1bSJed Brown output_file: output/ex183_6_2.out 230c4762a1bSJed Brown 231c4762a1bSJed Brown TEST*/ 232