1c4762a1bSJed Brown static char help[] = "Example of extracting an array of MPI submatrices from a given MPI matrix.\n" 2c4762a1bSJed Brown "This test can only be run in parallel.\n" 3c4762a1bSJed Brown "\n"; 4c4762a1bSJed Brown 5c4762a1bSJed Brown /*T 6c4762a1bSJed Brown Concepts: Mat^mat submatrix, parallel 7c4762a1bSJed Brown Processors: n 8c4762a1bSJed Brown T*/ 9c4762a1bSJed Brown 10c4762a1bSJed Brown #include <petscmat.h> 11c4762a1bSJed Brown 12c4762a1bSJed Brown int main(int argc, char **args) 13c4762a1bSJed Brown { 14c4762a1bSJed Brown Mat A,*submats; 15c4762a1bSJed Brown MPI_Comm subcomm; 16c4762a1bSJed Brown PetscMPIInt rank,size,subrank,subsize,color; 17c4762a1bSJed Brown PetscInt m,n,N,bs,rstart,rend,i,j,k,total_subdomains,hash,nsubdomains=1; 18c4762a1bSJed Brown PetscInt nis,*cols,gnsubdomains,gsubdomainnums[1],gsubdomainperm[1],s,gs; 19c4762a1bSJed Brown PetscInt *rowindices,*colindices,idx,rep; 20c4762a1bSJed Brown PetscScalar *vals; 21c4762a1bSJed Brown IS rowis[1],colis[1]; 22c4762a1bSJed Brown PetscViewer viewer; 23c4762a1bSJed Brown PetscBool permute_indices,flg; 24c4762a1bSJed Brown PetscErrorCode ierr; 25c4762a1bSJed Brown 26c4762a1bSJed Brown ierr = PetscInitialize(&argc,&args,(char*)0,help);if (ierr) return ierr; 27*5f80ce2aSJacob Faibussowitsch CHKERRMPI(MPI_Comm_size(PETSC_COMM_WORLD,&size)); 28*5f80ce2aSJacob Faibussowitsch CHKERRMPI(MPI_Comm_rank(PETSC_COMM_WORLD,&rank)); 29c4762a1bSJed Brown 30c4762a1bSJed Brown ierr = PetscOptionsBegin(PETSC_COMM_WORLD,NULL,"ex183","Mat");CHKERRQ(ierr); 31c4762a1bSJed Brown m = 5; 32*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscOptionsInt("-m","Local matrix size","MatSetSizes",m,&m,&flg)); 33c4762a1bSJed Brown total_subdomains = size-1; 34*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscOptionsInt("-total_subdomains","Number of submatrices where 0 < n < comm size","MatCreateSubMatricesMPI",total_subdomains,&total_subdomains,&flg)); 35c4762a1bSJed Brown permute_indices = PETSC_FALSE; 36*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscOptionsBool("-permute_indices","Whether to permute indices before breaking them into subdomains","ISCreateGeneral",permute_indices,&permute_indices,&flg)); 37c4762a1bSJed Brown hash = 7; 38*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscOptionsInt("-hash","Permutation factor, which has to be relatively prime to M = size*m (total matrix size)","ISCreateGeneral",hash,&hash,&flg)); 39c4762a1bSJed Brown rep = 2; 40*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscOptionsInt("-rep","Number of times to carry out submatrix extractions; currently only 1 & 2 are supported",NULL,rep,&rep,&flg)); 41c4762a1bSJed Brown ierr = PetscOptionsEnd();CHKERRQ(ierr); 42c4762a1bSJed Brown 432c71b3e2SJacob Faibussowitsch PetscCheckFalse(total_subdomains > size,PETSC_COMM_WORLD,PETSC_ERR_ARG_WRONG,"Number of subdomains %" PetscInt_FMT " must not exceed comm size %d",total_subdomains,size); 442c71b3e2SJacob Faibussowitsch PetscCheckFalse(total_subdomains < 1 || total_subdomains > size,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"number of subdomains must be > 0 and <= %d (comm size), got total_subdomains = %" PetscInt_FMT,size,total_subdomains); 452c71b3e2SJacob Faibussowitsch PetscCheckFalse(rep != 1 && rep != 2,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid number of test repetitions: %" PetscInt_FMT "; must be 1 or 2",rep); 46c4762a1bSJed Brown 47c4762a1bSJed Brown viewer = PETSC_VIEWER_STDOUT_WORLD; 48c4762a1bSJed Brown /* Create logically sparse, but effectively dense matrix for easy verification of submatrix extraction correctness. */ 49*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatCreate(PETSC_COMM_WORLD,&A)); 50*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatSetSizes(A,m,m,PETSC_DECIDE,PETSC_DECIDE)); 51*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatSetFromOptions(A)); 52*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatSetUp(A)); 53*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatGetSize(A,NULL,&N)); 54*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatGetLocalSize(A,NULL,&n)); 55*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatGetBlockSize(A,&bs)); 56*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatSeqAIJSetPreallocation(A,n,NULL)); 57*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatMPIAIJSetPreallocation(A,n,NULL,N-n,NULL)); 58*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatSeqBAIJSetPreallocation(A,bs,n/bs,NULL)); 59*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatMPIBAIJSetPreallocation(A,bs,n/bs,NULL,(N-n)/bs,NULL)); 60*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatSeqSBAIJSetPreallocation(A,bs,n/bs,NULL)); 61*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatMPISBAIJSetPreallocation(A,bs,n/bs,NULL,(N-n)/bs,NULL)); 62c4762a1bSJed Brown 63*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscMalloc2(N,&cols,N,&vals)); 64*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatGetOwnershipRange(A,&rstart,&rend)); 65c4762a1bSJed Brown for (j = 0; j < N; ++j) cols[j] = j; 66c4762a1bSJed Brown for (i=rstart; i<rend; i++) { 67c4762a1bSJed Brown for (j=0;j<N;++j) { 68c4762a1bSJed Brown vals[j] = i*10000+j; 69c4762a1bSJed Brown } 70*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatSetValues(A,1,&i,N,cols,vals,INSERT_VALUES)); 71c4762a1bSJed Brown } 72*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscFree2(cols,vals)); 73*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY)); 74*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY)); 75c4762a1bSJed Brown 76*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscViewerASCIIPrintf(viewer,"Initial matrix:\n")); 77*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatView(A,viewer)); 78c4762a1bSJed Brown 79c4762a1bSJed Brown /* 80c4762a1bSJed Brown Create subcomms and ISs so that each rank participates in one IS. 81c4762a1bSJed Brown The IS either coalesces adjacent rank indices (contiguous), 82c4762a1bSJed Brown or selects indices by scrambling them using a hash. 83c4762a1bSJed Brown */ 84c4762a1bSJed Brown k = size/total_subdomains + (size%total_subdomains>0); /* There are up to k ranks to a color */ 85c4762a1bSJed Brown color = rank/k; 86*5f80ce2aSJacob Faibussowitsch CHKERRMPI(MPI_Comm_split(PETSC_COMM_WORLD,color,rank,&subcomm)); 87*5f80ce2aSJacob Faibussowitsch CHKERRMPI(MPI_Comm_size(subcomm,&subsize)); 88*5f80ce2aSJacob Faibussowitsch CHKERRMPI(MPI_Comm_rank(subcomm,&subrank)); 89*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatGetOwnershipRange(A,&rstart,&rend)); 90c4762a1bSJed Brown nis = 1; 91*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscMalloc2(rend-rstart,&rowindices,rend-rstart,&colindices)); 92c4762a1bSJed Brown 93c4762a1bSJed Brown for (j = rstart; j < rend; ++j) { 94c4762a1bSJed Brown if (permute_indices) { 95c4762a1bSJed Brown idx = (j*hash); 96c4762a1bSJed Brown } else { 97c4762a1bSJed Brown idx = j; 98c4762a1bSJed Brown } 99c4762a1bSJed Brown rowindices[j-rstart] = idx%N; 100c4762a1bSJed Brown colindices[j-rstart] = (idx+m)%N; 101c4762a1bSJed Brown } 102*5f80ce2aSJacob Faibussowitsch CHKERRQ(ISCreateGeneral(subcomm,rend-rstart,rowindices,PETSC_COPY_VALUES,&rowis[0])); 103*5f80ce2aSJacob Faibussowitsch CHKERRQ(ISCreateGeneral(subcomm,rend-rstart,colindices,PETSC_COPY_VALUES,&colis[0])); 104*5f80ce2aSJacob Faibussowitsch CHKERRQ(ISSort(rowis[0])); 105*5f80ce2aSJacob Faibussowitsch CHKERRQ(ISSort(colis[0])); 106*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscFree2(rowindices,colindices)); 107c4762a1bSJed Brown /* 108c4762a1bSJed Brown Now view the ISs. To avoid deadlock when viewing a list of objects on different subcomms, 109c4762a1bSJed Brown we need to obtain the global numbers of our local objects and wait for the corresponding global 110c4762a1bSJed Brown number to be viewed. 111c4762a1bSJed Brown */ 112*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscViewerASCIIPrintf(viewer,"Subdomains")); 113c4762a1bSJed Brown if (permute_indices) { 114*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscViewerASCIIPrintf(viewer," (hash=%" PetscInt_FMT ")",hash)); 115c4762a1bSJed Brown } 116*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscViewerASCIIPrintf(viewer,":\n")); 117*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscViewerFlush(viewer)); 118c4762a1bSJed Brown 119c4762a1bSJed Brown nsubdomains = 1; 120c4762a1bSJed Brown for (s = 0; s < nsubdomains; ++s) gsubdomainperm[s] = s; 121*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscObjectsListGetGlobalNumbering(PETSC_COMM_WORLD,1,(PetscObject*)rowis,&gnsubdomains,gsubdomainnums)); 122*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscSortIntWithPermutation(nsubdomains,gsubdomainnums,gsubdomainperm)); 123c4762a1bSJed Brown for (gs=0,s=0; gs < gnsubdomains;++gs) { 124c4762a1bSJed Brown if (s < nsubdomains) { 125c4762a1bSJed Brown PetscInt ss; 126c4762a1bSJed Brown ss = gsubdomainperm[s]; 127c4762a1bSJed Brown if (gs == gsubdomainnums[ss]) { /* Global subdomain gs being viewed is my subdomain with local number ss. */ 128c4762a1bSJed Brown PetscViewer subviewer = NULL; 129*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscViewerGetSubViewer(viewer,PetscObjectComm((PetscObject)rowis[ss]),&subviewer)); 130*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscViewerASCIIPrintf(subviewer,"Row IS %" PetscInt_FMT "\n",gs)); 131*5f80ce2aSJacob Faibussowitsch CHKERRQ(ISView(rowis[ss],subviewer)); 132*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscViewerFlush(subviewer)); 133*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscViewerASCIIPrintf(subviewer,"Col IS %" PetscInt_FMT "\n",gs)); 134*5f80ce2aSJacob Faibussowitsch CHKERRQ(ISView(colis[ss],subviewer)); 135*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscViewerRestoreSubViewer(viewer,PetscObjectComm((PetscObject)rowis[ss]),&subviewer)); 136c4762a1bSJed Brown ++s; 137c4762a1bSJed Brown } 138c4762a1bSJed Brown } 139*5f80ce2aSJacob Faibussowitsch CHKERRMPI(MPI_Barrier(PETSC_COMM_WORLD)); 140c4762a1bSJed Brown } 141*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscViewerFlush(viewer)); 142*5f80ce2aSJacob Faibussowitsch CHKERRQ(ISSort(rowis[0])); 143*5f80ce2aSJacob Faibussowitsch CHKERRQ(ISSort(colis[0])); 144c4762a1bSJed Brown nsubdomains = 1; 145*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatCreateSubMatricesMPI(A,nsubdomains,rowis,colis,MAT_INITIAL_MATRIX,&submats)); 146c4762a1bSJed Brown /* 147c4762a1bSJed Brown Now view the matrices. To avoid deadlock when viewing a list of objects on different subcomms, 148c4762a1bSJed Brown we need to obtain the global numbers of our local objects and wait for the corresponding global 149c4762a1bSJed Brown number to be viewed. 150c4762a1bSJed Brown */ 151*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscViewerASCIIPrintf(viewer,"Submatrices (repetition 1):\n")); 152c4762a1bSJed Brown for (s = 0; s < nsubdomains; ++s) gsubdomainperm[s] = s; 153*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscObjectsListGetGlobalNumbering(PETSC_COMM_WORLD,1,(PetscObject*)submats,&gnsubdomains,gsubdomainnums)); 154*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscSortIntWithPermutation(nsubdomains,gsubdomainnums,gsubdomainperm)); 155c4762a1bSJed Brown for (gs=0,s=0; gs < gnsubdomains;++gs) { 156c4762a1bSJed Brown if (s < nsubdomains) { 157c4762a1bSJed Brown PetscInt ss; 158c4762a1bSJed Brown ss = gsubdomainperm[s]; 159c4762a1bSJed Brown if (gs == gsubdomainnums[ss]) { /* Global subdomain gs being viewed is my subdomain with local number ss. */ 160c4762a1bSJed Brown PetscViewer subviewer = NULL; 161*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscViewerGetSubViewer(viewer,PetscObjectComm((PetscObject)submats[ss]),&subviewer)); 162*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatView(submats[ss],subviewer)); 163*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscViewerRestoreSubViewer(viewer,PetscObjectComm((PetscObject)submats[ss]),&subviewer)); 164c4762a1bSJed Brown ++s; 165c4762a1bSJed Brown } 166c4762a1bSJed Brown } 167*5f80ce2aSJacob Faibussowitsch CHKERRMPI(MPI_Barrier(PETSC_COMM_WORLD)); 168c4762a1bSJed Brown } 169*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscViewerFlush(viewer)); 170c4762a1bSJed Brown if (rep == 1) goto cleanup; 171c4762a1bSJed Brown nsubdomains = 1; 172*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatCreateSubMatricesMPI(A,nsubdomains,rowis,colis,MAT_REUSE_MATRIX,&submats)); 173c4762a1bSJed Brown /* 174c4762a1bSJed Brown Now view the matrices. To avoid deadlock when viewing a list of objects on different subcomms, 175c4762a1bSJed Brown we need to obtain the global numbers of our local objects and wait for the corresponding global 176c4762a1bSJed Brown number to be viewed. 177c4762a1bSJed Brown */ 178*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscViewerASCIIPrintf(viewer,"Submatrices (repetition 2):\n")); 179c4762a1bSJed Brown for (s = 0; s < nsubdomains; ++s) gsubdomainperm[s] = s; 180*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscObjectsListGetGlobalNumbering(PETSC_COMM_WORLD,1,(PetscObject*)submats,&gnsubdomains,gsubdomainnums)); 181*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscSortIntWithPermutation(nsubdomains,gsubdomainnums,gsubdomainperm)); 182c4762a1bSJed Brown for (gs=0,s=0; gs < gnsubdomains;++gs) { 183c4762a1bSJed Brown if (s < nsubdomains) { 184c4762a1bSJed Brown PetscInt ss; 185c4762a1bSJed Brown ss = gsubdomainperm[s]; 186c4762a1bSJed Brown if (gs == gsubdomainnums[ss]) { /* Global subdomain gs being viewed is my subdomain with local number ss. */ 187c4762a1bSJed Brown PetscViewer subviewer = NULL; 188*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscViewerGetSubViewer(viewer,PetscObjectComm((PetscObject)submats[ss]),&subviewer)); 189*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatView(submats[ss],subviewer)); 190*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscViewerRestoreSubViewer(viewer,PetscObjectComm((PetscObject)submats[ss]),&subviewer)); 191c4762a1bSJed Brown ++s; 192c4762a1bSJed Brown } 193c4762a1bSJed Brown } 194*5f80ce2aSJacob Faibussowitsch CHKERRMPI(MPI_Barrier(PETSC_COMM_WORLD)); 195c4762a1bSJed Brown } 196*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscViewerFlush(viewer)); 197c4762a1bSJed Brown cleanup: 198c4762a1bSJed Brown for (k=0;k<nsubdomains;++k) { 199*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatDestroy(submats+k)); 200c4762a1bSJed Brown } 201*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscFree(submats)); 202c4762a1bSJed Brown for (k=0;k<nis;++k) { 203*5f80ce2aSJacob Faibussowitsch CHKERRQ(ISDestroy(rowis+k)); 204*5f80ce2aSJacob Faibussowitsch CHKERRQ(ISDestroy(colis+k)); 205c4762a1bSJed Brown } 206*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatDestroy(&A)); 207*5f80ce2aSJacob Faibussowitsch CHKERRMPI(MPI_Comm_free(&subcomm)); 208c4762a1bSJed Brown ierr = PetscFinalize(); 209c4762a1bSJed Brown return ierr; 210c4762a1bSJed Brown } 211c4762a1bSJed Brown 212c4762a1bSJed Brown /*TEST 213c4762a1bSJed Brown 214c4762a1bSJed Brown test: 215c4762a1bSJed Brown nsize: 2 216c4762a1bSJed Brown args: -total_subdomains 1 217c4762a1bSJed Brown output_file: output/ex183_2_1.out 218c4762a1bSJed Brown 219c4762a1bSJed Brown test: 220c4762a1bSJed Brown suffix: 2 221c4762a1bSJed Brown nsize: 3 222c4762a1bSJed Brown args: -total_subdomains 2 223c4762a1bSJed Brown output_file: output/ex183_3_2.out 224c4762a1bSJed Brown 225c4762a1bSJed Brown test: 226c4762a1bSJed Brown suffix: 3 227c4762a1bSJed Brown nsize: 4 228c4762a1bSJed Brown args: -total_subdomains 2 229c4762a1bSJed Brown output_file: output/ex183_4_2.out 230c4762a1bSJed Brown 231c4762a1bSJed Brown test: 232c4762a1bSJed Brown suffix: 4 233c4762a1bSJed Brown nsize: 6 234c4762a1bSJed Brown args: -total_subdomains 2 235c4762a1bSJed Brown output_file: output/ex183_6_2.out 236c4762a1bSJed Brown 237c4762a1bSJed Brown TEST*/ 238