1c4762a1bSJed Brown static char help[] = "Example of extracting an array of MPI submatrices from a given MPI matrix.\n" 2c4762a1bSJed Brown "This test can only be run in parallel.\n" 3c4762a1bSJed Brown "\n"; 4c4762a1bSJed Brown 5c4762a1bSJed Brown /*T 6c4762a1bSJed Brown Concepts: Mat^mat submatrix, parallel 7c4762a1bSJed Brown Processors: n 8c4762a1bSJed Brown T*/ 9c4762a1bSJed Brown 10c4762a1bSJed Brown 11c4762a1bSJed Brown #include <petscmat.h> 12c4762a1bSJed Brown 13c4762a1bSJed Brown int main(int argc, char **args) 14c4762a1bSJed Brown { 15c4762a1bSJed Brown Mat A,*submats; 16c4762a1bSJed Brown MPI_Comm subcomm; 17c4762a1bSJed Brown PetscMPIInt rank,size,subrank,subsize,color; 18c4762a1bSJed Brown PetscInt m,n,N,bs,rstart,rend,i,j,k,total_subdomains,hash,nsubdomains=1; 19c4762a1bSJed Brown PetscInt nis,*cols,gnsubdomains,gsubdomainnums[1],gsubdomainperm[1],s,gs; 20c4762a1bSJed Brown PetscInt *rowindices,*colindices,idx,rep; 21c4762a1bSJed Brown PetscScalar *vals; 22c4762a1bSJed Brown IS rowis[1],colis[1]; 23c4762a1bSJed Brown PetscViewer viewer; 24c4762a1bSJed Brown PetscBool permute_indices,flg; 25c4762a1bSJed Brown PetscErrorCode ierr; 26c4762a1bSJed Brown 27c4762a1bSJed Brown ierr = PetscInitialize(&argc,&args,(char*)0,help);if (ierr) return ierr; 28*ffc4695bSBarry Smith ierr = MPI_Comm_size(PETSC_COMM_WORLD,&size);CHKERRMPI(ierr); 29*ffc4695bSBarry Smith ierr = MPI_Comm_rank(PETSC_COMM_WORLD,&rank);CHKERRMPI(ierr); 30c4762a1bSJed Brown 31c4762a1bSJed Brown ierr = PetscOptionsBegin(PETSC_COMM_WORLD,NULL,"ex183","Mat");CHKERRQ(ierr); 32c4762a1bSJed Brown m = 5; 33c4762a1bSJed Brown ierr = PetscOptionsInt("-m","Local matrix size","MatSetSizes",m,&m,&flg);CHKERRQ(ierr); 34c4762a1bSJed Brown total_subdomains = size-1; 35c4762a1bSJed Brown ierr = PetscOptionsInt("-total_subdomains","Number of submatrices where 0 < n < comm size","MatCreateSubMatricesMPI",total_subdomains,&total_subdomains,&flg);CHKERRQ(ierr); 36c4762a1bSJed Brown permute_indices = PETSC_FALSE; 37c4762a1bSJed Brown ierr = PetscOptionsBool("-permute_indices","Whether to permute indices before breaking them into subdomains","ISCreateGeneral",permute_indices,&permute_indices,&flg);CHKERRQ(ierr); 38c4762a1bSJed Brown hash = 7; 39c4762a1bSJed Brown ierr = PetscOptionsInt("-hash","Permutation factor, which has to be relatively prime to M = size*m (total matrix size)","ISCreateGeneral",hash,&hash,&flg);CHKERRQ(ierr); 40c4762a1bSJed Brown rep = 2; 41c4762a1bSJed Brown ierr = PetscOptionsInt("-rep","Number of times to carry out submatrix extractions; currently only 1 & 2 are supported",NULL,rep,&rep,&flg);CHKERRQ(ierr); 42c4762a1bSJed Brown ierr = PetscOptionsEnd();CHKERRQ(ierr); 43c4762a1bSJed Brown 44c4762a1bSJed Brown if (total_subdomains > size) SETERRQ2(PETSC_COMM_WORLD,PETSC_ERR_ARG_WRONG,"Number of subdomains %D must not exceed comm size %D",total_subdomains,size); 45c4762a1bSJed Brown if (total_subdomains < 1 || total_subdomains > size) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"number of subdomains must be > 0 and <= %D (comm size), got total_subdomains = %D",size,total_subdomains); 46c4762a1bSJed Brown if (rep != 1 && rep != 2) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid number of test repetitions: %D; must be 1 or 2",rep); 47c4762a1bSJed Brown 48c4762a1bSJed Brown viewer = PETSC_VIEWER_STDOUT_WORLD; 49c4762a1bSJed Brown /* Create logically sparse, but effectively dense matrix for easy verification of submatrix extraction correctness. */ 50c4762a1bSJed Brown ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr); 51c4762a1bSJed Brown ierr = MatSetSizes(A,m,m,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 52c4762a1bSJed Brown ierr = MatSetFromOptions(A);CHKERRQ(ierr); 53c4762a1bSJed Brown ierr = MatSetUp(A);CHKERRQ(ierr); 54c4762a1bSJed Brown ierr = MatGetSize(A,NULL,&N);CHKERRQ(ierr); 55c4762a1bSJed Brown ierr = MatGetLocalSize(A,NULL,&n);CHKERRQ(ierr); 56c4762a1bSJed Brown ierr = MatGetBlockSize(A,&bs);CHKERRQ(ierr); 57c4762a1bSJed Brown ierr = MatSeqAIJSetPreallocation(A,n,NULL);CHKERRQ(ierr); 58c4762a1bSJed Brown ierr = MatMPIAIJSetPreallocation(A,n,NULL,N-n,NULL);CHKERRQ(ierr); 59c4762a1bSJed Brown ierr = MatSeqBAIJSetPreallocation(A,bs,n/bs,NULL);CHKERRQ(ierr); 60c4762a1bSJed Brown ierr = MatMPIBAIJSetPreallocation(A,bs,n/bs,NULL,(N-n)/bs,NULL);CHKERRQ(ierr); 61c4762a1bSJed Brown ierr = MatSeqSBAIJSetPreallocation(A,bs,n/bs,NULL);CHKERRQ(ierr); 62c4762a1bSJed Brown ierr = MatMPISBAIJSetPreallocation(A,bs,n/bs,NULL,(N-n)/bs,NULL);CHKERRQ(ierr); 63c4762a1bSJed Brown 64c4762a1bSJed Brown ierr = PetscMalloc2(N,&cols,N,&vals);CHKERRQ(ierr); 65c4762a1bSJed Brown ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr); 66c4762a1bSJed Brown for (j = 0; j < N; ++j) cols[j] = j; 67c4762a1bSJed Brown for (i=rstart; i<rend; i++) { 68c4762a1bSJed Brown for (j=0;j<N;++j) { 69c4762a1bSJed Brown vals[j] = i*10000+j; 70c4762a1bSJed Brown } 71c4762a1bSJed Brown ierr = MatSetValues(A,1,&i,N,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 72c4762a1bSJed Brown } 73c4762a1bSJed Brown ierr = PetscFree2(cols,vals);CHKERRQ(ierr); 74c4762a1bSJed Brown ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 75c4762a1bSJed Brown ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 76c4762a1bSJed Brown 77c4762a1bSJed Brown ierr = PetscViewerASCIIPrintf(viewer,"Initial matrix:\n");CHKERRQ(ierr); 78c4762a1bSJed Brown ierr = MatView(A,viewer);CHKERRQ(ierr); 79c4762a1bSJed Brown 80c4762a1bSJed Brown 81c4762a1bSJed Brown /* 82c4762a1bSJed Brown Create subcomms and ISs so that each rank participates in one IS. 83c4762a1bSJed Brown The IS either coalesces adjacent rank indices (contiguous), 84c4762a1bSJed Brown or selects indices by scrambling them using a hash. 85c4762a1bSJed Brown */ 86c4762a1bSJed Brown k = size/total_subdomains + (size%total_subdomains>0); /* There are up to k ranks to a color */ 87c4762a1bSJed Brown color = rank/k; 88*ffc4695bSBarry Smith ierr = MPI_Comm_split(PETSC_COMM_WORLD,color,rank,&subcomm);CHKERRMPI(ierr); 89*ffc4695bSBarry Smith ierr = MPI_Comm_size(subcomm,&subsize);CHKERRMPI(ierr); 90*ffc4695bSBarry Smith ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRMPI(ierr); 91c4762a1bSJed Brown ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr); 92c4762a1bSJed Brown nis = 1; 93c4762a1bSJed Brown ierr = PetscMalloc2(rend-rstart,&rowindices,rend-rstart,&colindices);CHKERRQ(ierr); 94c4762a1bSJed Brown 95c4762a1bSJed Brown for (j = rstart; j < rend; ++j) { 96c4762a1bSJed Brown if (permute_indices) { 97c4762a1bSJed Brown idx = (j*hash); 98c4762a1bSJed Brown } else { 99c4762a1bSJed Brown idx = j; 100c4762a1bSJed Brown } 101c4762a1bSJed Brown rowindices[j-rstart] = idx%N; 102c4762a1bSJed Brown colindices[j-rstart] = (idx+m)%N; 103c4762a1bSJed Brown } 104c4762a1bSJed Brown ierr = ISCreateGeneral(subcomm,rend-rstart,rowindices,PETSC_COPY_VALUES,&rowis[0]);CHKERRQ(ierr); 105c4762a1bSJed Brown ierr = ISCreateGeneral(subcomm,rend-rstart,colindices,PETSC_COPY_VALUES,&colis[0]);CHKERRQ(ierr); 106c4762a1bSJed Brown ierr = ISSort(rowis[0]);CHKERRQ(ierr); 107c4762a1bSJed Brown ierr = ISSort(colis[0]);CHKERRQ(ierr); 108c4762a1bSJed Brown ierr = PetscFree2(rowindices,colindices);CHKERRQ(ierr); 109c4762a1bSJed Brown /* 110c4762a1bSJed Brown Now view the ISs. To avoid deadlock when viewing a list of objects on different subcomms, 111c4762a1bSJed Brown we need to obtain the global numbers of our local objects and wait for the corresponding global 112c4762a1bSJed Brown number to be viewed. 113c4762a1bSJed Brown */ 114c4762a1bSJed Brown ierr = PetscViewerASCIIPrintf(viewer,"Subdomains");CHKERRQ(ierr); 115c4762a1bSJed Brown if (permute_indices) { 116c4762a1bSJed Brown ierr = PetscViewerASCIIPrintf(viewer," (hash=%D)",hash);CHKERRQ(ierr); 117c4762a1bSJed Brown } 118c4762a1bSJed Brown ierr = PetscViewerASCIIPrintf(viewer,":\n");CHKERRQ(ierr); 119c4762a1bSJed Brown ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 120c4762a1bSJed Brown 121c4762a1bSJed Brown nsubdomains = 1; 122c4762a1bSJed Brown for (s = 0; s < nsubdomains; ++s) gsubdomainperm[s] = s; 123c4762a1bSJed Brown ierr = PetscObjectsListGetGlobalNumbering(PETSC_COMM_WORLD,1,(PetscObject*)rowis,&gnsubdomains,gsubdomainnums);CHKERRQ(ierr); 124c4762a1bSJed Brown ierr = PetscSortIntWithPermutation(nsubdomains,gsubdomainnums,gsubdomainperm);CHKERRQ(ierr); 125c4762a1bSJed Brown for (gs=0,s=0; gs < gnsubdomains;++gs) { 126c4762a1bSJed Brown if (s < nsubdomains) { 127c4762a1bSJed Brown PetscInt ss; 128c4762a1bSJed Brown ss = gsubdomainperm[s]; 129c4762a1bSJed Brown if (gs == gsubdomainnums[ss]) { /* Global subdomain gs being viewed is my subdomain with local number ss. */ 130c4762a1bSJed Brown PetscViewer subviewer = NULL; 131c4762a1bSJed Brown ierr = PetscViewerGetSubViewer(viewer,PetscObjectComm((PetscObject)rowis[ss]),&subviewer);CHKERRQ(ierr); 132c4762a1bSJed Brown ierr = PetscViewerASCIIPrintf(subviewer,"Row IS %D\n",gs);CHKERRQ(ierr); 133c4762a1bSJed Brown ierr = ISView(rowis[ss],subviewer);CHKERRQ(ierr); 134c4762a1bSJed Brown ierr = PetscViewerFlush(subviewer);CHKERRQ(ierr); 135c4762a1bSJed Brown ierr = PetscViewerASCIIPrintf(subviewer,"Col IS %D\n",gs);CHKERRQ(ierr); 136c4762a1bSJed Brown ierr = ISView(colis[ss],subviewer);CHKERRQ(ierr); 137c4762a1bSJed Brown ierr = PetscViewerRestoreSubViewer(viewer,PetscObjectComm((PetscObject)rowis[ss]),&subviewer);CHKERRQ(ierr); 138c4762a1bSJed Brown ++s; 139c4762a1bSJed Brown } 140c4762a1bSJed Brown } 141*ffc4695bSBarry Smith ierr = MPI_Barrier(PETSC_COMM_WORLD);CHKERRMPI(ierr); 142c4762a1bSJed Brown } 143c4762a1bSJed Brown ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 144c4762a1bSJed Brown ierr = ISSort(rowis[0]);CHKERRQ(ierr); 145c4762a1bSJed Brown ierr = ISSort(colis[0]);CHKERRQ(ierr); 146c4762a1bSJed Brown nsubdomains = 1; 147c4762a1bSJed Brown ierr = MatCreateSubMatricesMPI(A,nsubdomains,rowis,colis,MAT_INITIAL_MATRIX,&submats);CHKERRQ(ierr); 148c4762a1bSJed Brown /* 149c4762a1bSJed Brown Now view the matrices. To avoid deadlock when viewing a list of objects on different subcomms, 150c4762a1bSJed Brown we need to obtain the global numbers of our local objects and wait for the corresponding global 151c4762a1bSJed Brown number to be viewed. 152c4762a1bSJed Brown */ 153c4762a1bSJed Brown ierr = PetscViewerASCIIPrintf(viewer,"Submatrices (repetition 1):\n");CHKERRQ(ierr); 154c4762a1bSJed Brown for (s = 0; s < nsubdomains; ++s) gsubdomainperm[s] = s; 155c4762a1bSJed Brown ierr = PetscObjectsListGetGlobalNumbering(PETSC_COMM_WORLD,1,(PetscObject*)submats,&gnsubdomains,gsubdomainnums);CHKERRQ(ierr); 156c4762a1bSJed Brown ierr = PetscSortIntWithPermutation(nsubdomains,gsubdomainnums,gsubdomainperm);CHKERRQ(ierr); 157c4762a1bSJed Brown for (gs=0,s=0; gs < gnsubdomains;++gs) { 158c4762a1bSJed Brown if (s < nsubdomains) { 159c4762a1bSJed Brown PetscInt ss; 160c4762a1bSJed Brown ss = gsubdomainperm[s]; 161c4762a1bSJed Brown if (gs == gsubdomainnums[ss]) { /* Global subdomain gs being viewed is my subdomain with local number ss. */ 162c4762a1bSJed Brown PetscViewer subviewer = NULL; 163c4762a1bSJed Brown ierr = PetscViewerGetSubViewer(viewer,PetscObjectComm((PetscObject)submats[ss]),&subviewer);CHKERRQ(ierr); 164c4762a1bSJed Brown ierr = MatView(submats[ss],subviewer);CHKERRQ(ierr); 165c4762a1bSJed Brown ierr = PetscViewerRestoreSubViewer(viewer,PetscObjectComm((PetscObject)submats[ss]),&subviewer);CHKERRQ(ierr); 166c4762a1bSJed Brown ++s; 167c4762a1bSJed Brown } 168c4762a1bSJed Brown } 169*ffc4695bSBarry Smith ierr = MPI_Barrier(PETSC_COMM_WORLD);CHKERRMPI(ierr); 170c4762a1bSJed Brown } 171c4762a1bSJed Brown ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 172c4762a1bSJed Brown if (rep == 1) goto cleanup; 173c4762a1bSJed Brown nsubdomains = 1; 174c4762a1bSJed Brown ierr = MatCreateSubMatricesMPI(A,nsubdomains,rowis,colis,MAT_REUSE_MATRIX,&submats);CHKERRQ(ierr); 175c4762a1bSJed Brown /* 176c4762a1bSJed Brown Now view the matrices. To avoid deadlock when viewing a list of objects on different subcomms, 177c4762a1bSJed Brown we need to obtain the global numbers of our local objects and wait for the corresponding global 178c4762a1bSJed Brown number to be viewed. 179c4762a1bSJed Brown */ 180c4762a1bSJed Brown ierr = PetscViewerASCIIPrintf(viewer,"Submatrices (repetition 2):\n");CHKERRQ(ierr); 181c4762a1bSJed Brown for (s = 0; s < nsubdomains; ++s) gsubdomainperm[s] = s; 182c4762a1bSJed Brown ierr = PetscObjectsListGetGlobalNumbering(PETSC_COMM_WORLD,1,(PetscObject*)submats,&gnsubdomains,gsubdomainnums);CHKERRQ(ierr); 183c4762a1bSJed Brown ierr = PetscSortIntWithPermutation(nsubdomains,gsubdomainnums,gsubdomainperm);CHKERRQ(ierr); 184c4762a1bSJed Brown for (gs=0,s=0; gs < gnsubdomains;++gs) { 185c4762a1bSJed Brown if (s < nsubdomains) { 186c4762a1bSJed Brown PetscInt ss; 187c4762a1bSJed Brown ss = gsubdomainperm[s]; 188c4762a1bSJed Brown if (gs == gsubdomainnums[ss]) { /* Global subdomain gs being viewed is my subdomain with local number ss. */ 189c4762a1bSJed Brown PetscViewer subviewer = NULL; 190c4762a1bSJed Brown ierr = PetscViewerGetSubViewer(viewer,PetscObjectComm((PetscObject)submats[ss]),&subviewer);CHKERRQ(ierr); 191c4762a1bSJed Brown ierr = MatView(submats[ss],subviewer);CHKERRQ(ierr); 192c4762a1bSJed Brown ierr = PetscViewerRestoreSubViewer(viewer,PetscObjectComm((PetscObject)submats[ss]),&subviewer);CHKERRQ(ierr); 193c4762a1bSJed Brown ++s; 194c4762a1bSJed Brown } 195c4762a1bSJed Brown } 196*ffc4695bSBarry Smith ierr = MPI_Barrier(PETSC_COMM_WORLD);CHKERRMPI(ierr); 197c4762a1bSJed Brown } 198c4762a1bSJed Brown ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 199c4762a1bSJed Brown cleanup: 200c4762a1bSJed Brown for (k=0;k<nsubdomains;++k) { 201c4762a1bSJed Brown ierr = MatDestroy(submats+k);CHKERRQ(ierr); 202c4762a1bSJed Brown } 203c4762a1bSJed Brown ierr = PetscFree(submats);CHKERRQ(ierr); 204c4762a1bSJed Brown for (k=0;k<nis;++k) { 205c4762a1bSJed Brown ierr = ISDestroy(rowis+k);CHKERRQ(ierr); 206c4762a1bSJed Brown ierr = ISDestroy(colis+k);CHKERRQ(ierr); 207c4762a1bSJed Brown } 208c4762a1bSJed Brown ierr = MatDestroy(&A);CHKERRQ(ierr); 209*ffc4695bSBarry Smith ierr = MPI_Comm_free(&subcomm);CHKERRMPI(ierr); 210c4762a1bSJed Brown ierr = PetscFinalize(); 211c4762a1bSJed Brown return ierr; 212c4762a1bSJed Brown } 213c4762a1bSJed Brown 214c4762a1bSJed Brown 215c4762a1bSJed Brown /*TEST 216c4762a1bSJed Brown 217c4762a1bSJed Brown test: 218c4762a1bSJed Brown nsize: 2 219c4762a1bSJed Brown args: -total_subdomains 1 220c4762a1bSJed Brown output_file: output/ex183_2_1.out 221c4762a1bSJed Brown 222c4762a1bSJed Brown test: 223c4762a1bSJed Brown suffix: 2 224c4762a1bSJed Brown nsize: 3 225c4762a1bSJed Brown args: -total_subdomains 2 226c4762a1bSJed Brown output_file: output/ex183_3_2.out 227c4762a1bSJed Brown 228c4762a1bSJed Brown test: 229c4762a1bSJed Brown suffix: 3 230c4762a1bSJed Brown nsize: 4 231c4762a1bSJed Brown args: -total_subdomains 2 232c4762a1bSJed Brown output_file: output/ex183_4_2.out 233c4762a1bSJed Brown 234c4762a1bSJed Brown test: 235c4762a1bSJed Brown suffix: 4 236c4762a1bSJed Brown nsize: 6 237c4762a1bSJed Brown args: -total_subdomains 2 238c4762a1bSJed Brown output_file: output/ex183_6_2.out 239c4762a1bSJed Brown 240c4762a1bSJed Brown TEST*/ 241