1c4762a1bSJed Brown static char help[] = "Example of extracting an array of MPI submatrices from a given MPI matrix.\n" 2c4762a1bSJed Brown "This test can only be run in parallel.\n" 3c4762a1bSJed Brown "\n"; 4c4762a1bSJed Brown 5c4762a1bSJed Brown #include <petscmat.h> 6c4762a1bSJed Brown 79371c9d4SSatish Balay int main(int argc, char **args) { 8c4762a1bSJed Brown Mat A, *submats; 9c4762a1bSJed Brown MPI_Comm subcomm; 10c4762a1bSJed Brown PetscMPIInt rank, size, subrank, subsize, color; 11c4762a1bSJed Brown PetscInt m, n, N, bs, rstart, rend, i, j, k, total_subdomains, hash, nsubdomains = 1; 12c4762a1bSJed Brown PetscInt nis, *cols, gnsubdomains, gsubdomainnums[1], gsubdomainperm[1], s, gs; 13c4762a1bSJed Brown PetscInt *rowindices, *colindices, idx, rep; 14c4762a1bSJed Brown PetscScalar *vals; 15c4762a1bSJed Brown IS rowis[1], colis[1]; 16c4762a1bSJed Brown PetscViewer viewer; 17c4762a1bSJed Brown PetscBool permute_indices, flg; 18c4762a1bSJed Brown 19327415f7SBarry Smith PetscFunctionBeginUser; 209566063dSJacob Faibussowitsch PetscCall(PetscInitialize(&argc, &args, (char *)0, help)); 219566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_size(PETSC_COMM_WORLD, &size)); 229566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_rank(PETSC_COMM_WORLD, &rank)); 23c4762a1bSJed Brown 24d0609cedSBarry Smith PetscOptionsBegin(PETSC_COMM_WORLD, NULL, "ex183", "Mat"); 25c4762a1bSJed Brown m = 5; 269566063dSJacob Faibussowitsch PetscCall(PetscOptionsInt("-m", "Local matrix size", "MatSetSizes", m, &m, &flg)); 27c4762a1bSJed Brown total_subdomains = size - 1; 289566063dSJacob Faibussowitsch PetscCall(PetscOptionsInt("-total_subdomains", "Number of submatrices where 0 < n < comm size", "MatCreateSubMatricesMPI", total_subdomains, &total_subdomains, &flg)); 29c4762a1bSJed Brown permute_indices = PETSC_FALSE; 309566063dSJacob Faibussowitsch PetscCall(PetscOptionsBool("-permute_indices", "Whether to permute indices before breaking them into subdomains", "ISCreateGeneral", permute_indices, &permute_indices, &flg)); 31c4762a1bSJed Brown hash = 7; 329566063dSJacob Faibussowitsch PetscCall(PetscOptionsInt("-hash", "Permutation factor, which has to be relatively prime to M = size*m (total matrix size)", "ISCreateGeneral", hash, &hash, &flg)); 33c4762a1bSJed Brown rep = 2; 349566063dSJacob Faibussowitsch PetscCall(PetscOptionsInt("-rep", "Number of times to carry out submatrix extractions; currently only 1 & 2 are supported", NULL, rep, &rep, &flg)); 35d0609cedSBarry Smith PetscOptionsEnd(); 36c4762a1bSJed Brown 3708401ef6SPierre Jolivet PetscCheck(total_subdomains <= size, PETSC_COMM_WORLD, PETSC_ERR_ARG_WRONG, "Number of subdomains %" PetscInt_FMT " must not exceed comm size %d", total_subdomains, size); 38bc30f867SBarry Smith PetscCheck(total_subdomains >= 1 && total_subdomains <= size, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "number of subdomains must be > 0 and <= %d (comm size), got total_subdomains = %" PetscInt_FMT, size, total_subdomains); 39bc30f867SBarry Smith PetscCheck(rep == 1 || rep == 2, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Invalid number of test repetitions: %" PetscInt_FMT "; must be 1 or 2", rep); 40c4762a1bSJed Brown 41c4762a1bSJed Brown viewer = PETSC_VIEWER_STDOUT_WORLD; 42c4762a1bSJed Brown /* Create logically sparse, but effectively dense matrix for easy verification of submatrix extraction correctness. */ 439566063dSJacob Faibussowitsch PetscCall(MatCreate(PETSC_COMM_WORLD, &A)); 449566063dSJacob Faibussowitsch PetscCall(MatSetSizes(A, m, m, PETSC_DECIDE, PETSC_DECIDE)); 459566063dSJacob Faibussowitsch PetscCall(MatSetFromOptions(A)); 469566063dSJacob Faibussowitsch PetscCall(MatSetUp(A)); 479566063dSJacob Faibussowitsch PetscCall(MatGetSize(A, NULL, &N)); 489566063dSJacob Faibussowitsch PetscCall(MatGetLocalSize(A, NULL, &n)); 499566063dSJacob Faibussowitsch PetscCall(MatGetBlockSize(A, &bs)); 509566063dSJacob Faibussowitsch PetscCall(MatSeqAIJSetPreallocation(A, n, NULL)); 519566063dSJacob Faibussowitsch PetscCall(MatMPIAIJSetPreallocation(A, n, NULL, N - n, NULL)); 529566063dSJacob Faibussowitsch PetscCall(MatSeqBAIJSetPreallocation(A, bs, n / bs, NULL)); 539566063dSJacob Faibussowitsch PetscCall(MatMPIBAIJSetPreallocation(A, bs, n / bs, NULL, (N - n) / bs, NULL)); 549566063dSJacob Faibussowitsch PetscCall(MatSeqSBAIJSetPreallocation(A, bs, n / bs, NULL)); 559566063dSJacob Faibussowitsch PetscCall(MatMPISBAIJSetPreallocation(A, bs, n / bs, NULL, (N - n) / bs, NULL)); 56c4762a1bSJed Brown 579566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(N, &cols, N, &vals)); 589566063dSJacob Faibussowitsch PetscCall(MatGetOwnershipRange(A, &rstart, &rend)); 59c4762a1bSJed Brown for (j = 0; j < N; ++j) cols[j] = j; 60c4762a1bSJed Brown for (i = rstart; i < rend; i++) { 619371c9d4SSatish Balay for (j = 0; j < N; ++j) { vals[j] = i * 10000 + j; } 629566063dSJacob Faibussowitsch PetscCall(MatSetValues(A, 1, &i, N, cols, vals, INSERT_VALUES)); 63c4762a1bSJed Brown } 649566063dSJacob Faibussowitsch PetscCall(PetscFree2(cols, vals)); 659566063dSJacob Faibussowitsch PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY)); 669566063dSJacob Faibussowitsch PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY)); 67c4762a1bSJed Brown 689566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, "Initial matrix:\n")); 699566063dSJacob Faibussowitsch PetscCall(MatView(A, viewer)); 70c4762a1bSJed Brown 71c4762a1bSJed Brown /* 72c4762a1bSJed Brown Create subcomms and ISs so that each rank participates in one IS. 73c4762a1bSJed Brown The IS either coalesces adjacent rank indices (contiguous), 74c4762a1bSJed Brown or selects indices by scrambling them using a hash. 75c4762a1bSJed Brown */ 76c4762a1bSJed Brown k = size / total_subdomains + (size % total_subdomains > 0); /* There are up to k ranks to a color */ 77c4762a1bSJed Brown color = rank / k; 789566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_split(PETSC_COMM_WORLD, color, rank, &subcomm)); 799566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_size(subcomm, &subsize)); 809566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_rank(subcomm, &subrank)); 819566063dSJacob Faibussowitsch PetscCall(MatGetOwnershipRange(A, &rstart, &rend)); 82c4762a1bSJed Brown nis = 1; 839566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(rend - rstart, &rowindices, rend - rstart, &colindices)); 84c4762a1bSJed Brown 85c4762a1bSJed Brown for (j = rstart; j < rend; ++j) { 86c4762a1bSJed Brown if (permute_indices) { 87c4762a1bSJed Brown idx = (j * hash); 88c4762a1bSJed Brown } else { 89c4762a1bSJed Brown idx = j; 90c4762a1bSJed Brown } 91c4762a1bSJed Brown rowindices[j - rstart] = idx % N; 92c4762a1bSJed Brown colindices[j - rstart] = (idx + m) % N; 93c4762a1bSJed Brown } 949566063dSJacob Faibussowitsch PetscCall(ISCreateGeneral(subcomm, rend - rstart, rowindices, PETSC_COPY_VALUES, &rowis[0])); 959566063dSJacob Faibussowitsch PetscCall(ISCreateGeneral(subcomm, rend - rstart, colindices, PETSC_COPY_VALUES, &colis[0])); 969566063dSJacob Faibussowitsch PetscCall(ISSort(rowis[0])); 979566063dSJacob Faibussowitsch PetscCall(ISSort(colis[0])); 989566063dSJacob Faibussowitsch PetscCall(PetscFree2(rowindices, colindices)); 99c4762a1bSJed Brown /* 100c4762a1bSJed Brown Now view the ISs. To avoid deadlock when viewing a list of objects on different subcomms, 101c4762a1bSJed Brown we need to obtain the global numbers of our local objects and wait for the corresponding global 102c4762a1bSJed Brown number to be viewed. 103c4762a1bSJed Brown */ 1049566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, "Subdomains")); 105*48a46eb9SPierre Jolivet if (permute_indices) PetscCall(PetscViewerASCIIPrintf(viewer, " (hash=%" PetscInt_FMT ")", hash)); 1069566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, ":\n")); 1079566063dSJacob Faibussowitsch PetscCall(PetscViewerFlush(viewer)); 108c4762a1bSJed Brown 109c4762a1bSJed Brown nsubdomains = 1; 110c4762a1bSJed Brown for (s = 0; s < nsubdomains; ++s) gsubdomainperm[s] = s; 1119566063dSJacob Faibussowitsch PetscCall(PetscObjectsListGetGlobalNumbering(PETSC_COMM_WORLD, 1, (PetscObject *)rowis, &gnsubdomains, gsubdomainnums)); 1129566063dSJacob Faibussowitsch PetscCall(PetscSortIntWithPermutation(nsubdomains, gsubdomainnums, gsubdomainperm)); 113c4762a1bSJed Brown for (gs = 0, s = 0; gs < gnsubdomains; ++gs) { 114c4762a1bSJed Brown if (s < nsubdomains) { 115c4762a1bSJed Brown PetscInt ss; 116c4762a1bSJed Brown ss = gsubdomainperm[s]; 117c4762a1bSJed Brown if (gs == gsubdomainnums[ss]) { /* Global subdomain gs being viewed is my subdomain with local number ss. */ 118c4762a1bSJed Brown PetscViewer subviewer = NULL; 1199566063dSJacob Faibussowitsch PetscCall(PetscViewerGetSubViewer(viewer, PetscObjectComm((PetscObject)rowis[ss]), &subviewer)); 1209566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(subviewer, "Row IS %" PetscInt_FMT "\n", gs)); 1219566063dSJacob Faibussowitsch PetscCall(ISView(rowis[ss], subviewer)); 1229566063dSJacob Faibussowitsch PetscCall(PetscViewerFlush(subviewer)); 1239566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(subviewer, "Col IS %" PetscInt_FMT "\n", gs)); 1249566063dSJacob Faibussowitsch PetscCall(ISView(colis[ss], subviewer)); 1259566063dSJacob Faibussowitsch PetscCall(PetscViewerRestoreSubViewer(viewer, PetscObjectComm((PetscObject)rowis[ss]), &subviewer)); 126c4762a1bSJed Brown ++s; 127c4762a1bSJed Brown } 128c4762a1bSJed Brown } 1299566063dSJacob Faibussowitsch PetscCallMPI(MPI_Barrier(PETSC_COMM_WORLD)); 130c4762a1bSJed Brown } 1319566063dSJacob Faibussowitsch PetscCall(PetscViewerFlush(viewer)); 1329566063dSJacob Faibussowitsch PetscCall(ISSort(rowis[0])); 1339566063dSJacob Faibussowitsch PetscCall(ISSort(colis[0])); 134c4762a1bSJed Brown nsubdomains = 1; 1359566063dSJacob Faibussowitsch PetscCall(MatCreateSubMatricesMPI(A, nsubdomains, rowis, colis, MAT_INITIAL_MATRIX, &submats)); 136c4762a1bSJed Brown /* 137c4762a1bSJed Brown Now view the matrices. To avoid deadlock when viewing a list of objects on different subcomms, 138c4762a1bSJed Brown we need to obtain the global numbers of our local objects and wait for the corresponding global 139c4762a1bSJed Brown number to be viewed. 140c4762a1bSJed Brown */ 1419566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, "Submatrices (repetition 1):\n")); 142c4762a1bSJed Brown for (s = 0; s < nsubdomains; ++s) gsubdomainperm[s] = s; 1439566063dSJacob Faibussowitsch PetscCall(PetscObjectsListGetGlobalNumbering(PETSC_COMM_WORLD, 1, (PetscObject *)submats, &gnsubdomains, gsubdomainnums)); 1449566063dSJacob Faibussowitsch PetscCall(PetscSortIntWithPermutation(nsubdomains, gsubdomainnums, gsubdomainperm)); 145c4762a1bSJed Brown for (gs = 0, s = 0; gs < gnsubdomains; ++gs) { 146c4762a1bSJed Brown if (s < nsubdomains) { 147c4762a1bSJed Brown PetscInt ss; 148c4762a1bSJed Brown ss = gsubdomainperm[s]; 149c4762a1bSJed Brown if (gs == gsubdomainnums[ss]) { /* Global subdomain gs being viewed is my subdomain with local number ss. */ 150c4762a1bSJed Brown PetscViewer subviewer = NULL; 1519566063dSJacob Faibussowitsch PetscCall(PetscViewerGetSubViewer(viewer, PetscObjectComm((PetscObject)submats[ss]), &subviewer)); 1529566063dSJacob Faibussowitsch PetscCall(MatView(submats[ss], subviewer)); 1539566063dSJacob Faibussowitsch PetscCall(PetscViewerRestoreSubViewer(viewer, PetscObjectComm((PetscObject)submats[ss]), &subviewer)); 154c4762a1bSJed Brown ++s; 155c4762a1bSJed Brown } 156c4762a1bSJed Brown } 1579566063dSJacob Faibussowitsch PetscCallMPI(MPI_Barrier(PETSC_COMM_WORLD)); 158c4762a1bSJed Brown } 1599566063dSJacob Faibussowitsch PetscCall(PetscViewerFlush(viewer)); 160c4762a1bSJed Brown if (rep == 1) goto cleanup; 161c4762a1bSJed Brown nsubdomains = 1; 1629566063dSJacob Faibussowitsch PetscCall(MatCreateSubMatricesMPI(A, nsubdomains, rowis, colis, MAT_REUSE_MATRIX, &submats)); 163c4762a1bSJed Brown /* 164c4762a1bSJed Brown Now view the matrices. To avoid deadlock when viewing a list of objects on different subcomms, 165c4762a1bSJed Brown we need to obtain the global numbers of our local objects and wait for the corresponding global 166c4762a1bSJed Brown number to be viewed. 167c4762a1bSJed Brown */ 1689566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, "Submatrices (repetition 2):\n")); 169c4762a1bSJed Brown for (s = 0; s < nsubdomains; ++s) gsubdomainperm[s] = s; 1709566063dSJacob Faibussowitsch PetscCall(PetscObjectsListGetGlobalNumbering(PETSC_COMM_WORLD, 1, (PetscObject *)submats, &gnsubdomains, gsubdomainnums)); 1719566063dSJacob Faibussowitsch PetscCall(PetscSortIntWithPermutation(nsubdomains, gsubdomainnums, gsubdomainperm)); 172c4762a1bSJed Brown for (gs = 0, s = 0; gs < gnsubdomains; ++gs) { 173c4762a1bSJed Brown if (s < nsubdomains) { 174c4762a1bSJed Brown PetscInt ss; 175c4762a1bSJed Brown ss = gsubdomainperm[s]; 176c4762a1bSJed Brown if (gs == gsubdomainnums[ss]) { /* Global subdomain gs being viewed is my subdomain with local number ss. */ 177c4762a1bSJed Brown PetscViewer subviewer = NULL; 1789566063dSJacob Faibussowitsch PetscCall(PetscViewerGetSubViewer(viewer, PetscObjectComm((PetscObject)submats[ss]), &subviewer)); 1799566063dSJacob Faibussowitsch PetscCall(MatView(submats[ss], subviewer)); 1809566063dSJacob Faibussowitsch PetscCall(PetscViewerRestoreSubViewer(viewer, PetscObjectComm((PetscObject)submats[ss]), &subviewer)); 181c4762a1bSJed Brown ++s; 182c4762a1bSJed Brown } 183c4762a1bSJed Brown } 1849566063dSJacob Faibussowitsch PetscCallMPI(MPI_Barrier(PETSC_COMM_WORLD)); 185c4762a1bSJed Brown } 1869566063dSJacob Faibussowitsch PetscCall(PetscViewerFlush(viewer)); 187c4762a1bSJed Brown cleanup: 188*48a46eb9SPierre Jolivet for (k = 0; k < nsubdomains; ++k) PetscCall(MatDestroy(submats + k)); 1899566063dSJacob Faibussowitsch PetscCall(PetscFree(submats)); 190c4762a1bSJed Brown for (k = 0; k < nis; ++k) { 1919566063dSJacob Faibussowitsch PetscCall(ISDestroy(rowis + k)); 1929566063dSJacob Faibussowitsch PetscCall(ISDestroy(colis + k)); 193c4762a1bSJed Brown } 1949566063dSJacob Faibussowitsch PetscCall(MatDestroy(&A)); 1959566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_free(&subcomm)); 1969566063dSJacob Faibussowitsch PetscCall(PetscFinalize()); 197b122ec5aSJacob Faibussowitsch return 0; 198c4762a1bSJed Brown } 199c4762a1bSJed Brown 200c4762a1bSJed Brown /*TEST 201c4762a1bSJed Brown 202c4762a1bSJed Brown test: 203c4762a1bSJed Brown nsize: 2 204c4762a1bSJed Brown args: -total_subdomains 1 205c4762a1bSJed Brown output_file: output/ex183_2_1.out 206c4762a1bSJed Brown 207c4762a1bSJed Brown test: 208c4762a1bSJed Brown suffix: 2 209c4762a1bSJed Brown nsize: 3 210c4762a1bSJed Brown args: -total_subdomains 2 211c4762a1bSJed Brown output_file: output/ex183_3_2.out 212c4762a1bSJed Brown 213c4762a1bSJed Brown test: 214c4762a1bSJed Brown suffix: 3 215c4762a1bSJed Brown nsize: 4 216c4762a1bSJed Brown args: -total_subdomains 2 217c4762a1bSJed Brown output_file: output/ex183_4_2.out 218c4762a1bSJed Brown 219c4762a1bSJed Brown test: 220c4762a1bSJed Brown suffix: 4 221c4762a1bSJed Brown nsize: 6 222c4762a1bSJed Brown args: -total_subdomains 2 223c4762a1bSJed Brown output_file: output/ex183_6_2.out 224c4762a1bSJed Brown 225c4762a1bSJed Brown TEST*/ 226