1c4762a1bSJed Brown static char help[] = "Example of extracting an array of MPI submatrices from a given MPI matrix.\n" 2c4762a1bSJed Brown "This test can only be run in parallel.\n" 3c4762a1bSJed Brown "\n"; 4c4762a1bSJed Brown 5c4762a1bSJed Brown #include <petscmat.h> 6c4762a1bSJed Brown 7*fe8fb074SBarry Smith PetscErrorCode MyISView(IS *rowis, IS *colis, PetscInt gs, PetscInt ss, PetscViewer viewer) 8*fe8fb074SBarry Smith { 9*fe8fb074SBarry Smith PetscViewer subviewer = NULL; 10*fe8fb074SBarry Smith 11*fe8fb074SBarry Smith PetscFunctionBeginUser; 12*fe8fb074SBarry Smith PetscCall(PetscViewerASCIIPrintf(viewer, "Row IS %" PetscInt_FMT "\n", gs)); 13*fe8fb074SBarry Smith if (ss > -1) { 14*fe8fb074SBarry Smith PetscCall(PetscViewerGetSubViewer(viewer, PetscObjectComm((PetscObject)rowis[ss]), &subviewer)); 15*fe8fb074SBarry Smith PetscCall(ISView(rowis[ss], subviewer)); 16*fe8fb074SBarry Smith PetscCall(PetscViewerRestoreSubViewer(viewer, PetscObjectComm((PetscObject)rowis[ss]), &subviewer)); 17*fe8fb074SBarry Smith } 18*fe8fb074SBarry Smith PetscCall(PetscViewerFlush(viewer)); 19*fe8fb074SBarry Smith PetscCall(PetscViewerASCIIPrintf(viewer, "Col IS %" PetscInt_FMT "\n", gs)); 20*fe8fb074SBarry Smith if (ss > -1) { 21*fe8fb074SBarry Smith PetscCall(PetscViewerGetSubViewer(viewer, PetscObjectComm((PetscObject)rowis[ss]), &subviewer)); 22*fe8fb074SBarry Smith PetscCall(ISView(colis[ss], subviewer)); 23*fe8fb074SBarry Smith PetscCall(PetscViewerRestoreSubViewer(viewer, PetscObjectComm((PetscObject)rowis[ss]), &subviewer)); 24*fe8fb074SBarry Smith } 25*fe8fb074SBarry Smith PetscCall(PetscViewerFlush(viewer)); 26*fe8fb074SBarry Smith PetscFunctionReturn(PETSC_SUCCESS); 27*fe8fb074SBarry Smith } 28*fe8fb074SBarry Smith 29d71ae5a4SJacob Faibussowitsch int main(int argc, char **args) 30d71ae5a4SJacob Faibussowitsch { 31c4762a1bSJed Brown Mat A, *submats; 32c4762a1bSJed Brown MPI_Comm subcomm; 33c4762a1bSJed Brown PetscMPIInt rank, size, subrank, subsize, color; 34c4762a1bSJed Brown PetscInt m, n, N, bs, rstart, rend, i, j, k, total_subdomains, hash, nsubdomains = 1; 35c4762a1bSJed Brown PetscInt nis, *cols, gnsubdomains, gsubdomainnums[1], gsubdomainperm[1], s, gs; 36c4762a1bSJed Brown PetscInt *rowindices, *colindices, idx, rep; 37c4762a1bSJed Brown PetscScalar *vals; 38c4762a1bSJed Brown IS rowis[1], colis[1]; 39c4762a1bSJed Brown PetscViewer viewer; 40c4762a1bSJed Brown PetscBool permute_indices, flg; 41c4762a1bSJed Brown 42327415f7SBarry Smith PetscFunctionBeginUser; 439566063dSJacob Faibussowitsch PetscCall(PetscInitialize(&argc, &args, (char *)0, help)); 449566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_size(PETSC_COMM_WORLD, &size)); 459566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_rank(PETSC_COMM_WORLD, &rank)); 46c4762a1bSJed Brown 47d0609cedSBarry Smith PetscOptionsBegin(PETSC_COMM_WORLD, NULL, "ex183", "Mat"); 48c4762a1bSJed Brown m = 5; 499566063dSJacob Faibussowitsch PetscCall(PetscOptionsInt("-m", "Local matrix size", "MatSetSizes", m, &m, &flg)); 50c4762a1bSJed Brown total_subdomains = size - 1; 519566063dSJacob Faibussowitsch PetscCall(PetscOptionsInt("-total_subdomains", "Number of submatrices where 0 < n < comm size", "MatCreateSubMatricesMPI", total_subdomains, &total_subdomains, &flg)); 52c4762a1bSJed Brown permute_indices = PETSC_FALSE; 539566063dSJacob Faibussowitsch PetscCall(PetscOptionsBool("-permute_indices", "Whether to permute indices before breaking them into subdomains", "ISCreateGeneral", permute_indices, &permute_indices, &flg)); 54c4762a1bSJed Brown hash = 7; 559566063dSJacob Faibussowitsch PetscCall(PetscOptionsInt("-hash", "Permutation factor, which has to be relatively prime to M = size*m (total matrix size)", "ISCreateGeneral", hash, &hash, &flg)); 56c4762a1bSJed Brown rep = 2; 579566063dSJacob Faibussowitsch PetscCall(PetscOptionsInt("-rep", "Number of times to carry out submatrix extractions; currently only 1 & 2 are supported", NULL, rep, &rep, &flg)); 58d0609cedSBarry Smith PetscOptionsEnd(); 59c4762a1bSJed Brown 6008401ef6SPierre Jolivet PetscCheck(total_subdomains <= size, PETSC_COMM_WORLD, PETSC_ERR_ARG_WRONG, "Number of subdomains %" PetscInt_FMT " must not exceed comm size %d", total_subdomains, size); 61bc30f867SBarry Smith PetscCheck(total_subdomains >= 1 && total_subdomains <= size, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "number of subdomains must be > 0 and <= %d (comm size), got total_subdomains = %" PetscInt_FMT, size, total_subdomains); 62bc30f867SBarry Smith PetscCheck(rep == 1 || rep == 2, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Invalid number of test repetitions: %" PetscInt_FMT "; must be 1 or 2", rep); 63c4762a1bSJed Brown 64c4762a1bSJed Brown viewer = PETSC_VIEWER_STDOUT_WORLD; 65c4762a1bSJed Brown /* Create logically sparse, but effectively dense matrix for easy verification of submatrix extraction correctness. */ 669566063dSJacob Faibussowitsch PetscCall(MatCreate(PETSC_COMM_WORLD, &A)); 679566063dSJacob Faibussowitsch PetscCall(MatSetSizes(A, m, m, PETSC_DECIDE, PETSC_DECIDE)); 689566063dSJacob Faibussowitsch PetscCall(MatSetFromOptions(A)); 699566063dSJacob Faibussowitsch PetscCall(MatSetUp(A)); 709566063dSJacob Faibussowitsch PetscCall(MatGetSize(A, NULL, &N)); 719566063dSJacob Faibussowitsch PetscCall(MatGetLocalSize(A, NULL, &n)); 729566063dSJacob Faibussowitsch PetscCall(MatGetBlockSize(A, &bs)); 739566063dSJacob Faibussowitsch PetscCall(MatSeqAIJSetPreallocation(A, n, NULL)); 749566063dSJacob Faibussowitsch PetscCall(MatMPIAIJSetPreallocation(A, n, NULL, N - n, NULL)); 759566063dSJacob Faibussowitsch PetscCall(MatSeqBAIJSetPreallocation(A, bs, n / bs, NULL)); 769566063dSJacob Faibussowitsch PetscCall(MatMPIBAIJSetPreallocation(A, bs, n / bs, NULL, (N - n) / bs, NULL)); 779566063dSJacob Faibussowitsch PetscCall(MatSeqSBAIJSetPreallocation(A, bs, n / bs, NULL)); 789566063dSJacob Faibussowitsch PetscCall(MatMPISBAIJSetPreallocation(A, bs, n / bs, NULL, (N - n) / bs, NULL)); 79c4762a1bSJed Brown 809566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(N, &cols, N, &vals)); 819566063dSJacob Faibussowitsch PetscCall(MatGetOwnershipRange(A, &rstart, &rend)); 82c4762a1bSJed Brown for (j = 0; j < N; ++j) cols[j] = j; 83c4762a1bSJed Brown for (i = rstart; i < rend; i++) { 84ad540459SPierre Jolivet for (j = 0; j < N; ++j) vals[j] = i * 10000 + j; 859566063dSJacob Faibussowitsch PetscCall(MatSetValues(A, 1, &i, N, cols, vals, INSERT_VALUES)); 86c4762a1bSJed Brown } 879566063dSJacob Faibussowitsch PetscCall(PetscFree2(cols, vals)); 889566063dSJacob Faibussowitsch PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY)); 899566063dSJacob Faibussowitsch PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY)); 90c4762a1bSJed Brown 919566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, "Initial matrix:\n")); 929566063dSJacob Faibussowitsch PetscCall(MatView(A, viewer)); 93c4762a1bSJed Brown 94c4762a1bSJed Brown /* 95c4762a1bSJed Brown Create subcomms and ISs so that each rank participates in one IS. 96c4762a1bSJed Brown The IS either coalesces adjacent rank indices (contiguous), 97c4762a1bSJed Brown or selects indices by scrambling them using a hash. 98c4762a1bSJed Brown */ 99c4762a1bSJed Brown k = size / total_subdomains + (size % total_subdomains > 0); /* There are up to k ranks to a color */ 100c4762a1bSJed Brown color = rank / k; 1019566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_split(PETSC_COMM_WORLD, color, rank, &subcomm)); 1029566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_size(subcomm, &subsize)); 1039566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_rank(subcomm, &subrank)); 1049566063dSJacob Faibussowitsch PetscCall(MatGetOwnershipRange(A, &rstart, &rend)); 105c4762a1bSJed Brown nis = 1; 1069566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(rend - rstart, &rowindices, rend - rstart, &colindices)); 107c4762a1bSJed Brown 108c4762a1bSJed Brown for (j = rstart; j < rend; ++j) { 109c4762a1bSJed Brown if (permute_indices) { 110c4762a1bSJed Brown idx = (j * hash); 111c4762a1bSJed Brown } else { 112c4762a1bSJed Brown idx = j; 113c4762a1bSJed Brown } 114c4762a1bSJed Brown rowindices[j - rstart] = idx % N; 115c4762a1bSJed Brown colindices[j - rstart] = (idx + m) % N; 116c4762a1bSJed Brown } 1179566063dSJacob Faibussowitsch PetscCall(ISCreateGeneral(subcomm, rend - rstart, rowindices, PETSC_COPY_VALUES, &rowis[0])); 1189566063dSJacob Faibussowitsch PetscCall(ISCreateGeneral(subcomm, rend - rstart, colindices, PETSC_COPY_VALUES, &colis[0])); 1199566063dSJacob Faibussowitsch PetscCall(ISSort(rowis[0])); 1209566063dSJacob Faibussowitsch PetscCall(ISSort(colis[0])); 1219566063dSJacob Faibussowitsch PetscCall(PetscFree2(rowindices, colindices)); 122c4762a1bSJed Brown /* 123c4762a1bSJed Brown Now view the ISs. To avoid deadlock when viewing a list of objects on different subcomms, 124c4762a1bSJed Brown we need to obtain the global numbers of our local objects and wait for the corresponding global 125c4762a1bSJed Brown number to be viewed. 126c4762a1bSJed Brown */ 1279566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, "Subdomains")); 12848a46eb9SPierre Jolivet if (permute_indices) PetscCall(PetscViewerASCIIPrintf(viewer, " (hash=%" PetscInt_FMT ")", hash)); 1299566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, ":\n")); 1309566063dSJacob Faibussowitsch PetscCall(PetscViewerFlush(viewer)); 131c4762a1bSJed Brown 132c4762a1bSJed Brown nsubdomains = 1; 133c4762a1bSJed Brown for (s = 0; s < nsubdomains; ++s) gsubdomainperm[s] = s; 1349566063dSJacob Faibussowitsch PetscCall(PetscObjectsListGetGlobalNumbering(PETSC_COMM_WORLD, 1, (PetscObject *)rowis, &gnsubdomains, gsubdomainnums)); 1359566063dSJacob Faibussowitsch PetscCall(PetscSortIntWithPermutation(nsubdomains, gsubdomainnums, gsubdomainperm)); 136c4762a1bSJed Brown for (gs = 0, s = 0; gs < gnsubdomains; ++gs) { 137c4762a1bSJed Brown PetscInt ss; 138*fe8fb074SBarry Smith if (s < nsubdomains) { 139c4762a1bSJed Brown ss = gsubdomainperm[s]; 140c4762a1bSJed Brown if (gs == gsubdomainnums[ss]) { /* Global subdomain gs being viewed is my subdomain with local number ss. */ 141c4762a1bSJed Brown ++s; 142*fe8fb074SBarry Smith } else ss = -1; 143*fe8fb074SBarry Smith } else ss = -1; 144*fe8fb074SBarry Smith PetscCall(MyISView(rowis, colis, gs, ss, viewer)); 145c4762a1bSJed Brown } 1469566063dSJacob Faibussowitsch PetscCall(PetscViewerFlush(viewer)); 1479566063dSJacob Faibussowitsch PetscCall(ISSort(rowis[0])); 1489566063dSJacob Faibussowitsch PetscCall(ISSort(colis[0])); 149c4762a1bSJed Brown nsubdomains = 1; 1509566063dSJacob Faibussowitsch PetscCall(MatCreateSubMatricesMPI(A, nsubdomains, rowis, colis, MAT_INITIAL_MATRIX, &submats)); 151c4762a1bSJed Brown /* 152c4762a1bSJed Brown Now view the matrices. To avoid deadlock when viewing a list of objects on different subcomms, 153c4762a1bSJed Brown we need to obtain the global numbers of our local objects and wait for the corresponding global 154c4762a1bSJed Brown number to be viewed. 155c4762a1bSJed Brown */ 1569566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, "Submatrices (repetition 1):\n")); 157c4762a1bSJed Brown for (s = 0; s < nsubdomains; ++s) gsubdomainperm[s] = s; 1589566063dSJacob Faibussowitsch PetscCall(PetscObjectsListGetGlobalNumbering(PETSC_COMM_WORLD, 1, (PetscObject *)submats, &gnsubdomains, gsubdomainnums)); 1599566063dSJacob Faibussowitsch PetscCall(PetscSortIntWithPermutation(nsubdomains, gsubdomainnums, gsubdomainperm)); 160c4762a1bSJed Brown for (gs = 0, s = 0; gs < gnsubdomains; ++gs) { 161c4762a1bSJed Brown if (s < nsubdomains) { 162c4762a1bSJed Brown PetscInt ss; 163c4762a1bSJed Brown ss = gsubdomainperm[s]; 164c4762a1bSJed Brown if (gs == gsubdomainnums[ss]) { /* Global subdomain gs being viewed is my subdomain with local number ss. */ 165c4762a1bSJed Brown PetscViewer subviewer = NULL; 1669566063dSJacob Faibussowitsch PetscCall(PetscViewerGetSubViewer(viewer, PetscObjectComm((PetscObject)submats[ss]), &subviewer)); 1679566063dSJacob Faibussowitsch PetscCall(MatView(submats[ss], subviewer)); 1689566063dSJacob Faibussowitsch PetscCall(PetscViewerRestoreSubViewer(viewer, PetscObjectComm((PetscObject)submats[ss]), &subviewer)); 169c4762a1bSJed Brown ++s; 170c4762a1bSJed Brown } 171c4762a1bSJed Brown } 1729566063dSJacob Faibussowitsch PetscCallMPI(MPI_Barrier(PETSC_COMM_WORLD)); 173c4762a1bSJed Brown } 1749566063dSJacob Faibussowitsch PetscCall(PetscViewerFlush(viewer)); 175c4762a1bSJed Brown if (rep == 1) goto cleanup; 176c4762a1bSJed Brown nsubdomains = 1; 1779566063dSJacob Faibussowitsch PetscCall(MatCreateSubMatricesMPI(A, nsubdomains, rowis, colis, MAT_REUSE_MATRIX, &submats)); 178c4762a1bSJed Brown /* 179c4762a1bSJed Brown Now view the matrices. To avoid deadlock when viewing a list of objects on different subcomms, 180c4762a1bSJed Brown we need to obtain the global numbers of our local objects and wait for the corresponding global 181c4762a1bSJed Brown number to be viewed. 182c4762a1bSJed Brown */ 1839566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, "Submatrices (repetition 2):\n")); 184c4762a1bSJed Brown for (s = 0; s < nsubdomains; ++s) gsubdomainperm[s] = s; 1859566063dSJacob Faibussowitsch PetscCall(PetscObjectsListGetGlobalNumbering(PETSC_COMM_WORLD, 1, (PetscObject *)submats, &gnsubdomains, gsubdomainnums)); 1869566063dSJacob Faibussowitsch PetscCall(PetscSortIntWithPermutation(nsubdomains, gsubdomainnums, gsubdomainperm)); 187c4762a1bSJed Brown for (gs = 0, s = 0; gs < gnsubdomains; ++gs) { 188c4762a1bSJed Brown if (s < nsubdomains) { 189c4762a1bSJed Brown PetscInt ss; 190c4762a1bSJed Brown ss = gsubdomainperm[s]; 191c4762a1bSJed Brown if (gs == gsubdomainnums[ss]) { /* Global subdomain gs being viewed is my subdomain with local number ss. */ 192c4762a1bSJed Brown PetscViewer subviewer = NULL; 1939566063dSJacob Faibussowitsch PetscCall(PetscViewerGetSubViewer(viewer, PetscObjectComm((PetscObject)submats[ss]), &subviewer)); 1949566063dSJacob Faibussowitsch PetscCall(MatView(submats[ss], subviewer)); 1959566063dSJacob Faibussowitsch PetscCall(PetscViewerRestoreSubViewer(viewer, PetscObjectComm((PetscObject)submats[ss]), &subviewer)); 196c4762a1bSJed Brown ++s; 197c4762a1bSJed Brown } 198c4762a1bSJed Brown } 1999566063dSJacob Faibussowitsch PetscCallMPI(MPI_Barrier(PETSC_COMM_WORLD)); 200c4762a1bSJed Brown } 2019566063dSJacob Faibussowitsch PetscCall(PetscViewerFlush(viewer)); 202c4762a1bSJed Brown cleanup: 20348a46eb9SPierre Jolivet for (k = 0; k < nsubdomains; ++k) PetscCall(MatDestroy(submats + k)); 2049566063dSJacob Faibussowitsch PetscCall(PetscFree(submats)); 205c4762a1bSJed Brown for (k = 0; k < nis; ++k) { 2069566063dSJacob Faibussowitsch PetscCall(ISDestroy(rowis + k)); 2079566063dSJacob Faibussowitsch PetscCall(ISDestroy(colis + k)); 208c4762a1bSJed Brown } 2099566063dSJacob Faibussowitsch PetscCall(MatDestroy(&A)); 2109566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_free(&subcomm)); 2119566063dSJacob Faibussowitsch PetscCall(PetscFinalize()); 212b122ec5aSJacob Faibussowitsch return 0; 213c4762a1bSJed Brown } 214c4762a1bSJed Brown 215c4762a1bSJed Brown /*TEST 216c4762a1bSJed Brown 217c4762a1bSJed Brown test: 218c4762a1bSJed Brown nsize: 2 219c4762a1bSJed Brown args: -total_subdomains 1 220c4762a1bSJed Brown output_file: output/ex183_2_1.out 221c4762a1bSJed Brown 222c4762a1bSJed Brown test: 223c4762a1bSJed Brown suffix: 2 224c4762a1bSJed Brown nsize: 3 225c4762a1bSJed Brown args: -total_subdomains 2 226c4762a1bSJed Brown output_file: output/ex183_3_2.out 227c4762a1bSJed Brown 228c4762a1bSJed Brown test: 229c4762a1bSJed Brown suffix: 3 230c4762a1bSJed Brown nsize: 4 231c4762a1bSJed Brown args: -total_subdomains 2 232c4762a1bSJed Brown output_file: output/ex183_4_2.out 233c4762a1bSJed Brown 234c4762a1bSJed Brown test: 235c4762a1bSJed Brown suffix: 4 236c4762a1bSJed Brown nsize: 6 237c4762a1bSJed Brown args: -total_subdomains 2 238c4762a1bSJed Brown output_file: output/ex183_6_2.out 239c4762a1bSJed Brown 240c4762a1bSJed Brown TEST*/ 241