1c6db04a5SJed Brown #include <../src/mat/impls/baij/mpi/mpibaij.h> /*I "petscmat.h" I*/ 2c6db04a5SJed Brown #include <../src/mat/impls/sbaij/mpi/mpisbaij.h> 3c6db04a5SJed Brown #include <../src/mat/impls/sbaij/seq/sbaij.h> 4c6db04a5SJed Brown #include <petscblaslapack.h> 5a30f8f8cSSatish Balay 66214f412SHong Zhang #if defined(PETSC_HAVE_ELEMENTAL) 7cc2e6a90SBarry Smith PETSC_INTERN PetscErrorCode MatConvert_MPISBAIJ_Elemental(Mat, MatType, MatReuse, Mat *); 86214f412SHong Zhang #endif 9d24d4204SJose E. Roman #if defined(PETSC_HAVE_SCALAPACK) 10d24d4204SJose E. Roman PETSC_INTERN PetscErrorCode MatConvert_SBAIJ_ScaLAPACK(Mat, MatType, MatReuse, Mat *); 11d24d4204SJose E. Roman #endif 12b147fbf3SStefano Zampini 13b147fbf3SStefano Zampini /* This could be moved to matimpl.h */ 14d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatPreallocateWithMats_Private(Mat B, PetscInt nm, Mat X[], PetscBool symm[], PetscBool fill) 15d71ae5a4SJacob Faibussowitsch { 16b147fbf3SStefano Zampini Mat preallocator; 17b147fbf3SStefano Zampini PetscInt r, rstart, rend; 18b147fbf3SStefano Zampini PetscInt bs, i, m, n, M, N; 19b147fbf3SStefano Zampini PetscBool cong = PETSC_TRUE; 20b147fbf3SStefano Zampini 21b147fbf3SStefano Zampini PetscFunctionBegin; 22b147fbf3SStefano Zampini PetscValidHeaderSpecific(B, MAT_CLASSID, 1); 23b147fbf3SStefano Zampini PetscValidLogicalCollectiveInt(B, nm, 2); 24b147fbf3SStefano Zampini for (i = 0; i < nm; i++) { 25b147fbf3SStefano Zampini PetscValidHeaderSpecific(X[i], MAT_CLASSID, 3); 269566063dSJacob Faibussowitsch PetscCall(PetscLayoutCompare(B->rmap, X[i]->rmap, &cong)); 275f80ce2aSJacob Faibussowitsch PetscCheck(cong, PetscObjectComm((PetscObject)B), PETSC_ERR_SUP, "Not for different layouts"); 28b147fbf3SStefano Zampini } 29b147fbf3SStefano Zampini PetscValidLogicalCollectiveBool(B, fill, 5); 309566063dSJacob Faibussowitsch PetscCall(MatGetBlockSize(B, &bs)); 319566063dSJacob Faibussowitsch PetscCall(MatGetSize(B, &M, &N)); 329566063dSJacob Faibussowitsch PetscCall(MatGetLocalSize(B, &m, &n)); 339566063dSJacob Faibussowitsch PetscCall(MatCreate(PetscObjectComm((PetscObject)B), &preallocator)); 349566063dSJacob Faibussowitsch PetscCall(MatSetType(preallocator, MATPREALLOCATOR)); 359566063dSJacob Faibussowitsch PetscCall(MatSetBlockSize(preallocator, bs)); 369566063dSJacob Faibussowitsch PetscCall(MatSetSizes(preallocator, m, n, M, N)); 379566063dSJacob Faibussowitsch PetscCall(MatSetUp(preallocator)); 389566063dSJacob Faibussowitsch PetscCall(MatGetOwnershipRange(preallocator, &rstart, &rend)); 39b147fbf3SStefano Zampini for (r = rstart; r < rend; ++r) { 40b147fbf3SStefano Zampini PetscInt ncols; 41b147fbf3SStefano Zampini const PetscInt *row; 42b147fbf3SStefano Zampini const PetscScalar *vals; 43b147fbf3SStefano Zampini 44b147fbf3SStefano Zampini for (i = 0; i < nm; i++) { 459566063dSJacob Faibussowitsch PetscCall(MatGetRow(X[i], r, &ncols, &row, &vals)); 469566063dSJacob Faibussowitsch PetscCall(MatSetValues(preallocator, 1, &r, ncols, row, vals, INSERT_VALUES)); 4748a46eb9SPierre Jolivet if (symm && symm[i]) PetscCall(MatSetValues(preallocator, ncols, row, 1, &r, vals, INSERT_VALUES)); 489566063dSJacob Faibussowitsch PetscCall(MatRestoreRow(X[i], r, &ncols, &row, &vals)); 49b147fbf3SStefano Zampini } 50b147fbf3SStefano Zampini } 519566063dSJacob Faibussowitsch PetscCall(MatAssemblyBegin(preallocator, MAT_FINAL_ASSEMBLY)); 529566063dSJacob Faibussowitsch PetscCall(MatAssemblyEnd(preallocator, MAT_FINAL_ASSEMBLY)); 539566063dSJacob Faibussowitsch PetscCall(MatPreallocatorPreallocate(preallocator, fill, B)); 549566063dSJacob Faibussowitsch PetscCall(MatDestroy(&preallocator)); 553ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 56b147fbf3SStefano Zampini } 57b147fbf3SStefano Zampini 58d71ae5a4SJacob Faibussowitsch PETSC_INTERN PetscErrorCode MatConvert_MPISBAIJ_Basic(Mat A, MatType newtype, MatReuse reuse, Mat *newmat) 59d71ae5a4SJacob Faibussowitsch { 60b147fbf3SStefano Zampini Mat B; 61b147fbf3SStefano Zampini PetscInt r; 62b147fbf3SStefano Zampini 63b147fbf3SStefano Zampini PetscFunctionBegin; 64b147fbf3SStefano Zampini if (reuse != MAT_REUSE_MATRIX) { 6528d58a37SPierre Jolivet PetscBool symm = PETSC_TRUE, isdense; 66b147fbf3SStefano Zampini PetscInt bs; 67b147fbf3SStefano Zampini 689566063dSJacob Faibussowitsch PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 699566063dSJacob Faibussowitsch PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N)); 709566063dSJacob Faibussowitsch PetscCall(MatSetType(B, newtype)); 719566063dSJacob Faibussowitsch PetscCall(MatGetBlockSize(A, &bs)); 729566063dSJacob Faibussowitsch PetscCall(MatSetBlockSize(B, bs)); 739566063dSJacob Faibussowitsch PetscCall(PetscLayoutSetUp(B->rmap)); 749566063dSJacob Faibussowitsch PetscCall(PetscLayoutSetUp(B->cmap)); 759566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompareAny((PetscObject)B, &isdense, MATSEQDENSE, MATMPIDENSE, MATSEQDENSECUDA, "")); 7628d58a37SPierre Jolivet if (!isdense) { 779566063dSJacob Faibussowitsch PetscCall(MatGetRowUpperTriangular(A)); 789566063dSJacob Faibussowitsch PetscCall(MatPreallocateWithMats_Private(B, 1, &A, &symm, PETSC_TRUE)); 799566063dSJacob Faibussowitsch PetscCall(MatRestoreRowUpperTriangular(A)); 8028d58a37SPierre Jolivet } else { 819566063dSJacob Faibussowitsch PetscCall(MatSetUp(B)); 8228d58a37SPierre Jolivet } 8328d58a37SPierre Jolivet } else { 8428d58a37SPierre Jolivet B = *newmat; 859566063dSJacob Faibussowitsch PetscCall(MatZeroEntries(B)); 8628d58a37SPierre Jolivet } 87b147fbf3SStefano Zampini 889566063dSJacob Faibussowitsch PetscCall(MatGetRowUpperTriangular(A)); 89b147fbf3SStefano Zampini for (r = A->rmap->rstart; r < A->rmap->rend; r++) { 90b147fbf3SStefano Zampini PetscInt ncols; 91b147fbf3SStefano Zampini const PetscInt *row; 92b147fbf3SStefano Zampini const PetscScalar *vals; 93b147fbf3SStefano Zampini 949566063dSJacob Faibussowitsch PetscCall(MatGetRow(A, r, &ncols, &row, &vals)); 959566063dSJacob Faibussowitsch PetscCall(MatSetValues(B, 1, &r, ncols, row, vals, INSERT_VALUES)); 96eb1ec7c1SStefano Zampini #if defined(PETSC_USE_COMPLEX) 97b94d7dedSBarry Smith if (A->hermitian == PETSC_BOOL3_TRUE) { 98eb1ec7c1SStefano Zampini PetscInt i; 9948a46eb9SPierre Jolivet for (i = 0; i < ncols; i++) PetscCall(MatSetValue(B, row[i], r, PetscConj(vals[i]), INSERT_VALUES)); 100eb1ec7c1SStefano Zampini } else { 1019566063dSJacob Faibussowitsch PetscCall(MatSetValues(B, ncols, row, 1, &r, vals, INSERT_VALUES)); 102eb1ec7c1SStefano Zampini } 103eb1ec7c1SStefano Zampini #else 1049566063dSJacob Faibussowitsch PetscCall(MatSetValues(B, ncols, row, 1, &r, vals, INSERT_VALUES)); 105eb1ec7c1SStefano Zampini #endif 1069566063dSJacob Faibussowitsch PetscCall(MatRestoreRow(A, r, &ncols, &row, &vals)); 107b147fbf3SStefano Zampini } 1089566063dSJacob Faibussowitsch PetscCall(MatRestoreRowUpperTriangular(A)); 1099566063dSJacob Faibussowitsch PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 1109566063dSJacob Faibussowitsch PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 111b147fbf3SStefano Zampini 112b147fbf3SStefano Zampini if (reuse == MAT_INPLACE_MATRIX) { 1139566063dSJacob Faibussowitsch PetscCall(MatHeaderReplace(A, &B)); 114b147fbf3SStefano Zampini } else { 115b147fbf3SStefano Zampini *newmat = B; 116b147fbf3SStefano Zampini } 1173ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 118b147fbf3SStefano Zampini } 119b147fbf3SStefano Zampini 120d71ae5a4SJacob Faibussowitsch PetscErrorCode MatStoreValues_MPISBAIJ(Mat mat) 121d71ae5a4SJacob Faibussowitsch { 122f3566a2aSHong Zhang Mat_MPISBAIJ *aij = (Mat_MPISBAIJ *)mat->data; 123a30f8f8cSSatish Balay 124a30f8f8cSSatish Balay PetscFunctionBegin; 1259566063dSJacob Faibussowitsch PetscCall(MatStoreValues(aij->A)); 1269566063dSJacob Faibussowitsch PetscCall(MatStoreValues(aij->B)); 1273ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 128a30f8f8cSSatish Balay } 129a30f8f8cSSatish Balay 130d71ae5a4SJacob Faibussowitsch PetscErrorCode MatRetrieveValues_MPISBAIJ(Mat mat) 131d71ae5a4SJacob Faibussowitsch { 132f3566a2aSHong Zhang Mat_MPISBAIJ *aij = (Mat_MPISBAIJ *)mat->data; 133a30f8f8cSSatish Balay 134a30f8f8cSSatish Balay PetscFunctionBegin; 1359566063dSJacob Faibussowitsch PetscCall(MatRetrieveValues(aij->A)); 1369566063dSJacob Faibussowitsch PetscCall(MatRetrieveValues(aij->B)); 1373ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 138a30f8f8cSSatish Balay } 139a30f8f8cSSatish Balay 140d40312a9SBarry Smith #define MatSetValues_SeqSBAIJ_A_Private(row, col, value, addv, orow, ocol) \ 141a30f8f8cSSatish Balay { \ 142a30f8f8cSSatish Balay brow = row / bs; \ 1439371c9d4SSatish Balay rp = aj + ai[brow]; \ 1449371c9d4SSatish Balay ap = aa + bs2 * ai[brow]; \ 1459371c9d4SSatish Balay rmax = aimax[brow]; \ 1469371c9d4SSatish Balay nrow = ailen[brow]; \ 147a30f8f8cSSatish Balay bcol = col / bs; \ 1489371c9d4SSatish Balay ridx = row % bs; \ 1499371c9d4SSatish Balay cidx = col % bs; \ 1509371c9d4SSatish Balay low = 0; \ 1519371c9d4SSatish Balay high = nrow; \ 152a30f8f8cSSatish Balay while (high - low > 3) { \ 153a30f8f8cSSatish Balay t = (low + high) / 2; \ 154a30f8f8cSSatish Balay if (rp[t] > bcol) high = t; \ 155a30f8f8cSSatish Balay else low = t; \ 156a30f8f8cSSatish Balay } \ 157a30f8f8cSSatish Balay for (_i = low; _i < high; _i++) { \ 158a30f8f8cSSatish Balay if (rp[_i] > bcol) break; \ 159a30f8f8cSSatish Balay if (rp[_i] == bcol) { \ 160a30f8f8cSSatish Balay bap = ap + bs2 * _i + bs * cidx + ridx; \ 161a30f8f8cSSatish Balay if (addv == ADD_VALUES) *bap += value; \ 162a30f8f8cSSatish Balay else *bap = value; \ 163a30f8f8cSSatish Balay goto a_noinsert; \ 164a30f8f8cSSatish Balay } \ 165a30f8f8cSSatish Balay } \ 166a30f8f8cSSatish Balay if (a->nonew == 1) goto a_noinsert; \ 1675f80ce2aSJacob Faibussowitsch PetscCheck(a->nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \ 168fef13f97SBarry Smith MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, brow, bcol, rmax, aa, ai, aj, rp, ap, aimax, a->nonew, MatScalar); \ 169a30f8f8cSSatish Balay N = nrow++ - 1; \ 170a30f8f8cSSatish Balay /* shift up all the later entries in this row */ \ 1719566063dSJacob Faibussowitsch PetscCall(PetscArraymove(rp + _i + 1, rp + _i, N - _i + 1)); \ 1729566063dSJacob Faibussowitsch PetscCall(PetscArraymove(ap + bs2 * (_i + 1), ap + bs2 * _i, bs2 * (N - _i + 1))); \ 1739566063dSJacob Faibussowitsch PetscCall(PetscArrayzero(ap + bs2 * _i, bs2)); \ 174a30f8f8cSSatish Balay rp[_i] = bcol; \ 175a30f8f8cSSatish Balay ap[bs2 * _i + bs * cidx + ridx] = value; \ 176e56f5c9eSBarry Smith A->nonzerostate++; \ 177a30f8f8cSSatish Balay a_noinsert:; \ 178a30f8f8cSSatish Balay ailen[brow] = nrow; \ 179a30f8f8cSSatish Balay } 180e5e170daSBarry Smith 181d40312a9SBarry Smith #define MatSetValues_SeqSBAIJ_B_Private(row, col, value, addv, orow, ocol) \ 182a30f8f8cSSatish Balay { \ 183a30f8f8cSSatish Balay brow = row / bs; \ 1849371c9d4SSatish Balay rp = bj + bi[brow]; \ 1859371c9d4SSatish Balay ap = ba + bs2 * bi[brow]; \ 1869371c9d4SSatish Balay rmax = bimax[brow]; \ 1879371c9d4SSatish Balay nrow = bilen[brow]; \ 188a30f8f8cSSatish Balay bcol = col / bs; \ 1899371c9d4SSatish Balay ridx = row % bs; \ 1909371c9d4SSatish Balay cidx = col % bs; \ 1919371c9d4SSatish Balay low = 0; \ 1929371c9d4SSatish Balay high = nrow; \ 193a30f8f8cSSatish Balay while (high - low > 3) { \ 194a30f8f8cSSatish Balay t = (low + high) / 2; \ 195a30f8f8cSSatish Balay if (rp[t] > bcol) high = t; \ 196a30f8f8cSSatish Balay else low = t; \ 197a30f8f8cSSatish Balay } \ 198a30f8f8cSSatish Balay for (_i = low; _i < high; _i++) { \ 199a30f8f8cSSatish Balay if (rp[_i] > bcol) break; \ 200a30f8f8cSSatish Balay if (rp[_i] == bcol) { \ 201a30f8f8cSSatish Balay bap = ap + bs2 * _i + bs * cidx + ridx; \ 202a30f8f8cSSatish Balay if (addv == ADD_VALUES) *bap += value; \ 203a30f8f8cSSatish Balay else *bap = value; \ 204a30f8f8cSSatish Balay goto b_noinsert; \ 205a30f8f8cSSatish Balay } \ 206a30f8f8cSSatish Balay } \ 207a30f8f8cSSatish Balay if (b->nonew == 1) goto b_noinsert; \ 2085f80ce2aSJacob Faibussowitsch PetscCheck(b->nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \ 209fef13f97SBarry Smith MatSeqXAIJReallocateAIJ(B, b->mbs, bs2, nrow, brow, bcol, rmax, ba, bi, bj, rp, ap, bimax, b->nonew, MatScalar); \ 210a30f8f8cSSatish Balay N = nrow++ - 1; \ 211a30f8f8cSSatish Balay /* shift up all the later entries in this row */ \ 2129566063dSJacob Faibussowitsch PetscCall(PetscArraymove(rp + _i + 1, rp + _i, N - _i + 1)); \ 2139566063dSJacob Faibussowitsch PetscCall(PetscArraymove(ap + bs2 * (_i + 1), ap + bs2 * _i, bs2 * (N - _i + 1))); \ 2149566063dSJacob Faibussowitsch PetscCall(PetscArrayzero(ap + bs2 * _i, bs2)); \ 215a30f8f8cSSatish Balay rp[_i] = bcol; \ 216a30f8f8cSSatish Balay ap[bs2 * _i + bs * cidx + ridx] = value; \ 217e56f5c9eSBarry Smith B->nonzerostate++; \ 218a30f8f8cSSatish Balay b_noinsert:; \ 219a30f8f8cSSatish Balay bilen[brow] = nrow; \ 220a30f8f8cSSatish Balay } 221a30f8f8cSSatish Balay 222a30f8f8cSSatish Balay /* Only add/insert a(i,j) with i<=j (blocks). 223da81f932SPierre Jolivet Any a(i,j) with i>j input by user is ignored or generates an error 224a30f8f8cSSatish Balay */ 225d71ae5a4SJacob Faibussowitsch PetscErrorCode MatSetValues_MPISBAIJ(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv) 226d71ae5a4SJacob Faibussowitsch { 227a30f8f8cSSatish Balay Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data; 228a30f8f8cSSatish Balay MatScalar value; 229ace3abfcSBarry Smith PetscBool roworiented = baij->roworiented; 2301302d50aSBarry Smith PetscInt i, j, row, col; 231d0f46423SBarry Smith PetscInt rstart_orig = mat->rmap->rstart; 232d0f46423SBarry Smith PetscInt rend_orig = mat->rmap->rend, cstart_orig = mat->cmap->rstart; 233d0f46423SBarry Smith PetscInt cend_orig = mat->cmap->rend, bs = mat->rmap->bs; 234a30f8f8cSSatish Balay 235a30f8f8cSSatish Balay /* Some Variables required in the macro */ 236a30f8f8cSSatish Balay Mat A = baij->A; 237a30f8f8cSSatish Balay Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)(A)->data; 2381302d50aSBarry Smith PetscInt *aimax = a->imax, *ai = a->i, *ailen = a->ilen, *aj = a->j; 239a30f8f8cSSatish Balay MatScalar *aa = a->a; 240a30f8f8cSSatish Balay 241a30f8f8cSSatish Balay Mat B = baij->B; 242a30f8f8cSSatish Balay Mat_SeqBAIJ *b = (Mat_SeqBAIJ *)(B)->data; 2431302d50aSBarry Smith PetscInt *bimax = b->imax, *bi = b->i, *bilen = b->ilen, *bj = b->j; 244a30f8f8cSSatish Balay MatScalar *ba = b->a; 245a30f8f8cSSatish Balay 2461302d50aSBarry Smith PetscInt *rp, ii, nrow, _i, rmax, N, brow, bcol; 2471302d50aSBarry Smith PetscInt low, high, t, ridx, cidx, bs2 = a->bs2; 248a30f8f8cSSatish Balay MatScalar *ap, *bap; 249a30f8f8cSSatish Balay 250a30f8f8cSSatish Balay /* for stash */ 2510298fd71SBarry Smith PetscInt n_loc, *in_loc = NULL; 2520298fd71SBarry Smith MatScalar *v_loc = NULL; 253a30f8f8cSSatish Balay 254a30f8f8cSSatish Balay PetscFunctionBegin; 255a30f8f8cSSatish Balay if (!baij->donotstash) { 25659ffdab8SBarry Smith if (n > baij->n_loc) { 2579566063dSJacob Faibussowitsch PetscCall(PetscFree(baij->in_loc)); 2589566063dSJacob Faibussowitsch PetscCall(PetscFree(baij->v_loc)); 2599566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(n, &baij->in_loc)); 2609566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(n, &baij->v_loc)); 26126fbe8dcSKarl Rupp 26259ffdab8SBarry Smith baij->n_loc = n; 26359ffdab8SBarry Smith } 26459ffdab8SBarry Smith in_loc = baij->in_loc; 26559ffdab8SBarry Smith v_loc = baij->v_loc; 266a30f8f8cSSatish Balay } 267a30f8f8cSSatish Balay 268a30f8f8cSSatish Balay for (i = 0; i < m; i++) { 269a30f8f8cSSatish Balay if (im[i] < 0) continue; 2705f80ce2aSJacob Faibussowitsch PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1); 271a30f8f8cSSatish Balay if (im[i] >= rstart_orig && im[i] < rend_orig) { /* this processor entry */ 272a30f8f8cSSatish Balay row = im[i] - rstart_orig; /* local row index */ 273a30f8f8cSSatish Balay for (j = 0; j < n; j++) { 27401b2bd88SHong Zhang if (im[i] / bs > in[j] / bs) { 27501b2bd88SHong Zhang if (a->ignore_ltriangular) { 27601b2bd88SHong Zhang continue; /* ignore lower triangular blocks */ 27726fbe8dcSKarl Rupp } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_USER, "Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)"); 27801b2bd88SHong Zhang } 279a30f8f8cSSatish Balay if (in[j] >= cstart_orig && in[j] < cend_orig) { /* diag entry (A) */ 280a30f8f8cSSatish Balay col = in[j] - cstart_orig; /* local col index */ 2819371c9d4SSatish Balay brow = row / bs; 2829371c9d4SSatish Balay bcol = col / bs; 283a30f8f8cSSatish Balay if (brow > bcol) continue; /* ignore lower triangular blocks of A */ 284db4deed7SKarl Rupp if (roworiented) value = v[i * n + j]; 285db4deed7SKarl Rupp else value = v[i + j * m]; 286d40312a9SBarry Smith MatSetValues_SeqSBAIJ_A_Private(row, col, value, addv, im[i], in[j]); 2879566063dSJacob Faibussowitsch /* PetscCall(MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv)); */ 288f7d195e4SLawrence Mitchell } else if (in[j] < 0) { 289f7d195e4SLawrence Mitchell continue; 290f7d195e4SLawrence Mitchell } else { 291f7d195e4SLawrence Mitchell PetscCheck(in[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], mat->cmap->N - 1); 292f7d195e4SLawrence Mitchell /* off-diag entry (B) */ 293a30f8f8cSSatish Balay if (mat->was_assembled) { 29448a46eb9SPierre Jolivet if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat)); 295a30f8f8cSSatish Balay #if defined(PETSC_USE_CTABLE) 296eec179cfSJacob Faibussowitsch PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] / bs + 1, 0, &col)); 29771730473SSatish Balay col = col - 1; 298a30f8f8cSSatish Balay #else 29971730473SSatish Balay col = baij->colmap[in[j] / bs] - 1; 300a30f8f8cSSatish Balay #endif 301a30f8f8cSSatish Balay if (col < 0 && !((Mat_SeqSBAIJ *)(baij->A->data))->nonew) { 3029566063dSJacob Faibussowitsch PetscCall(MatDisAssemble_MPISBAIJ(mat)); 303a30f8f8cSSatish Balay col = in[j]; 304a30f8f8cSSatish Balay /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */ 305a30f8f8cSSatish Balay B = baij->B; 306a30f8f8cSSatish Balay b = (Mat_SeqBAIJ *)(B)->data; 3079371c9d4SSatish Balay bimax = b->imax; 3089371c9d4SSatish Balay bi = b->i; 3099371c9d4SSatish Balay bilen = b->ilen; 3109371c9d4SSatish Balay bj = b->j; 311a30f8f8cSSatish Balay ba = b->a; 31271730473SSatish Balay } else col += in[j] % bs; 313a30f8f8cSSatish Balay } else col = in[j]; 314db4deed7SKarl Rupp if (roworiented) value = v[i * n + j]; 315db4deed7SKarl Rupp else value = v[i + j * m]; 316d40312a9SBarry Smith MatSetValues_SeqSBAIJ_B_Private(row, col, value, addv, im[i], in[j]); 3179566063dSJacob Faibussowitsch /* PetscCall(MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv)); */ 318a30f8f8cSSatish Balay } 319a30f8f8cSSatish Balay } 320a30f8f8cSSatish Balay } else { /* off processor entry */ 3215f80ce2aSJacob Faibussowitsch PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set", im[i]); 322a30f8f8cSSatish Balay if (!baij->donotstash) { 3235080c13bSMatthew G Knepley mat->assembled = PETSC_FALSE; 324a30f8f8cSSatish Balay n_loc = 0; 325a30f8f8cSSatish Balay for (j = 0; j < n; j++) { 326f65c83cfSHong Zhang if (im[i] / bs > in[j] / bs) continue; /* ignore lower triangular blocks */ 327a30f8f8cSSatish Balay in_loc[n_loc] = in[j]; 328a30f8f8cSSatish Balay if (roworiented) { 329a30f8f8cSSatish Balay v_loc[n_loc] = v[i * n + j]; 330a30f8f8cSSatish Balay } else { 331a30f8f8cSSatish Balay v_loc[n_loc] = v[j * m + i]; 332a30f8f8cSSatish Balay } 333a30f8f8cSSatish Balay n_loc++; 334a30f8f8cSSatish Balay } 3359566063dSJacob Faibussowitsch PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n_loc, in_loc, v_loc, PETSC_FALSE)); 336a30f8f8cSSatish Balay } 337a30f8f8cSSatish Balay } 338a30f8f8cSSatish Balay } 3393ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 340a30f8f8cSSatish Balay } 341a30f8f8cSSatish Balay 342d71ae5a4SJacob Faibussowitsch static inline PetscErrorCode MatSetValuesBlocked_SeqSBAIJ_Inlined(Mat A, PetscInt row, PetscInt col, const PetscScalar v[], InsertMode is, PetscInt orow, PetscInt ocol) 343d71ae5a4SJacob Faibussowitsch { 34436bd2089SBarry Smith Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)A->data; 34536bd2089SBarry Smith PetscInt *rp, low, high, t, ii, jj, nrow, i, rmax, N; 34636bd2089SBarry Smith PetscInt *imax = a->imax, *ai = a->i, *ailen = a->ilen; 34736bd2089SBarry Smith PetscInt *aj = a->j, nonew = a->nonew, bs2 = a->bs2, bs = A->rmap->bs; 34836bd2089SBarry Smith PetscBool roworiented = a->roworiented; 34936bd2089SBarry Smith const PetscScalar *value = v; 35036bd2089SBarry Smith MatScalar *ap, *aa = a->a, *bap; 35136bd2089SBarry Smith 35236bd2089SBarry Smith PetscFunctionBegin; 35336bd2089SBarry Smith if (col < row) { 3543ba16761SJacob Faibussowitsch if (a->ignore_ltriangular) PetscFunctionReturn(PETSC_SUCCESS); /* ignore lower triangular block */ 35536bd2089SBarry Smith else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_USER, "Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)"); 35636bd2089SBarry Smith } 35736bd2089SBarry Smith rp = aj + ai[row]; 35836bd2089SBarry Smith ap = aa + bs2 * ai[row]; 35936bd2089SBarry Smith rmax = imax[row]; 36036bd2089SBarry Smith nrow = ailen[row]; 36136bd2089SBarry Smith value = v; 36236bd2089SBarry Smith low = 0; 36336bd2089SBarry Smith high = nrow; 36436bd2089SBarry Smith 36536bd2089SBarry Smith while (high - low > 7) { 36636bd2089SBarry Smith t = (low + high) / 2; 36736bd2089SBarry Smith if (rp[t] > col) high = t; 36836bd2089SBarry Smith else low = t; 36936bd2089SBarry Smith } 37036bd2089SBarry Smith for (i = low; i < high; i++) { 37136bd2089SBarry Smith if (rp[i] > col) break; 37236bd2089SBarry Smith if (rp[i] == col) { 37336bd2089SBarry Smith bap = ap + bs2 * i; 37436bd2089SBarry Smith if (roworiented) { 37536bd2089SBarry Smith if (is == ADD_VALUES) { 37636bd2089SBarry Smith for (ii = 0; ii < bs; ii++) { 377ad540459SPierre Jolivet for (jj = ii; jj < bs2; jj += bs) bap[jj] += *value++; 37836bd2089SBarry Smith } 37936bd2089SBarry Smith } else { 38036bd2089SBarry Smith for (ii = 0; ii < bs; ii++) { 381ad540459SPierre Jolivet for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++; 38236bd2089SBarry Smith } 38336bd2089SBarry Smith } 38436bd2089SBarry Smith } else { 38536bd2089SBarry Smith if (is == ADD_VALUES) { 38636bd2089SBarry Smith for (ii = 0; ii < bs; ii++) { 387ad540459SPierre Jolivet for (jj = 0; jj < bs; jj++) *bap++ += *value++; 38836bd2089SBarry Smith } 38936bd2089SBarry Smith } else { 39036bd2089SBarry Smith for (ii = 0; ii < bs; ii++) { 391ad540459SPierre Jolivet for (jj = 0; jj < bs; jj++) *bap++ = *value++; 39236bd2089SBarry Smith } 39336bd2089SBarry Smith } 39436bd2089SBarry Smith } 39536bd2089SBarry Smith goto noinsert2; 39636bd2089SBarry Smith } 39736bd2089SBarry Smith } 39836bd2089SBarry Smith if (nonew == 1) goto noinsert2; 3995f80ce2aSJacob Faibussowitsch PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new block index nonzero block (%" PetscInt_FMT ", %" PetscInt_FMT ") in the matrix", orow, ocol); 40036bd2089SBarry Smith MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, row, col, rmax, aa, ai, aj, rp, ap, imax, nonew, MatScalar); 4019371c9d4SSatish Balay N = nrow++ - 1; 4029371c9d4SSatish Balay high++; 40336bd2089SBarry Smith /* shift up all the later entries in this row */ 4049566063dSJacob Faibussowitsch PetscCall(PetscArraymove(rp + i + 1, rp + i, N - i + 1)); 4059566063dSJacob Faibussowitsch PetscCall(PetscArraymove(ap + bs2 * (i + 1), ap + bs2 * i, bs2 * (N - i + 1))); 40636bd2089SBarry Smith rp[i] = col; 40736bd2089SBarry Smith bap = ap + bs2 * i; 40836bd2089SBarry Smith if (roworiented) { 40936bd2089SBarry Smith for (ii = 0; ii < bs; ii++) { 410ad540459SPierre Jolivet for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++; 41136bd2089SBarry Smith } 41236bd2089SBarry Smith } else { 41336bd2089SBarry Smith for (ii = 0; ii < bs; ii++) { 414ad540459SPierre Jolivet for (jj = 0; jj < bs; jj++) *bap++ = *value++; 41536bd2089SBarry Smith } 41636bd2089SBarry Smith } 41736bd2089SBarry Smith noinsert2:; 41836bd2089SBarry Smith ailen[row] = nrow; 4193ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 42036bd2089SBarry Smith } 42136bd2089SBarry Smith 42236bd2089SBarry Smith /* 42336bd2089SBarry Smith This routine is exactly duplicated in mpibaij.c 42436bd2089SBarry Smith */ 425d71ae5a4SJacob Faibussowitsch static inline PetscErrorCode MatSetValuesBlocked_SeqBAIJ_Inlined(Mat A, PetscInt row, PetscInt col, const PetscScalar v[], InsertMode is, PetscInt orow, PetscInt ocol) 426d71ae5a4SJacob Faibussowitsch { 42736bd2089SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 42836bd2089SBarry Smith PetscInt *rp, low, high, t, ii, jj, nrow, i, rmax, N; 42936bd2089SBarry Smith PetscInt *imax = a->imax, *ai = a->i, *ailen = a->ilen; 43036bd2089SBarry Smith PetscInt *aj = a->j, nonew = a->nonew, bs2 = a->bs2, bs = A->rmap->bs; 43136bd2089SBarry Smith PetscBool roworiented = a->roworiented; 43236bd2089SBarry Smith const PetscScalar *value = v; 43336bd2089SBarry Smith MatScalar *ap, *aa = a->a, *bap; 43436bd2089SBarry Smith 43536bd2089SBarry Smith PetscFunctionBegin; 43636bd2089SBarry Smith rp = aj + ai[row]; 43736bd2089SBarry Smith ap = aa + bs2 * ai[row]; 43836bd2089SBarry Smith rmax = imax[row]; 43936bd2089SBarry Smith nrow = ailen[row]; 44036bd2089SBarry Smith low = 0; 44136bd2089SBarry Smith high = nrow; 44236bd2089SBarry Smith value = v; 44336bd2089SBarry Smith while (high - low > 7) { 44436bd2089SBarry Smith t = (low + high) / 2; 44536bd2089SBarry Smith if (rp[t] > col) high = t; 44636bd2089SBarry Smith else low = t; 44736bd2089SBarry Smith } 44836bd2089SBarry Smith for (i = low; i < high; i++) { 44936bd2089SBarry Smith if (rp[i] > col) break; 45036bd2089SBarry Smith if (rp[i] == col) { 45136bd2089SBarry Smith bap = ap + bs2 * i; 45236bd2089SBarry Smith if (roworiented) { 45336bd2089SBarry Smith if (is == ADD_VALUES) { 45436bd2089SBarry Smith for (ii = 0; ii < bs; ii++) { 455ad540459SPierre Jolivet for (jj = ii; jj < bs2; jj += bs) bap[jj] += *value++; 45636bd2089SBarry Smith } 45736bd2089SBarry Smith } else { 45836bd2089SBarry Smith for (ii = 0; ii < bs; ii++) { 459ad540459SPierre Jolivet for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++; 46036bd2089SBarry Smith } 46136bd2089SBarry Smith } 46236bd2089SBarry Smith } else { 46336bd2089SBarry Smith if (is == ADD_VALUES) { 46436bd2089SBarry Smith for (ii = 0; ii < bs; ii++, value += bs) { 465ad540459SPierre Jolivet for (jj = 0; jj < bs; jj++) bap[jj] += value[jj]; 46636bd2089SBarry Smith bap += bs; 46736bd2089SBarry Smith } 46836bd2089SBarry Smith } else { 46936bd2089SBarry Smith for (ii = 0; ii < bs; ii++, value += bs) { 470ad540459SPierre Jolivet for (jj = 0; jj < bs; jj++) bap[jj] = value[jj]; 47136bd2089SBarry Smith bap += bs; 47236bd2089SBarry Smith } 47336bd2089SBarry Smith } 47436bd2089SBarry Smith } 47536bd2089SBarry Smith goto noinsert2; 47636bd2089SBarry Smith } 47736bd2089SBarry Smith } 47836bd2089SBarry Smith if (nonew == 1) goto noinsert2; 4795f80ce2aSJacob Faibussowitsch PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new global block indexed nonzero block (%" PetscInt_FMT ", %" PetscInt_FMT ") in the matrix", orow, ocol); 48036bd2089SBarry Smith MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, row, col, rmax, aa, ai, aj, rp, ap, imax, nonew, MatScalar); 4819371c9d4SSatish Balay N = nrow++ - 1; 4829371c9d4SSatish Balay high++; 48336bd2089SBarry Smith /* shift up all the later entries in this row */ 4849566063dSJacob Faibussowitsch PetscCall(PetscArraymove(rp + i + 1, rp + i, N - i + 1)); 4859566063dSJacob Faibussowitsch PetscCall(PetscArraymove(ap + bs2 * (i + 1), ap + bs2 * i, bs2 * (N - i + 1))); 48636bd2089SBarry Smith rp[i] = col; 48736bd2089SBarry Smith bap = ap + bs2 * i; 48836bd2089SBarry Smith if (roworiented) { 48936bd2089SBarry Smith for (ii = 0; ii < bs; ii++) { 490ad540459SPierre Jolivet for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++; 49136bd2089SBarry Smith } 49236bd2089SBarry Smith } else { 49336bd2089SBarry Smith for (ii = 0; ii < bs; ii++) { 494ad540459SPierre Jolivet for (jj = 0; jj < bs; jj++) *bap++ = *value++; 49536bd2089SBarry Smith } 49636bd2089SBarry Smith } 49736bd2089SBarry Smith noinsert2:; 49836bd2089SBarry Smith ailen[row] = nrow; 4993ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 50036bd2089SBarry Smith } 50136bd2089SBarry Smith 50236bd2089SBarry Smith /* 50336bd2089SBarry Smith This routine could be optimized by removing the need for the block copy below and passing stride information 50436bd2089SBarry Smith to the above inline routines; similarly in MatSetValuesBlocked_MPIBAIJ() 50536bd2089SBarry Smith */ 506d71ae5a4SJacob Faibussowitsch PetscErrorCode MatSetValuesBlocked_MPISBAIJ(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const MatScalar v[], InsertMode addv) 507d71ae5a4SJacob Faibussowitsch { 5080880e062SHong Zhang Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data; 509f15d580aSBarry Smith const MatScalar *value; 510f15d580aSBarry Smith MatScalar *barray = baij->barray; 511ace3abfcSBarry Smith PetscBool roworiented = baij->roworiented, ignore_ltriangular = ((Mat_SeqSBAIJ *)baij->A->data)->ignore_ltriangular; 512899cda47SBarry Smith PetscInt i, j, ii, jj, row, col, rstart = baij->rstartbs; 513476417e5SBarry Smith PetscInt rend = baij->rendbs, cstart = baij->cstartbs, stepval; 514476417e5SBarry Smith PetscInt cend = baij->cendbs, bs = mat->rmap->bs, bs2 = baij->bs2; 5150880e062SHong Zhang 516a30f8f8cSSatish Balay PetscFunctionBegin; 5170880e062SHong Zhang if (!barray) { 5189566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(bs2, &barray)); 5190880e062SHong Zhang baij->barray = barray; 5200880e062SHong Zhang } 5210880e062SHong Zhang 5220880e062SHong Zhang if (roworiented) { 5230880e062SHong Zhang stepval = (n - 1) * bs; 5240880e062SHong Zhang } else { 5250880e062SHong Zhang stepval = (m - 1) * bs; 5260880e062SHong Zhang } 5270880e062SHong Zhang for (i = 0; i < m; i++) { 5280880e062SHong Zhang if (im[i] < 0) continue; 5296bdcaf15SBarry Smith PetscCheck(im[i] < baij->Mbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Block indexed row too large %" PetscInt_FMT " max %" PetscInt_FMT, im[i], baij->Mbs - 1); 5300880e062SHong Zhang if (im[i] >= rstart && im[i] < rend) { 5310880e062SHong Zhang row = im[i] - rstart; 5320880e062SHong Zhang for (j = 0; j < n; j++) { 533f3f98c53SJed Brown if (im[i] > in[j]) { 534f3f98c53SJed Brown if (ignore_ltriangular) continue; /* ignore lower triangular blocks */ 535e32f2f54SBarry Smith else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_USER, "Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)"); 536f3f98c53SJed Brown } 5370880e062SHong Zhang /* If NumCol = 1 then a copy is not required */ 5380880e062SHong Zhang if ((roworiented) && (n == 1)) { 539f15d580aSBarry Smith barray = (MatScalar *)v + i * bs2; 5400880e062SHong Zhang } else if ((!roworiented) && (m == 1)) { 541f15d580aSBarry Smith barray = (MatScalar *)v + j * bs2; 5420880e062SHong Zhang } else { /* Here a copy is required */ 5430880e062SHong Zhang if (roworiented) { 5440880e062SHong Zhang value = v + i * (stepval + bs) * bs + j * bs; 5450880e062SHong Zhang } else { 5460880e062SHong Zhang value = v + j * (stepval + bs) * bs + i * bs; 5470880e062SHong Zhang } 5480880e062SHong Zhang for (ii = 0; ii < bs; ii++, value += stepval) { 549ad540459SPierre Jolivet for (jj = 0; jj < bs; jj++) *barray++ = *value++; 5500880e062SHong Zhang } 5510880e062SHong Zhang barray -= bs2; 5520880e062SHong Zhang } 5530880e062SHong Zhang 5540880e062SHong Zhang if (in[j] >= cstart && in[j] < cend) { 5550880e062SHong Zhang col = in[j] - cstart; 5569566063dSJacob Faibussowitsch PetscCall(MatSetValuesBlocked_SeqSBAIJ_Inlined(baij->A, row, col, barray, addv, im[i], in[j])); 557f7d195e4SLawrence Mitchell } else if (in[j] < 0) { 558f7d195e4SLawrence Mitchell continue; 559f7d195e4SLawrence Mitchell } else { 560f7d195e4SLawrence Mitchell PetscCheck(in[j] < baij->Nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Block indexed column too large %" PetscInt_FMT " max %" PetscInt_FMT, in[j], baij->Nbs - 1); 5610880e062SHong Zhang if (mat->was_assembled) { 56248a46eb9SPierre Jolivet if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat)); 5630880e062SHong Zhang 5642515c552SBarry Smith #if defined(PETSC_USE_DEBUG) 5650880e062SHong Zhang #if defined(PETSC_USE_CTABLE) 5669371c9d4SSatish Balay { 5679371c9d4SSatish Balay PetscInt data; 568eec179cfSJacob Faibussowitsch PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] + 1, 0, &data)); 56908401ef6SPierre Jolivet PetscCheck((data - 1) % bs == 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Incorrect colmap"); 5700880e062SHong Zhang } 5710880e062SHong Zhang #else 57208401ef6SPierre Jolivet PetscCheck((baij->colmap[in[j]] - 1) % bs == 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Incorrect colmap"); 5730880e062SHong Zhang #endif 5740880e062SHong Zhang #endif 5750880e062SHong Zhang #if defined(PETSC_USE_CTABLE) 576eec179cfSJacob Faibussowitsch PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] + 1, 0, &col)); 5770880e062SHong Zhang col = (col - 1) / bs; 5780880e062SHong Zhang #else 5790880e062SHong Zhang col = (baij->colmap[in[j]] - 1) / bs; 5800880e062SHong Zhang #endif 5810880e062SHong Zhang if (col < 0 && !((Mat_SeqBAIJ *)(baij->A->data))->nonew) { 5829566063dSJacob Faibussowitsch PetscCall(MatDisAssemble_MPISBAIJ(mat)); 5830880e062SHong Zhang col = in[j]; 5840880e062SHong Zhang } 58526fbe8dcSKarl Rupp } else col = in[j]; 5869566063dSJacob Faibussowitsch PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B, row, col, barray, addv, im[i], in[j])); 5870880e062SHong Zhang } 5880880e062SHong Zhang } 5890880e062SHong Zhang } else { 5905f80ce2aSJacob Faibussowitsch PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process block indexed row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set", im[i]); 5910880e062SHong Zhang if (!baij->donotstash) { 5920880e062SHong Zhang if (roworiented) { 5939566063dSJacob Faibussowitsch PetscCall(MatStashValuesRowBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i)); 5940880e062SHong Zhang } else { 5959566063dSJacob Faibussowitsch PetscCall(MatStashValuesColBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i)); 5960880e062SHong Zhang } 5970880e062SHong Zhang } 5980880e062SHong Zhang } 5990880e062SHong Zhang } 6003ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 601a30f8f8cSSatish Balay } 602a30f8f8cSSatish Balay 603d71ae5a4SJacob Faibussowitsch PetscErrorCode MatGetValues_MPISBAIJ(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], PetscScalar v[]) 604d71ae5a4SJacob Faibussowitsch { 605f3566a2aSHong Zhang Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data; 606d0f46423SBarry Smith PetscInt bs = mat->rmap->bs, i, j, bsrstart = mat->rmap->rstart, bsrend = mat->rmap->rend; 607d0f46423SBarry Smith PetscInt bscstart = mat->cmap->rstart, bscend = mat->cmap->rend, row, col, data; 608a30f8f8cSSatish Balay 609a30f8f8cSSatish Balay PetscFunctionBegin; 610a30f8f8cSSatish Balay for (i = 0; i < m; i++) { 61154c59aa7SJacob Faibussowitsch if (idxm[i] < 0) continue; /* negative row */ 61254c59aa7SJacob Faibussowitsch PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, idxm[i], mat->rmap->N - 1); 613a30f8f8cSSatish Balay if (idxm[i] >= bsrstart && idxm[i] < bsrend) { 614a30f8f8cSSatish Balay row = idxm[i] - bsrstart; 615a30f8f8cSSatish Balay for (j = 0; j < n; j++) { 61654c59aa7SJacob Faibussowitsch if (idxn[j] < 0) continue; /* negative column */ 61754c59aa7SJacob Faibussowitsch PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, idxn[j], mat->cmap->N - 1); 618a30f8f8cSSatish Balay if (idxn[j] >= bscstart && idxn[j] < bscend) { 619a30f8f8cSSatish Balay col = idxn[j] - bscstart; 6209566063dSJacob Faibussowitsch PetscCall(MatGetValues_SeqSBAIJ(baij->A, 1, &row, 1, &col, v + i * n + j)); 621a30f8f8cSSatish Balay } else { 62248a46eb9SPierre Jolivet if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat)); 623a30f8f8cSSatish Balay #if defined(PETSC_USE_CTABLE) 624eec179cfSJacob Faibussowitsch PetscCall(PetscHMapIGetWithDefault(baij->colmap, idxn[j] / bs + 1, 0, &data)); 625a30f8f8cSSatish Balay data--; 626a30f8f8cSSatish Balay #else 627a30f8f8cSSatish Balay data = baij->colmap[idxn[j] / bs] - 1; 628a30f8f8cSSatish Balay #endif 629a30f8f8cSSatish Balay if ((data < 0) || (baij->garray[data / bs] != idxn[j] / bs)) *(v + i * n + j) = 0.0; 630a30f8f8cSSatish Balay else { 631a30f8f8cSSatish Balay col = data + idxn[j] % bs; 6329566063dSJacob Faibussowitsch PetscCall(MatGetValues_SeqBAIJ(baij->B, 1, &row, 1, &col, v + i * n + j)); 633a30f8f8cSSatish Balay } 634a30f8f8cSSatish Balay } 635a30f8f8cSSatish Balay } 636f23aa3ddSBarry Smith } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local values currently supported"); 637a30f8f8cSSatish Balay } 6383ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 639a30f8f8cSSatish Balay } 640a30f8f8cSSatish Balay 641d71ae5a4SJacob Faibussowitsch PetscErrorCode MatNorm_MPISBAIJ(Mat mat, NormType type, PetscReal *norm) 642d71ae5a4SJacob Faibussowitsch { 643a30f8f8cSSatish Balay Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data; 644a30f8f8cSSatish Balay PetscReal sum[2], *lnorm2; 645a30f8f8cSSatish Balay 646a30f8f8cSSatish Balay PetscFunctionBegin; 647a30f8f8cSSatish Balay if (baij->size == 1) { 6489566063dSJacob Faibussowitsch PetscCall(MatNorm(baij->A, type, norm)); 649a30f8f8cSSatish Balay } else { 650a30f8f8cSSatish Balay if (type == NORM_FROBENIUS) { 6519566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(2, &lnorm2)); 6529566063dSJacob Faibussowitsch PetscCall(MatNorm(baij->A, type, lnorm2)); 6539371c9d4SSatish Balay *lnorm2 = (*lnorm2) * (*lnorm2); 6549371c9d4SSatish Balay lnorm2++; /* squar power of norm(A) */ 6559566063dSJacob Faibussowitsch PetscCall(MatNorm(baij->B, type, lnorm2)); 6569371c9d4SSatish Balay *lnorm2 = (*lnorm2) * (*lnorm2); 6579371c9d4SSatish Balay lnorm2--; /* squar power of norm(B) */ 6581c2dc1cbSBarry Smith PetscCall(MPIU_Allreduce(lnorm2, sum, 2, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat))); 6598f1a2a5eSBarry Smith *norm = PetscSqrtReal(sum[0] + 2 * sum[1]); 6609566063dSJacob Faibussowitsch PetscCall(PetscFree(lnorm2)); 6610b8dc8d2SHong Zhang } else if (type == NORM_INFINITY || type == NORM_1) { /* max row/column sum */ 6620b8dc8d2SHong Zhang Mat_SeqSBAIJ *amat = (Mat_SeqSBAIJ *)baij->A->data; 6630b8dc8d2SHong Zhang Mat_SeqBAIJ *bmat = (Mat_SeqBAIJ *)baij->B->data; 6640b8dc8d2SHong Zhang PetscReal *rsum, *rsum2, vabs; 665899cda47SBarry Smith PetscInt *jj, *garray = baij->garray, rstart = baij->rstartbs, nz; 666d0f46423SBarry Smith PetscInt brow, bcol, col, bs = baij->A->rmap->bs, row, grow, gcol, mbs = amat->mbs; 6670b8dc8d2SHong Zhang MatScalar *v; 6680b8dc8d2SHong Zhang 6699566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(mat->cmap->N, &rsum, mat->cmap->N, &rsum2)); 6709566063dSJacob Faibussowitsch PetscCall(PetscArrayzero(rsum, mat->cmap->N)); 6710b8dc8d2SHong Zhang /* Amat */ 6729371c9d4SSatish Balay v = amat->a; 6739371c9d4SSatish Balay jj = amat->j; 6740b8dc8d2SHong Zhang for (brow = 0; brow < mbs; brow++) { 6750b8dc8d2SHong Zhang grow = bs * (rstart + brow); 6760b8dc8d2SHong Zhang nz = amat->i[brow + 1] - amat->i[brow]; 6770b8dc8d2SHong Zhang for (bcol = 0; bcol < nz; bcol++) { 6789371c9d4SSatish Balay gcol = bs * (rstart + *jj); 6799371c9d4SSatish Balay jj++; 6800b8dc8d2SHong Zhang for (col = 0; col < bs; col++) { 6810b8dc8d2SHong Zhang for (row = 0; row < bs; row++) { 6829371c9d4SSatish Balay vabs = PetscAbsScalar(*v); 6839371c9d4SSatish Balay v++; 6840b8dc8d2SHong Zhang rsum[gcol + col] += vabs; 6850b8dc8d2SHong Zhang /* non-diagonal block */ 6860b8dc8d2SHong Zhang if (bcol > 0 && vabs > 0.0) rsum[grow + row] += vabs; 6870b8dc8d2SHong Zhang } 6880b8dc8d2SHong Zhang } 6890b8dc8d2SHong Zhang } 6909566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(nz * bs * bs)); 6910b8dc8d2SHong Zhang } 6920b8dc8d2SHong Zhang /* Bmat */ 6939371c9d4SSatish Balay v = bmat->a; 6949371c9d4SSatish Balay jj = bmat->j; 6950b8dc8d2SHong Zhang for (brow = 0; brow < mbs; brow++) { 6960b8dc8d2SHong Zhang grow = bs * (rstart + brow); 6970b8dc8d2SHong Zhang nz = bmat->i[brow + 1] - bmat->i[brow]; 6980b8dc8d2SHong Zhang for (bcol = 0; bcol < nz; bcol++) { 6999371c9d4SSatish Balay gcol = bs * garray[*jj]; 7009371c9d4SSatish Balay jj++; 7010b8dc8d2SHong Zhang for (col = 0; col < bs; col++) { 7020b8dc8d2SHong Zhang for (row = 0; row < bs; row++) { 7039371c9d4SSatish Balay vabs = PetscAbsScalar(*v); 7049371c9d4SSatish Balay v++; 7050b8dc8d2SHong Zhang rsum[gcol + col] += vabs; 7060b8dc8d2SHong Zhang rsum[grow + row] += vabs; 7070b8dc8d2SHong Zhang } 7080b8dc8d2SHong Zhang } 7090b8dc8d2SHong Zhang } 7109566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(nz * bs * bs)); 7110b8dc8d2SHong Zhang } 7121c2dc1cbSBarry Smith PetscCall(MPIU_Allreduce(rsum, rsum2, mat->cmap->N, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat))); 7130b8dc8d2SHong Zhang *norm = 0.0; 714d0f46423SBarry Smith for (col = 0; col < mat->cmap->N; col++) { 7150b8dc8d2SHong Zhang if (rsum2[col] > *norm) *norm = rsum2[col]; 7160b8dc8d2SHong Zhang } 7179566063dSJacob Faibussowitsch PetscCall(PetscFree2(rsum, rsum2)); 718f23aa3ddSBarry Smith } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for this norm yet"); 719a30f8f8cSSatish Balay } 7203ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 721a30f8f8cSSatish Balay } 722a30f8f8cSSatish Balay 723d71ae5a4SJacob Faibussowitsch PetscErrorCode MatAssemblyBegin_MPISBAIJ(Mat mat, MatAssemblyType mode) 724d71ae5a4SJacob Faibussowitsch { 725a30f8f8cSSatish Balay Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data; 7261302d50aSBarry Smith PetscInt nstash, reallocs; 727a30f8f8cSSatish Balay 728a30f8f8cSSatish Balay PetscFunctionBegin; 7293ba16761SJacob Faibussowitsch if (baij->donotstash || mat->nooffprocentries) PetscFunctionReturn(PETSC_SUCCESS); 730a30f8f8cSSatish Balay 7319566063dSJacob Faibussowitsch PetscCall(MatStashScatterBegin_Private(mat, &mat->stash, mat->rmap->range)); 7329566063dSJacob Faibussowitsch PetscCall(MatStashScatterBegin_Private(mat, &mat->bstash, baij->rangebs)); 7339566063dSJacob Faibussowitsch PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs)); 7349566063dSJacob Faibussowitsch PetscCall(PetscInfo(mat, "Stash has %" PetscInt_FMT " entries,uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs)); 7359566063dSJacob Faibussowitsch PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs)); 7369566063dSJacob Faibussowitsch PetscCall(PetscInfo(mat, "Block-Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs)); 7373ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 738a30f8f8cSSatish Balay } 739a30f8f8cSSatish Balay 740d71ae5a4SJacob Faibussowitsch PetscErrorCode MatAssemblyEnd_MPISBAIJ(Mat mat, MatAssemblyType mode) 741d71ae5a4SJacob Faibussowitsch { 742a30f8f8cSSatish Balay Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data; 743a30f8f8cSSatish Balay Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)baij->A->data; 74413f74950SBarry Smith PetscInt i, j, rstart, ncols, flg, bs2 = baij->bs2; 745e44c0bd4SBarry Smith PetscInt *row, *col; 746ace3abfcSBarry Smith PetscBool other_disassembled; 74713f74950SBarry Smith PetscMPIInt n; 748ace3abfcSBarry Smith PetscBool r1, r2, r3; 749a30f8f8cSSatish Balay MatScalar *val; 750a30f8f8cSSatish Balay 75191c97fd4SSatish Balay /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */ 752a30f8f8cSSatish Balay PetscFunctionBegin; 7534cb17eb5SBarry Smith if (!baij->donotstash && !mat->nooffprocentries) { 754a30f8f8cSSatish Balay while (1) { 7559566063dSJacob Faibussowitsch PetscCall(MatStashScatterGetMesg_Private(&mat->stash, &n, &row, &col, &val, &flg)); 756a30f8f8cSSatish Balay if (!flg) break; 757a30f8f8cSSatish Balay 758a30f8f8cSSatish Balay for (i = 0; i < n;) { 759a30f8f8cSSatish Balay /* Now identify the consecutive vals belonging to the same row */ 76026fbe8dcSKarl Rupp for (j = i, rstart = row[j]; j < n; j++) { 76126fbe8dcSKarl Rupp if (row[j] != rstart) break; 76226fbe8dcSKarl Rupp } 763a30f8f8cSSatish Balay if (j < n) ncols = j - i; 764a30f8f8cSSatish Balay else ncols = n - i; 765a30f8f8cSSatish Balay /* Now assemble all these values with a single function call */ 7669566063dSJacob Faibussowitsch PetscCall(MatSetValues_MPISBAIJ(mat, 1, row + i, ncols, col + i, val + i, mat->insertmode)); 767a30f8f8cSSatish Balay i = j; 768a30f8f8cSSatish Balay } 769a30f8f8cSSatish Balay } 7709566063dSJacob Faibussowitsch PetscCall(MatStashScatterEnd_Private(&mat->stash)); 771a30f8f8cSSatish Balay /* Now process the block-stash. Since the values are stashed column-oriented, 772a30f8f8cSSatish Balay set the roworiented flag to column oriented, and after MatSetValues() 773a30f8f8cSSatish Balay restore the original flags */ 774a30f8f8cSSatish Balay r1 = baij->roworiented; 775a30f8f8cSSatish Balay r2 = a->roworiented; 77691c97fd4SSatish Balay r3 = ((Mat_SeqBAIJ *)baij->B->data)->roworiented; 77726fbe8dcSKarl Rupp 778a30f8f8cSSatish Balay baij->roworiented = PETSC_FALSE; 779a30f8f8cSSatish Balay a->roworiented = PETSC_FALSE; 78026fbe8dcSKarl Rupp 78191c97fd4SSatish Balay ((Mat_SeqBAIJ *)baij->B->data)->roworiented = PETSC_FALSE; /* b->roworinted */ 782a30f8f8cSSatish Balay while (1) { 7839566063dSJacob Faibussowitsch PetscCall(MatStashScatterGetMesg_Private(&mat->bstash, &n, &row, &col, &val, &flg)); 784a30f8f8cSSatish Balay if (!flg) break; 785a30f8f8cSSatish Balay 786a30f8f8cSSatish Balay for (i = 0; i < n;) { 787a30f8f8cSSatish Balay /* Now identify the consecutive vals belonging to the same row */ 78826fbe8dcSKarl Rupp for (j = i, rstart = row[j]; j < n; j++) { 78926fbe8dcSKarl Rupp if (row[j] != rstart) break; 79026fbe8dcSKarl Rupp } 791a30f8f8cSSatish Balay if (j < n) ncols = j - i; 792a30f8f8cSSatish Balay else ncols = n - i; 7939566063dSJacob Faibussowitsch PetscCall(MatSetValuesBlocked_MPISBAIJ(mat, 1, row + i, ncols, col + i, val + i * bs2, mat->insertmode)); 794a30f8f8cSSatish Balay i = j; 795a30f8f8cSSatish Balay } 796a30f8f8cSSatish Balay } 7979566063dSJacob Faibussowitsch PetscCall(MatStashScatterEnd_Private(&mat->bstash)); 79826fbe8dcSKarl Rupp 799a30f8f8cSSatish Balay baij->roworiented = r1; 800a30f8f8cSSatish Balay a->roworiented = r2; 80126fbe8dcSKarl Rupp 80291c97fd4SSatish Balay ((Mat_SeqBAIJ *)baij->B->data)->roworiented = r3; /* b->roworinted */ 803a30f8f8cSSatish Balay } 804a30f8f8cSSatish Balay 8059566063dSJacob Faibussowitsch PetscCall(MatAssemblyBegin(baij->A, mode)); 8069566063dSJacob Faibussowitsch PetscCall(MatAssemblyEnd(baij->A, mode)); 807a30f8f8cSSatish Balay 808a30f8f8cSSatish Balay /* determine if any processor has disassembled, if so we must 8096aad120cSJose E. Roman also disassemble ourselves, in order that we may reassemble. */ 810a30f8f8cSSatish Balay /* 811a30f8f8cSSatish Balay if nonzero structure of submatrix B cannot change then we know that 812a30f8f8cSSatish Balay no processor disassembled thus we can skip this stuff 813a30f8f8cSSatish Balay */ 814a30f8f8cSSatish Balay if (!((Mat_SeqBAIJ *)baij->B->data)->nonew) { 8155f9db2b2SJunchao Zhang PetscCall(MPIU_Allreduce(&mat->was_assembled, &other_disassembled, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat))); 81648a46eb9SPierre Jolivet if (mat->was_assembled && !other_disassembled) PetscCall(MatDisAssemble_MPISBAIJ(mat)); 817a30f8f8cSSatish Balay } 818a30f8f8cSSatish Balay 8199371c9d4SSatish Balay if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { PetscCall(MatSetUpMultiply_MPISBAIJ(mat)); /* setup Mvctx and sMvctx */ } 8209566063dSJacob Faibussowitsch PetscCall(MatAssemblyBegin(baij->B, mode)); 8219566063dSJacob Faibussowitsch PetscCall(MatAssemblyEnd(baij->B, mode)); 822a30f8f8cSSatish Balay 8239566063dSJacob Faibussowitsch PetscCall(PetscFree2(baij->rowvalues, baij->rowindices)); 82426fbe8dcSKarl Rupp 825f4259b30SLisandro Dalcin baij->rowvalues = NULL; 8264f9cfa9eSBarry Smith 8274f9cfa9eSBarry Smith /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */ 8284f9cfa9eSBarry Smith if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqBAIJ *)(baij->A->data))->nonew) { 829e56f5c9eSBarry Smith PetscObjectState state = baij->A->nonzerostate + baij->B->nonzerostate; 8301c2dc1cbSBarry Smith PetscCall(MPIU_Allreduce(&state, &mat->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)mat))); 831e56f5c9eSBarry Smith } 8323ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 833a30f8f8cSSatish Balay } 834a30f8f8cSSatish Balay 835dd6ea824SBarry Smith extern PetscErrorCode MatSetValues_MPIBAIJ(Mat, PetscInt, const PetscInt[], PetscInt, const PetscInt[], const PetscScalar[], InsertMode); 8369804daf3SBarry Smith #include <petscdraw.h> 837d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat, PetscViewer viewer) 838d71ae5a4SJacob Faibussowitsch { 839a30f8f8cSSatish Balay Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data; 840d0f46423SBarry Smith PetscInt bs = mat->rmap->bs; 8417da1fb6eSBarry Smith PetscMPIInt rank = baij->rank; 842ace3abfcSBarry Smith PetscBool iascii, isdraw; 843b0a32e0cSBarry Smith PetscViewer sviewer; 844f3ef73ceSBarry Smith PetscViewerFormat format; 845a30f8f8cSSatish Balay 846a30f8f8cSSatish Balay PetscFunctionBegin; 8479566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii)); 8489566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw)); 84932077d6dSBarry Smith if (iascii) { 8509566063dSJacob Faibussowitsch PetscCall(PetscViewerGetFormat(viewer, &format)); 851456192e2SBarry Smith if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 852a30f8f8cSSatish Balay MatInfo info; 8539566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank)); 8549566063dSJacob Faibussowitsch PetscCall(MatGetInfo(mat, MAT_LOCAL, &info)); 8559566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPushSynchronized(viewer)); 8569371c9d4SSatish Balay PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " bs %" PetscInt_FMT " mem %g\n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated, 8579371c9d4SSatish Balay mat->rmap->bs, (double)info.memory)); 8589566063dSJacob Faibussowitsch PetscCall(MatGetInfo(baij->A, MAT_LOCAL, &info)); 8599566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] on-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used)); 8609566063dSJacob Faibussowitsch PetscCall(MatGetInfo(baij->B, MAT_LOCAL, &info)); 8619566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] off-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used)); 8629566063dSJacob Faibussowitsch PetscCall(PetscViewerFlush(viewer)); 8639566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPopSynchronized(viewer)); 8649566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, "Information on VecScatter used in matrix-vector product: \n")); 8659566063dSJacob Faibussowitsch PetscCall(VecScatterView(baij->Mvctx, viewer)); 8663ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 867fb9695e5SSatish Balay } else if (format == PETSC_VIEWER_ASCII_INFO) { 8689566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " block size is %" PetscInt_FMT "\n", bs)); 8693ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 870c1490034SHong Zhang } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 8713ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 872a30f8f8cSSatish Balay } 873a30f8f8cSSatish Balay } 874a30f8f8cSSatish Balay 875a30f8f8cSSatish Balay if (isdraw) { 876b0a32e0cSBarry Smith PetscDraw draw; 877ace3abfcSBarry Smith PetscBool isnull; 8789566063dSJacob Faibussowitsch PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw)); 8799566063dSJacob Faibussowitsch PetscCall(PetscDrawIsNull(draw, &isnull)); 8803ba16761SJacob Faibussowitsch if (isnull) PetscFunctionReturn(PETSC_SUCCESS); 881a30f8f8cSSatish Balay } 882a30f8f8cSSatish Balay 8837da1fb6eSBarry Smith { 884a30f8f8cSSatish Balay /* assemble the entire matrix onto first processor. */ 885a30f8f8cSSatish Balay Mat A; 88665d70643SHong Zhang Mat_SeqSBAIJ *Aloc; 88765d70643SHong Zhang Mat_SeqBAIJ *Bloc; 888d0f46423SBarry Smith PetscInt M = mat->rmap->N, N = mat->cmap->N, *ai, *aj, col, i, j, k, *rvals, mbs = baij->mbs; 889a30f8f8cSSatish Balay MatScalar *a; 8903e219373SBarry Smith const char *matname; 891a30f8f8cSSatish Balay 892f204ca49SKris Buschelman /* Should this be the same type as mat? */ 8939566063dSJacob Faibussowitsch PetscCall(MatCreate(PetscObjectComm((PetscObject)mat), &A)); 894dd400576SPatrick Sanan if (rank == 0) { 8959566063dSJacob Faibussowitsch PetscCall(MatSetSizes(A, M, N, M, N)); 896a30f8f8cSSatish Balay } else { 8979566063dSJacob Faibussowitsch PetscCall(MatSetSizes(A, 0, 0, M, N)); 898a30f8f8cSSatish Balay } 8999566063dSJacob Faibussowitsch PetscCall(MatSetType(A, MATMPISBAIJ)); 9009566063dSJacob Faibussowitsch PetscCall(MatMPISBAIJSetPreallocation(A, mat->rmap->bs, 0, NULL, 0, NULL)); 9019566063dSJacob Faibussowitsch PetscCall(MatSetOption(A, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_FALSE)); 902a30f8f8cSSatish Balay 903a30f8f8cSSatish Balay /* copy over the A part */ 90465d70643SHong Zhang Aloc = (Mat_SeqSBAIJ *)baij->A->data; 9059371c9d4SSatish Balay ai = Aloc->i; 9069371c9d4SSatish Balay aj = Aloc->j; 9079371c9d4SSatish Balay a = Aloc->a; 9089566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(bs, &rvals)); 909a30f8f8cSSatish Balay 910a30f8f8cSSatish Balay for (i = 0; i < mbs; i++) { 911e9f7bc9eSHong Zhang rvals[0] = bs * (baij->rstartbs + i); 91226fbe8dcSKarl Rupp for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1; 913a30f8f8cSSatish Balay for (j = ai[i]; j < ai[i + 1]; j++) { 914e9f7bc9eSHong Zhang col = (baij->cstartbs + aj[j]) * bs; 915a30f8f8cSSatish Balay for (k = 0; k < bs; k++) { 9169566063dSJacob Faibussowitsch PetscCall(MatSetValues_MPISBAIJ(A, bs, rvals, 1, &col, a, INSERT_VALUES)); 91726fbe8dcSKarl Rupp col++; 91826fbe8dcSKarl Rupp a += bs; 919a30f8f8cSSatish Balay } 920a30f8f8cSSatish Balay } 921a30f8f8cSSatish Balay } 922a30f8f8cSSatish Balay /* copy over the B part */ 92365d70643SHong Zhang Bloc = (Mat_SeqBAIJ *)baij->B->data; 9249371c9d4SSatish Balay ai = Bloc->i; 9259371c9d4SSatish Balay aj = Bloc->j; 9269371c9d4SSatish Balay a = Bloc->a; 927a30f8f8cSSatish Balay for (i = 0; i < mbs; i++) { 928e9f7bc9eSHong Zhang rvals[0] = bs * (baij->rstartbs + i); 92926fbe8dcSKarl Rupp for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1; 930a30f8f8cSSatish Balay for (j = ai[i]; j < ai[i + 1]; j++) { 931a30f8f8cSSatish Balay col = baij->garray[aj[j]] * bs; 932a30f8f8cSSatish Balay for (k = 0; k < bs; k++) { 9339566063dSJacob Faibussowitsch PetscCall(MatSetValues_MPIBAIJ(A, bs, rvals, 1, &col, a, INSERT_VALUES)); 93426fbe8dcSKarl Rupp col++; 93526fbe8dcSKarl Rupp a += bs; 936a30f8f8cSSatish Balay } 937a30f8f8cSSatish Balay } 938a30f8f8cSSatish Balay } 9399566063dSJacob Faibussowitsch PetscCall(PetscFree(rvals)); 9409566063dSJacob Faibussowitsch PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY)); 9419566063dSJacob Faibussowitsch PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY)); 942a30f8f8cSSatish Balay /* 943a30f8f8cSSatish Balay Everyone has to call to draw the matrix since the graphics waits are 944b0a32e0cSBarry Smith synchronized across all processors that share the PetscDraw object 945a30f8f8cSSatish Balay */ 9469566063dSJacob Faibussowitsch PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer)); 947*23a3927dSBarry Smith if (((PetscObject)mat)->name) PetscCall(PetscObjectGetName((PetscObject)mat, &matname)); 948dd400576SPatrick Sanan if (rank == 0) { 949*23a3927dSBarry Smith if (((PetscObject)mat)->name) PetscCall(PetscObjectSetName((PetscObject)((Mat_MPISBAIJ *)(A->data))->A, matname)); 9509566063dSJacob Faibussowitsch PetscCall(MatView_SeqSBAIJ(((Mat_MPISBAIJ *)(A->data))->A, sviewer)); 951a30f8f8cSSatish Balay } 9529566063dSJacob Faibussowitsch PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer)); 9539566063dSJacob Faibussowitsch PetscCall(PetscViewerFlush(viewer)); 9549566063dSJacob Faibussowitsch PetscCall(MatDestroy(&A)); 955a30f8f8cSSatish Balay } 9563ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 957a30f8f8cSSatish Balay } 958a30f8f8cSSatish Balay 959618cc2edSLisandro Dalcin /* Used for both MPIBAIJ and MPISBAIJ matrices */ 960618cc2edSLisandro Dalcin #define MatView_MPISBAIJ_Binary MatView_MPIBAIJ_Binary 961d1654148SHong Zhang 962d71ae5a4SJacob Faibussowitsch PetscErrorCode MatView_MPISBAIJ(Mat mat, PetscViewer viewer) 963d71ae5a4SJacob Faibussowitsch { 964ace3abfcSBarry Smith PetscBool iascii, isdraw, issocket, isbinary; 965a30f8f8cSSatish Balay 966a30f8f8cSSatish Balay PetscFunctionBegin; 9679566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii)); 9689566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw)); 9699566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSOCKET, &issocket)); 9709566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary)); 971d1654148SHong Zhang if (iascii || isdraw || issocket) { 9729566063dSJacob Faibussowitsch PetscCall(MatView_MPISBAIJ_ASCIIorDraworSocket(mat, viewer)); 9731baa6e33SBarry Smith } else if (isbinary) PetscCall(MatView_MPISBAIJ_Binary(mat, viewer)); 9743ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 975a30f8f8cSSatish Balay } 976a30f8f8cSSatish Balay 977d71ae5a4SJacob Faibussowitsch PetscErrorCode MatDestroy_MPISBAIJ(Mat mat) 978d71ae5a4SJacob Faibussowitsch { 979a30f8f8cSSatish Balay Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data; 980a30f8f8cSSatish Balay 981a30f8f8cSSatish Balay PetscFunctionBegin; 982a30f8f8cSSatish Balay #if defined(PETSC_USE_LOG) 9833ba16761SJacob Faibussowitsch PetscCall(PetscLogObjectState((PetscObject)mat, "Rows=%" PetscInt_FMT ",Cols=%" PetscInt_FMT, mat->rmap->N, mat->cmap->N)); 984a30f8f8cSSatish Balay #endif 9859566063dSJacob Faibussowitsch PetscCall(MatStashDestroy_Private(&mat->stash)); 9869566063dSJacob Faibussowitsch PetscCall(MatStashDestroy_Private(&mat->bstash)); 9879566063dSJacob Faibussowitsch PetscCall(MatDestroy(&baij->A)); 9889566063dSJacob Faibussowitsch PetscCall(MatDestroy(&baij->B)); 989a30f8f8cSSatish Balay #if defined(PETSC_USE_CTABLE) 990eec179cfSJacob Faibussowitsch PetscCall(PetscHMapIDestroy(&baij->colmap)); 991a30f8f8cSSatish Balay #else 9929566063dSJacob Faibussowitsch PetscCall(PetscFree(baij->colmap)); 993a30f8f8cSSatish Balay #endif 9949566063dSJacob Faibussowitsch PetscCall(PetscFree(baij->garray)); 9959566063dSJacob Faibussowitsch PetscCall(VecDestroy(&baij->lvec)); 9969566063dSJacob Faibussowitsch PetscCall(VecScatterDestroy(&baij->Mvctx)); 9979566063dSJacob Faibussowitsch PetscCall(VecDestroy(&baij->slvec0)); 9989566063dSJacob Faibussowitsch PetscCall(VecDestroy(&baij->slvec0b)); 9999566063dSJacob Faibussowitsch PetscCall(VecDestroy(&baij->slvec1)); 10009566063dSJacob Faibussowitsch PetscCall(VecDestroy(&baij->slvec1a)); 10019566063dSJacob Faibussowitsch PetscCall(VecDestroy(&baij->slvec1b)); 10029566063dSJacob Faibussowitsch PetscCall(VecScatterDestroy(&baij->sMvctx)); 10039566063dSJacob Faibussowitsch PetscCall(PetscFree2(baij->rowvalues, baij->rowindices)); 10049566063dSJacob Faibussowitsch PetscCall(PetscFree(baij->barray)); 10059566063dSJacob Faibussowitsch PetscCall(PetscFree(baij->hd)); 10069566063dSJacob Faibussowitsch PetscCall(VecDestroy(&baij->diag)); 10079566063dSJacob Faibussowitsch PetscCall(VecDestroy(&baij->bb1)); 10089566063dSJacob Faibussowitsch PetscCall(VecDestroy(&baij->xx1)); 1009ce63c4c1SBarry Smith #if defined(PETSC_USE_REAL_MAT_SINGLE) 10109566063dSJacob Faibussowitsch PetscCall(PetscFree(baij->setvaluescopy)); 1011a30f8f8cSSatish Balay #endif 10129566063dSJacob Faibussowitsch PetscCall(PetscFree(baij->in_loc)); 10139566063dSJacob Faibussowitsch PetscCall(PetscFree(baij->v_loc)); 10149566063dSJacob Faibussowitsch PetscCall(PetscFree(baij->rangebs)); 10159566063dSJacob Faibussowitsch PetscCall(PetscFree(mat->data)); 1016901853e0SKris Buschelman 10179566063dSJacob Faibussowitsch PetscCall(PetscObjectChangeTypeName((PetscObject)mat, NULL)); 10189566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatStoreValues_C", NULL)); 10199566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatRetrieveValues_C", NULL)); 10209566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPISBAIJSetPreallocation_C", NULL)); 10219566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPISBAIJSetPreallocationCSR_C", NULL)); 10226214f412SHong Zhang #if defined(PETSC_HAVE_ELEMENTAL) 10239566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpisbaij_elemental_C", NULL)); 10246214f412SHong Zhang #endif 1025d24d4204SJose E. Roman #if defined(PETSC_HAVE_SCALAPACK) 10269566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpisbaij_scalapack_C", NULL)); 1027d24d4204SJose E. Roman #endif 10289566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpisbaij_mpiaij_C", NULL)); 10299566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpisbaij_mpibaij_C", NULL)); 10303ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1031a30f8f8cSSatish Balay } 1032a30f8f8cSSatish Balay 1033d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMult_MPISBAIJ_Hermitian(Mat A, Vec xx, Vec yy) 1034d71ae5a4SJacob Faibussowitsch { 1035547795f9SHong Zhang Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data; 1036eb1ec7c1SStefano Zampini PetscInt mbs = a->mbs, bs = A->rmap->bs; 10376de40e93SBarry Smith PetscScalar *from; 10386de40e93SBarry Smith const PetscScalar *x; 1039547795f9SHong Zhang 1040547795f9SHong Zhang PetscFunctionBegin; 1041547795f9SHong Zhang /* diagonal part */ 10429566063dSJacob Faibussowitsch PetscCall((*a->A->ops->mult)(a->A, xx, a->slvec1a)); 10439566063dSJacob Faibussowitsch PetscCall(VecSet(a->slvec1b, 0.0)); 1044547795f9SHong Zhang 1045547795f9SHong Zhang /* subdiagonal part */ 10465f80ce2aSJacob Faibussowitsch PetscCheck(a->B->ops->multhermitiantranspose, PetscObjectComm((PetscObject)a->B), PETSC_ERR_SUP, "Not for type %s", ((PetscObject)a->B)->type_name); 10479566063dSJacob Faibussowitsch PetscCall((*a->B->ops->multhermitiantranspose)(a->B, xx, a->slvec0b)); 1048547795f9SHong Zhang 1049547795f9SHong Zhang /* copy x into the vec slvec0 */ 10509566063dSJacob Faibussowitsch PetscCall(VecGetArray(a->slvec0, &from)); 10519566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx, &x)); 1052547795f9SHong Zhang 10539566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(from, x, bs * mbs)); 10549566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(a->slvec0, &from)); 10559566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx, &x)); 1056547795f9SHong Zhang 10579566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD)); 10589566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD)); 1059547795f9SHong Zhang /* supperdiagonal part */ 10609566063dSJacob Faibussowitsch PetscCall((*a->B->ops->multadd)(a->B, a->slvec1b, a->slvec1a, yy)); 10613ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1062547795f9SHong Zhang } 1063547795f9SHong Zhang 1064d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMult_MPISBAIJ(Mat A, Vec xx, Vec yy) 1065d71ae5a4SJacob Faibussowitsch { 1066a9d4b620SHong Zhang Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data; 1067eb1ec7c1SStefano Zampini PetscInt mbs = a->mbs, bs = A->rmap->bs; 1068d9ca1df4SBarry Smith PetscScalar *from; 1069d9ca1df4SBarry Smith const PetscScalar *x; 1070a9d4b620SHong Zhang 1071a9d4b620SHong Zhang PetscFunctionBegin; 1072a9d4b620SHong Zhang /* diagonal part */ 10739566063dSJacob Faibussowitsch PetscCall((*a->A->ops->mult)(a->A, xx, a->slvec1a)); 10749566063dSJacob Faibussowitsch PetscCall(VecSet(a->slvec1b, 0.0)); 1075a9d4b620SHong Zhang 1076a9d4b620SHong Zhang /* subdiagonal part */ 10779566063dSJacob Faibussowitsch PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->slvec0b)); 1078fc165ae2SBarry Smith 1079a9d4b620SHong Zhang /* copy x into the vec slvec0 */ 10809566063dSJacob Faibussowitsch PetscCall(VecGetArray(a->slvec0, &from)); 10819566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx, &x)); 1082a9d4b620SHong Zhang 10839566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(from, x, bs * mbs)); 10849566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(a->slvec0, &from)); 10859566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx, &x)); 1086fc165ae2SBarry Smith 10879566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD)); 10889566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD)); 1089a9d4b620SHong Zhang /* supperdiagonal part */ 10909566063dSJacob Faibussowitsch PetscCall((*a->B->ops->multadd)(a->B, a->slvec1b, a->slvec1a, yy)); 10913ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1092a9d4b620SHong Zhang } 1093a9d4b620SHong Zhang 1094d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMultAdd_MPISBAIJ_Hermitian(Mat A, Vec xx, Vec yy, Vec zz) 1095d71ae5a4SJacob Faibussowitsch { 1096eb1ec7c1SStefano Zampini Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data; 1097eb1ec7c1SStefano Zampini PetscInt mbs = a->mbs, bs = A->rmap->bs; 1098eb1ec7c1SStefano Zampini PetscScalar *from, zero = 0.0; 1099eb1ec7c1SStefano Zampini const PetscScalar *x; 1100eb1ec7c1SStefano Zampini 1101eb1ec7c1SStefano Zampini PetscFunctionBegin; 1102eb1ec7c1SStefano Zampini /* diagonal part */ 11039566063dSJacob Faibussowitsch PetscCall((*a->A->ops->multadd)(a->A, xx, yy, a->slvec1a)); 11049566063dSJacob Faibussowitsch PetscCall(VecSet(a->slvec1b, zero)); 1105eb1ec7c1SStefano Zampini 1106eb1ec7c1SStefano Zampini /* subdiagonal part */ 11075f80ce2aSJacob Faibussowitsch PetscCheck(a->B->ops->multhermitiantranspose, PetscObjectComm((PetscObject)a->B), PETSC_ERR_SUP, "Not for type %s", ((PetscObject)a->B)->type_name); 11089566063dSJacob Faibussowitsch PetscCall((*a->B->ops->multhermitiantranspose)(a->B, xx, a->slvec0b)); 1109eb1ec7c1SStefano Zampini 1110eb1ec7c1SStefano Zampini /* copy x into the vec slvec0 */ 11119566063dSJacob Faibussowitsch PetscCall(VecGetArray(a->slvec0, &from)); 11129566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx, &x)); 11139566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(from, x, bs * mbs)); 11149566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(a->slvec0, &from)); 1115eb1ec7c1SStefano Zampini 11169566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD)); 11179566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx, &x)); 11189566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD)); 1119eb1ec7c1SStefano Zampini 1120eb1ec7c1SStefano Zampini /* supperdiagonal part */ 11219566063dSJacob Faibussowitsch PetscCall((*a->B->ops->multadd)(a->B, a->slvec1b, a->slvec1a, zz)); 11223ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1123eb1ec7c1SStefano Zampini } 1124eb1ec7c1SStefano Zampini 1125d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMultAdd_MPISBAIJ(Mat A, Vec xx, Vec yy, Vec zz) 1126d71ae5a4SJacob Faibussowitsch { 1127de8b6608SHong Zhang Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data; 1128d0f46423SBarry Smith PetscInt mbs = a->mbs, bs = A->rmap->bs; 1129d9ca1df4SBarry Smith PetscScalar *from, zero = 0.0; 1130d9ca1df4SBarry Smith const PetscScalar *x; 1131a9d4b620SHong Zhang 1132a9d4b620SHong Zhang PetscFunctionBegin; 1133a9d4b620SHong Zhang /* diagonal part */ 11349566063dSJacob Faibussowitsch PetscCall((*a->A->ops->multadd)(a->A, xx, yy, a->slvec1a)); 11359566063dSJacob Faibussowitsch PetscCall(VecSet(a->slvec1b, zero)); 1136a9d4b620SHong Zhang 1137a9d4b620SHong Zhang /* subdiagonal part */ 11389566063dSJacob Faibussowitsch PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->slvec0b)); 1139a9d4b620SHong Zhang 1140a9d4b620SHong Zhang /* copy x into the vec slvec0 */ 11419566063dSJacob Faibussowitsch PetscCall(VecGetArray(a->slvec0, &from)); 11429566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx, &x)); 11439566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(from, x, bs * mbs)); 11449566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(a->slvec0, &from)); 1145a9d4b620SHong Zhang 11469566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD)); 11479566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx, &x)); 11489566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD)); 1149a9d4b620SHong Zhang 1150a9d4b620SHong Zhang /* supperdiagonal part */ 11519566063dSJacob Faibussowitsch PetscCall((*a->B->ops->multadd)(a->B, a->slvec1b, a->slvec1a, zz)); 11523ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1153a9d4b620SHong Zhang } 1154a9d4b620SHong Zhang 1155a30f8f8cSSatish Balay /* 1156a30f8f8cSSatish Balay This only works correctly for square matrices where the subblock A->A is the 1157a30f8f8cSSatish Balay diagonal block 1158a30f8f8cSSatish Balay */ 1159d71ae5a4SJacob Faibussowitsch PetscErrorCode MatGetDiagonal_MPISBAIJ(Mat A, Vec v) 1160d71ae5a4SJacob Faibussowitsch { 1161a30f8f8cSSatish Balay Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data; 1162a30f8f8cSSatish Balay 1163a30f8f8cSSatish Balay PetscFunctionBegin; 116408401ef6SPierre Jolivet /* PetscCheck(a->rmap->N == a->cmap->N,PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); */ 11659566063dSJacob Faibussowitsch PetscCall(MatGetDiagonal(a->A, v)); 11663ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1167a30f8f8cSSatish Balay } 1168a30f8f8cSSatish Balay 1169d71ae5a4SJacob Faibussowitsch PetscErrorCode MatScale_MPISBAIJ(Mat A, PetscScalar aa) 1170d71ae5a4SJacob Faibussowitsch { 1171a30f8f8cSSatish Balay Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data; 1172a30f8f8cSSatish Balay 1173a30f8f8cSSatish Balay PetscFunctionBegin; 11749566063dSJacob Faibussowitsch PetscCall(MatScale(a->A, aa)); 11759566063dSJacob Faibussowitsch PetscCall(MatScale(a->B, aa)); 11763ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1177a30f8f8cSSatish Balay } 1178a30f8f8cSSatish Balay 1179d71ae5a4SJacob Faibussowitsch PetscErrorCode MatGetRow_MPISBAIJ(Mat matin, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v) 1180d71ae5a4SJacob Faibussowitsch { 1181d0d4cfc2SHong Zhang Mat_MPISBAIJ *mat = (Mat_MPISBAIJ *)matin->data; 1182d0d4cfc2SHong Zhang PetscScalar *vworkA, *vworkB, **pvA, **pvB, *v_p; 1183d0f46423SBarry Smith PetscInt bs = matin->rmap->bs, bs2 = mat->bs2, i, *cworkA, *cworkB, **pcA, **pcB; 1184d0f46423SBarry Smith PetscInt nztot, nzA, nzB, lrow, brstart = matin->rmap->rstart, brend = matin->rmap->rend; 1185899cda47SBarry Smith PetscInt *cmap, *idx_p, cstart = mat->rstartbs; 1186d0d4cfc2SHong Zhang 1187a30f8f8cSSatish Balay PetscFunctionBegin; 11885f80ce2aSJacob Faibussowitsch PetscCheck(!mat->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Already active"); 1189d0d4cfc2SHong Zhang mat->getrowactive = PETSC_TRUE; 1190d0d4cfc2SHong Zhang 1191d0d4cfc2SHong Zhang if (!mat->rowvalues && (idx || v)) { 1192d0d4cfc2SHong Zhang /* 1193d0d4cfc2SHong Zhang allocate enough space to hold information from the longest row. 1194d0d4cfc2SHong Zhang */ 1195d0d4cfc2SHong Zhang Mat_SeqSBAIJ *Aa = (Mat_SeqSBAIJ *)mat->A->data; 1196d0d4cfc2SHong Zhang Mat_SeqBAIJ *Ba = (Mat_SeqBAIJ *)mat->B->data; 1197d0d4cfc2SHong Zhang PetscInt max = 1, mbs = mat->mbs, tmp; 1198d0d4cfc2SHong Zhang for (i = 0; i < mbs; i++) { 1199d0d4cfc2SHong Zhang tmp = Aa->i[i + 1] - Aa->i[i] + Ba->i[i + 1] - Ba->i[i]; /* row length */ 120026fbe8dcSKarl Rupp if (max < tmp) max = tmp; 1201d0d4cfc2SHong Zhang } 12029566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(max * bs2, &mat->rowvalues, max * bs2, &mat->rowindices)); 1203d0d4cfc2SHong Zhang } 1204d0d4cfc2SHong Zhang 12055f80ce2aSJacob Faibussowitsch PetscCheck(row >= brstart && row < brend, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local rows"); 1206d0d4cfc2SHong Zhang lrow = row - brstart; /* local row index */ 1207d0d4cfc2SHong Zhang 12089371c9d4SSatish Balay pvA = &vworkA; 12099371c9d4SSatish Balay pcA = &cworkA; 12109371c9d4SSatish Balay pvB = &vworkB; 12119371c9d4SSatish Balay pcB = &cworkB; 12129371c9d4SSatish Balay if (!v) { 12139371c9d4SSatish Balay pvA = NULL; 12149371c9d4SSatish Balay pvB = NULL; 12159371c9d4SSatish Balay } 12169371c9d4SSatish Balay if (!idx) { 12179371c9d4SSatish Balay pcA = NULL; 12189371c9d4SSatish Balay if (!v) pcB = NULL; 12199371c9d4SSatish Balay } 12209566063dSJacob Faibussowitsch PetscCall((*mat->A->ops->getrow)(mat->A, lrow, &nzA, pcA, pvA)); 12219566063dSJacob Faibussowitsch PetscCall((*mat->B->ops->getrow)(mat->B, lrow, &nzB, pcB, pvB)); 1222d0d4cfc2SHong Zhang nztot = nzA + nzB; 1223d0d4cfc2SHong Zhang 1224d0d4cfc2SHong Zhang cmap = mat->garray; 1225d0d4cfc2SHong Zhang if (v || idx) { 1226d0d4cfc2SHong Zhang if (nztot) { 1227d0d4cfc2SHong Zhang /* Sort by increasing column numbers, assuming A and B already sorted */ 1228d0d4cfc2SHong Zhang PetscInt imark = -1; 1229d0d4cfc2SHong Zhang if (v) { 1230d0d4cfc2SHong Zhang *v = v_p = mat->rowvalues; 1231d0d4cfc2SHong Zhang for (i = 0; i < nzB; i++) { 1232d0d4cfc2SHong Zhang if (cmap[cworkB[i] / bs] < cstart) v_p[i] = vworkB[i]; 1233d0d4cfc2SHong Zhang else break; 1234d0d4cfc2SHong Zhang } 1235d0d4cfc2SHong Zhang imark = i; 1236d0d4cfc2SHong Zhang for (i = 0; i < nzA; i++) v_p[imark + i] = vworkA[i]; 1237d0d4cfc2SHong Zhang for (i = imark; i < nzB; i++) v_p[nzA + i] = vworkB[i]; 1238d0d4cfc2SHong Zhang } 1239d0d4cfc2SHong Zhang if (idx) { 1240d0d4cfc2SHong Zhang *idx = idx_p = mat->rowindices; 1241d0d4cfc2SHong Zhang if (imark > -1) { 1242ad540459SPierre Jolivet for (i = 0; i < imark; i++) idx_p[i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs; 1243d0d4cfc2SHong Zhang } else { 1244d0d4cfc2SHong Zhang for (i = 0; i < nzB; i++) { 124526fbe8dcSKarl Rupp if (cmap[cworkB[i] / bs] < cstart) idx_p[i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs; 1246d0d4cfc2SHong Zhang else break; 1247d0d4cfc2SHong Zhang } 1248d0d4cfc2SHong Zhang imark = i; 1249d0d4cfc2SHong Zhang } 1250d0d4cfc2SHong Zhang for (i = 0; i < nzA; i++) idx_p[imark + i] = cstart * bs + cworkA[i]; 1251d0d4cfc2SHong Zhang for (i = imark; i < nzB; i++) idx_p[nzA + i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs; 1252d0d4cfc2SHong Zhang } 1253d0d4cfc2SHong Zhang } else { 1254f4259b30SLisandro Dalcin if (idx) *idx = NULL; 1255f4259b30SLisandro Dalcin if (v) *v = NULL; 1256d0d4cfc2SHong Zhang } 1257d0d4cfc2SHong Zhang } 1258d0d4cfc2SHong Zhang *nz = nztot; 12599566063dSJacob Faibussowitsch PetscCall((*mat->A->ops->restorerow)(mat->A, lrow, &nzA, pcA, pvA)); 12609566063dSJacob Faibussowitsch PetscCall((*mat->B->ops->restorerow)(mat->B, lrow, &nzB, pcB, pvB)); 12613ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1262a30f8f8cSSatish Balay } 1263a30f8f8cSSatish Balay 1264d71ae5a4SJacob Faibussowitsch PetscErrorCode MatRestoreRow_MPISBAIJ(Mat mat, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v) 1265d71ae5a4SJacob Faibussowitsch { 1266a30f8f8cSSatish Balay Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data; 1267a30f8f8cSSatish Balay 1268a30f8f8cSSatish Balay PetscFunctionBegin; 12695f80ce2aSJacob Faibussowitsch PetscCheck(baij->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "MatGetRow() must be called first"); 1270a30f8f8cSSatish Balay baij->getrowactive = PETSC_FALSE; 12713ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1272a30f8f8cSSatish Balay } 1273a30f8f8cSSatish Balay 1274d71ae5a4SJacob Faibussowitsch PetscErrorCode MatGetRowUpperTriangular_MPISBAIJ(Mat A) 1275d71ae5a4SJacob Faibussowitsch { 1276d0d4cfc2SHong Zhang Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data; 1277d0d4cfc2SHong Zhang Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ *)a->A->data; 1278d0d4cfc2SHong Zhang 1279d0d4cfc2SHong Zhang PetscFunctionBegin; 1280d0d4cfc2SHong Zhang aA->getrow_utriangular = PETSC_TRUE; 12813ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1282d0d4cfc2SHong Zhang } 1283d71ae5a4SJacob Faibussowitsch PetscErrorCode MatRestoreRowUpperTriangular_MPISBAIJ(Mat A) 1284d71ae5a4SJacob Faibussowitsch { 1285d0d4cfc2SHong Zhang Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data; 1286d0d4cfc2SHong Zhang Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ *)a->A->data; 1287d0d4cfc2SHong Zhang 1288d0d4cfc2SHong Zhang PetscFunctionBegin; 1289d0d4cfc2SHong Zhang aA->getrow_utriangular = PETSC_FALSE; 12903ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1291d0d4cfc2SHong Zhang } 1292d0d4cfc2SHong Zhang 1293d71ae5a4SJacob Faibussowitsch PetscErrorCode MatConjugate_MPISBAIJ(Mat mat) 1294d71ae5a4SJacob Faibussowitsch { 12955f80ce2aSJacob Faibussowitsch PetscFunctionBegin; 12965f80ce2aSJacob Faibussowitsch if (PetscDefined(USE_COMPLEX)) { 12972726fb6dSPierre Jolivet Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)mat->data; 12982726fb6dSPierre Jolivet 12999566063dSJacob Faibussowitsch PetscCall(MatConjugate(a->A)); 13009566063dSJacob Faibussowitsch PetscCall(MatConjugate(a->B)); 13015f80ce2aSJacob Faibussowitsch } 13023ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 13032726fb6dSPierre Jolivet } 13042726fb6dSPierre Jolivet 1305d71ae5a4SJacob Faibussowitsch PetscErrorCode MatRealPart_MPISBAIJ(Mat A) 1306d71ae5a4SJacob Faibussowitsch { 130799cafbc1SBarry Smith Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data; 130899cafbc1SBarry Smith 130999cafbc1SBarry Smith PetscFunctionBegin; 13109566063dSJacob Faibussowitsch PetscCall(MatRealPart(a->A)); 13119566063dSJacob Faibussowitsch PetscCall(MatRealPart(a->B)); 13123ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 131399cafbc1SBarry Smith } 131499cafbc1SBarry Smith 1315d71ae5a4SJacob Faibussowitsch PetscErrorCode MatImaginaryPart_MPISBAIJ(Mat A) 1316d71ae5a4SJacob Faibussowitsch { 131799cafbc1SBarry Smith Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data; 131899cafbc1SBarry Smith 131999cafbc1SBarry Smith PetscFunctionBegin; 13209566063dSJacob Faibussowitsch PetscCall(MatImaginaryPart(a->A)); 13219566063dSJacob Faibussowitsch PetscCall(MatImaginaryPart(a->B)); 13223ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 132399cafbc1SBarry Smith } 132499cafbc1SBarry Smith 13257dae84e0SHong Zhang /* Check if isrow is a subset of iscol_local, called by MatCreateSubMatrix_MPISBAIJ() 132636032a97SHong Zhang Input: isrow - distributed(parallel), 132736032a97SHong Zhang iscol_local - locally owned (seq) 132836032a97SHong Zhang */ 1329d71ae5a4SJacob Faibussowitsch PetscErrorCode ISEqual_private(IS isrow, IS iscol_local, PetscBool *flg) 1330d71ae5a4SJacob Faibussowitsch { 13318f46ffcaSHong Zhang PetscInt sz1, sz2, *a1, *a2, i, j, k, nmatch; 13328f46ffcaSHong Zhang const PetscInt *ptr1, *ptr2; 133336032a97SHong Zhang 133436032a97SHong Zhang PetscFunctionBegin; 13359566063dSJacob Faibussowitsch PetscCall(ISGetLocalSize(isrow, &sz1)); 13369566063dSJacob Faibussowitsch PetscCall(ISGetLocalSize(iscol_local, &sz2)); 13371098a8e8SHong Zhang if (sz1 > sz2) { 13381098a8e8SHong Zhang *flg = PETSC_FALSE; 13393ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 13401098a8e8SHong Zhang } 13418f46ffcaSHong Zhang 13429566063dSJacob Faibussowitsch PetscCall(ISGetIndices(isrow, &ptr1)); 13439566063dSJacob Faibussowitsch PetscCall(ISGetIndices(iscol_local, &ptr2)); 13448f46ffcaSHong Zhang 13459566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(sz1, &a1)); 13469566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(sz2, &a2)); 13479566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(a1, ptr1, sz1)); 13489566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(a2, ptr2, sz2)); 13499566063dSJacob Faibussowitsch PetscCall(PetscSortInt(sz1, a1)); 13509566063dSJacob Faibussowitsch PetscCall(PetscSortInt(sz2, a2)); 13518f46ffcaSHong Zhang 13528f46ffcaSHong Zhang nmatch = 0; 13538f46ffcaSHong Zhang k = 0; 13548f46ffcaSHong Zhang for (i = 0; i < sz1; i++) { 13558f46ffcaSHong Zhang for (j = k; j < sz2; j++) { 13568f46ffcaSHong Zhang if (a1[i] == a2[j]) { 13579371c9d4SSatish Balay k = j; 13589371c9d4SSatish Balay nmatch++; 13598f46ffcaSHong Zhang break; 13608f46ffcaSHong Zhang } 13618f46ffcaSHong Zhang } 13628f46ffcaSHong Zhang } 13639566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(isrow, &ptr1)); 13649566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(iscol_local, &ptr2)); 13659566063dSJacob Faibussowitsch PetscCall(PetscFree(a1)); 13669566063dSJacob Faibussowitsch PetscCall(PetscFree(a2)); 13671098a8e8SHong Zhang if (nmatch < sz1) { 13681098a8e8SHong Zhang *flg = PETSC_FALSE; 13691098a8e8SHong Zhang } else { 13701098a8e8SHong Zhang *flg = PETSC_TRUE; 13711098a8e8SHong Zhang } 13723ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 13738f46ffcaSHong Zhang } 137436032a97SHong Zhang 1375d71ae5a4SJacob Faibussowitsch PetscErrorCode MatCreateSubMatrix_MPISBAIJ(Mat mat, IS isrow, IS iscol, MatReuse call, Mat *newmat) 1376d71ae5a4SJacob Faibussowitsch { 137736032a97SHong Zhang IS iscol_local; 137836032a97SHong Zhang PetscInt csize; 137936032a97SHong Zhang PetscBool isequal; 138036032a97SHong Zhang 138136032a97SHong Zhang PetscFunctionBegin; 13829566063dSJacob Faibussowitsch PetscCall(ISGetLocalSize(iscol, &csize)); 138336032a97SHong Zhang if (call == MAT_REUSE_MATRIX) { 13849566063dSJacob Faibussowitsch PetscCall(PetscObjectQuery((PetscObject)*newmat, "ISAllGather", (PetscObject *)&iscol_local)); 13855f80ce2aSJacob Faibussowitsch PetscCheck(iscol_local, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse"); 138636032a97SHong Zhang } else { 1387068661f9SToby Isaac PetscBool issorted; 1388068661f9SToby Isaac 13899566063dSJacob Faibussowitsch PetscCall(ISAllGather(iscol, &iscol_local)); 13909566063dSJacob Faibussowitsch PetscCall(ISEqual_private(isrow, iscol_local, &isequal)); 13919566063dSJacob Faibussowitsch PetscCall(ISSorted(iscol_local, &issorted)); 13925f80ce2aSJacob Faibussowitsch PetscCheck(isequal && issorted, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "For symmetric format, iscol must equal isrow and be sorted"); 13938f46ffcaSHong Zhang } 13948f46ffcaSHong Zhang 13957dae84e0SHong Zhang /* now call MatCreateSubMatrix_MPIBAIJ() */ 13969566063dSJacob Faibussowitsch PetscCall(MatCreateSubMatrix_MPIBAIJ_Private(mat, isrow, iscol_local, csize, call, newmat)); 13978f46ffcaSHong Zhang if (call == MAT_INITIAL_MATRIX) { 13989566063dSJacob Faibussowitsch PetscCall(PetscObjectCompose((PetscObject)*newmat, "ISAllGather", (PetscObject)iscol_local)); 13999566063dSJacob Faibussowitsch PetscCall(ISDestroy(&iscol_local)); 14008f46ffcaSHong Zhang } 14013ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 14028f46ffcaSHong Zhang } 14038f46ffcaSHong Zhang 1404d71ae5a4SJacob Faibussowitsch PetscErrorCode MatZeroEntries_MPISBAIJ(Mat A) 1405d71ae5a4SJacob Faibussowitsch { 1406a30f8f8cSSatish Balay Mat_MPISBAIJ *l = (Mat_MPISBAIJ *)A->data; 1407a30f8f8cSSatish Balay 1408a30f8f8cSSatish Balay PetscFunctionBegin; 14099566063dSJacob Faibussowitsch PetscCall(MatZeroEntries(l->A)); 14109566063dSJacob Faibussowitsch PetscCall(MatZeroEntries(l->B)); 14113ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1412a30f8f8cSSatish Balay } 1413a30f8f8cSSatish Balay 1414d71ae5a4SJacob Faibussowitsch PetscErrorCode MatGetInfo_MPISBAIJ(Mat matin, MatInfoType flag, MatInfo *info) 1415d71ae5a4SJacob Faibussowitsch { 1416a30f8f8cSSatish Balay Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)matin->data; 1417a30f8f8cSSatish Balay Mat A = a->A, B = a->B; 14183966268fSBarry Smith PetscLogDouble isend[5], irecv[5]; 1419a30f8f8cSSatish Balay 1420a30f8f8cSSatish Balay PetscFunctionBegin; 1421d0f46423SBarry Smith info->block_size = (PetscReal)matin->rmap->bs; 142226fbe8dcSKarl Rupp 14239566063dSJacob Faibussowitsch PetscCall(MatGetInfo(A, MAT_LOCAL, info)); 142426fbe8dcSKarl Rupp 14259371c9d4SSatish Balay isend[0] = info->nz_used; 14269371c9d4SSatish Balay isend[1] = info->nz_allocated; 14279371c9d4SSatish Balay isend[2] = info->nz_unneeded; 14289371c9d4SSatish Balay isend[3] = info->memory; 14299371c9d4SSatish Balay isend[4] = info->mallocs; 143026fbe8dcSKarl Rupp 14319566063dSJacob Faibussowitsch PetscCall(MatGetInfo(B, MAT_LOCAL, info)); 143226fbe8dcSKarl Rupp 14339371c9d4SSatish Balay isend[0] += info->nz_used; 14349371c9d4SSatish Balay isend[1] += info->nz_allocated; 14359371c9d4SSatish Balay isend[2] += info->nz_unneeded; 14369371c9d4SSatish Balay isend[3] += info->memory; 14379371c9d4SSatish Balay isend[4] += info->mallocs; 1438a30f8f8cSSatish Balay if (flag == MAT_LOCAL) { 1439a30f8f8cSSatish Balay info->nz_used = isend[0]; 1440a30f8f8cSSatish Balay info->nz_allocated = isend[1]; 1441a30f8f8cSSatish Balay info->nz_unneeded = isend[2]; 1442a30f8f8cSSatish Balay info->memory = isend[3]; 1443a30f8f8cSSatish Balay info->mallocs = isend[4]; 1444a30f8f8cSSatish Balay } else if (flag == MAT_GLOBAL_MAX) { 14451c2dc1cbSBarry Smith PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)matin))); 144626fbe8dcSKarl Rupp 1447a30f8f8cSSatish Balay info->nz_used = irecv[0]; 1448a30f8f8cSSatish Balay info->nz_allocated = irecv[1]; 1449a30f8f8cSSatish Balay info->nz_unneeded = irecv[2]; 1450a30f8f8cSSatish Balay info->memory = irecv[3]; 1451a30f8f8cSSatish Balay info->mallocs = irecv[4]; 1452a30f8f8cSSatish Balay } else if (flag == MAT_GLOBAL_SUM) { 14531c2dc1cbSBarry Smith PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)matin))); 145426fbe8dcSKarl Rupp 1455a30f8f8cSSatish Balay info->nz_used = irecv[0]; 1456a30f8f8cSSatish Balay info->nz_allocated = irecv[1]; 1457a30f8f8cSSatish Balay info->nz_unneeded = irecv[2]; 1458a30f8f8cSSatish Balay info->memory = irecv[3]; 1459a30f8f8cSSatish Balay info->mallocs = irecv[4]; 146098921bdaSJacob Faibussowitsch } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Unknown MatInfoType argument %d", (int)flag); 1461a30f8f8cSSatish Balay info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1462a30f8f8cSSatish Balay info->fill_ratio_needed = 0; 1463a30f8f8cSSatish Balay info->factor_mallocs = 0; 14643ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1465a30f8f8cSSatish Balay } 1466a30f8f8cSSatish Balay 1467d71ae5a4SJacob Faibussowitsch PetscErrorCode MatSetOption_MPISBAIJ(Mat A, MatOption op, PetscBool flg) 1468d71ae5a4SJacob Faibussowitsch { 1469a30f8f8cSSatish Balay Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data; 1470d0d4cfc2SHong Zhang Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ *)a->A->data; 1471a30f8f8cSSatish Balay 1472a30f8f8cSSatish Balay PetscFunctionBegin; 1473e98b92d7SKris Buschelman switch (op) { 1474512a5fc5SBarry Smith case MAT_NEW_NONZERO_LOCATIONS: 1475e98b92d7SKris Buschelman case MAT_NEW_NONZERO_ALLOCATION_ERR: 147628b2fa4aSMatthew Knepley case MAT_UNUSED_NONZERO_LOCATION_ERR: 1477a9817697SBarry Smith case MAT_KEEP_NONZERO_PATTERN: 1478c10200c1SHong Zhang case MAT_SUBMAT_SINGLEIS: 1479e98b92d7SKris Buschelman case MAT_NEW_NONZERO_LOCATION_ERR: 148043674050SBarry Smith MatCheckPreallocated(A, 1); 14819566063dSJacob Faibussowitsch PetscCall(MatSetOption(a->A, op, flg)); 14829566063dSJacob Faibussowitsch PetscCall(MatSetOption(a->B, op, flg)); 1483e98b92d7SKris Buschelman break; 1484e98b92d7SKris Buschelman case MAT_ROW_ORIENTED: 148543674050SBarry Smith MatCheckPreallocated(A, 1); 14864e0d8c25SBarry Smith a->roworiented = flg; 148726fbe8dcSKarl Rupp 14889566063dSJacob Faibussowitsch PetscCall(MatSetOption(a->A, op, flg)); 14899566063dSJacob Faibussowitsch PetscCall(MatSetOption(a->B, op, flg)); 1490e98b92d7SKris Buschelman break; 14918c78258cSHong Zhang case MAT_FORCE_DIAGONAL_ENTRIES: 1492d71ae5a4SJacob Faibussowitsch case MAT_SORTED_FULL: 1493d71ae5a4SJacob Faibussowitsch PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op])); 1494d71ae5a4SJacob Faibussowitsch break; 1495d71ae5a4SJacob Faibussowitsch case MAT_IGNORE_OFF_PROC_ENTRIES: 1496d71ae5a4SJacob Faibussowitsch a->donotstash = flg; 1497d71ae5a4SJacob Faibussowitsch break; 1498d71ae5a4SJacob Faibussowitsch case MAT_USE_HASH_TABLE: 1499d71ae5a4SJacob Faibussowitsch a->ht_flag = flg; 1500d71ae5a4SJacob Faibussowitsch break; 1501d71ae5a4SJacob Faibussowitsch case MAT_HERMITIAN: 1502d71ae5a4SJacob Faibussowitsch MatCheckPreallocated(A, 1); 1503d71ae5a4SJacob Faibussowitsch PetscCall(MatSetOption(a->A, op, flg)); 15040f2140c7SStefano Zampini #if defined(PETSC_USE_COMPLEX) 1505eb1ec7c1SStefano Zampini if (flg) { /* need different mat-vec ops */ 1506547795f9SHong Zhang A->ops->mult = MatMult_MPISBAIJ_Hermitian; 1507eb1ec7c1SStefano Zampini A->ops->multadd = MatMultAdd_MPISBAIJ_Hermitian; 1508eb1ec7c1SStefano Zampini A->ops->multtranspose = NULL; 1509eb1ec7c1SStefano Zampini A->ops->multtransposeadd = NULL; 1510b94d7dedSBarry Smith A->symmetric = PETSC_BOOL3_FALSE; 1511eb1ec7c1SStefano Zampini } 15120f2140c7SStefano Zampini #endif 1513eeffb40dSHong Zhang break; 1514ffa07934SHong Zhang case MAT_SPD: 1515d71ae5a4SJacob Faibussowitsch case MAT_SYMMETRIC: 1516d71ae5a4SJacob Faibussowitsch MatCheckPreallocated(A, 1); 1517d71ae5a4SJacob Faibussowitsch PetscCall(MatSetOption(a->A, op, flg)); 1518eb1ec7c1SStefano Zampini #if defined(PETSC_USE_COMPLEX) 1519eb1ec7c1SStefano Zampini if (flg) { /* restore to use default mat-vec ops */ 1520eb1ec7c1SStefano Zampini A->ops->mult = MatMult_MPISBAIJ; 1521eb1ec7c1SStefano Zampini A->ops->multadd = MatMultAdd_MPISBAIJ; 1522eb1ec7c1SStefano Zampini A->ops->multtranspose = MatMult_MPISBAIJ; 1523eb1ec7c1SStefano Zampini A->ops->multtransposeadd = MatMultAdd_MPISBAIJ; 1524eb1ec7c1SStefano Zampini } 1525eb1ec7c1SStefano Zampini #endif 1526eeffb40dSHong Zhang break; 152777e54ba9SKris Buschelman case MAT_STRUCTURALLY_SYMMETRIC: 152843674050SBarry Smith MatCheckPreallocated(A, 1); 15299566063dSJacob Faibussowitsch PetscCall(MatSetOption(a->A, op, flg)); 1530eeffb40dSHong Zhang break; 15319a4540c5SBarry Smith case MAT_SYMMETRY_ETERNAL: 1532b94d7dedSBarry Smith case MAT_STRUCTURAL_SYMMETRY_ETERNAL: 15335f80ce2aSJacob Faibussowitsch PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_SUP, "Matrix must be symmetric"); 15349566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op])); 153577e54ba9SKris Buschelman break; 1536d71ae5a4SJacob Faibussowitsch case MAT_SPD_ETERNAL: 1537d71ae5a4SJacob Faibussowitsch break; 1538d71ae5a4SJacob Faibussowitsch case MAT_IGNORE_LOWER_TRIANGULAR: 1539d71ae5a4SJacob Faibussowitsch aA->ignore_ltriangular = flg; 1540d71ae5a4SJacob Faibussowitsch break; 1541d71ae5a4SJacob Faibussowitsch case MAT_ERROR_LOWER_TRIANGULAR: 1542d71ae5a4SJacob Faibussowitsch aA->ignore_ltriangular = flg; 1543d71ae5a4SJacob Faibussowitsch break; 1544d71ae5a4SJacob Faibussowitsch case MAT_GETROW_UPPERTRIANGULAR: 1545d71ae5a4SJacob Faibussowitsch aA->getrow_utriangular = flg; 1546d71ae5a4SJacob Faibussowitsch break; 1547d71ae5a4SJacob Faibussowitsch default: 1548d71ae5a4SJacob Faibussowitsch SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "unknown option %d", op); 1549a30f8f8cSSatish Balay } 15503ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1551a30f8f8cSSatish Balay } 1552a30f8f8cSSatish Balay 1553d71ae5a4SJacob Faibussowitsch PetscErrorCode MatTranspose_MPISBAIJ(Mat A, MatReuse reuse, Mat *B) 1554d71ae5a4SJacob Faibussowitsch { 1555a30f8f8cSSatish Balay PetscFunctionBegin; 15567fb60732SBarry Smith if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *B)); 1557cf37664fSBarry Smith if (reuse == MAT_INITIAL_MATRIX) { 15589566063dSJacob Faibussowitsch PetscCall(MatDuplicate(A, MAT_COPY_VALUES, B)); 1559cf37664fSBarry Smith } else if (reuse == MAT_REUSE_MATRIX) { 15609566063dSJacob Faibussowitsch PetscCall(MatCopy(A, *B, SAME_NONZERO_PATTERN)); 1561fc4dec0aSBarry Smith } 15623ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1563a30f8f8cSSatish Balay } 1564a30f8f8cSSatish Balay 1565d71ae5a4SJacob Faibussowitsch PetscErrorCode MatDiagonalScale_MPISBAIJ(Mat mat, Vec ll, Vec rr) 1566d71ae5a4SJacob Faibussowitsch { 1567a30f8f8cSSatish Balay Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data; 1568a30f8f8cSSatish Balay Mat a = baij->A, b = baij->B; 15695e90f9d9SHong Zhang PetscInt nv, m, n; 1570ace3abfcSBarry Smith PetscBool flg; 1571a30f8f8cSSatish Balay 1572a30f8f8cSSatish Balay PetscFunctionBegin; 1573a30f8f8cSSatish Balay if (ll != rr) { 15749566063dSJacob Faibussowitsch PetscCall(VecEqual(ll, rr, &flg)); 15755f80ce2aSJacob Faibussowitsch PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "For symmetric format, left and right scaling vectors must be same"); 1576a30f8f8cSSatish Balay } 15773ba16761SJacob Faibussowitsch if (!ll) PetscFunctionReturn(PETSC_SUCCESS); 1578b3bf805bSHong Zhang 15799566063dSJacob Faibussowitsch PetscCall(MatGetLocalSize(mat, &m, &n)); 15805f80ce2aSJacob Faibussowitsch PetscCheck(m == n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "For symmetric format, local size %" PetscInt_FMT " %" PetscInt_FMT " must be same", m, n); 1581b3bf805bSHong Zhang 15829566063dSJacob Faibussowitsch PetscCall(VecGetLocalSize(rr, &nv)); 15835f80ce2aSJacob Faibussowitsch PetscCheck(nv == n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Left and right vector non-conforming local size"); 15845e90f9d9SHong Zhang 15859566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(baij->Mvctx, rr, baij->lvec, INSERT_VALUES, SCATTER_FORWARD)); 15865e90f9d9SHong Zhang 15875e90f9d9SHong Zhang /* left diagonalscale the off-diagonal part */ 1588dbbe0bcdSBarry Smith PetscUseTypeMethod(b, diagonalscale, ll, NULL); 15895e90f9d9SHong Zhang 15905e90f9d9SHong Zhang /* scale the diagonal part */ 1591dbbe0bcdSBarry Smith PetscUseTypeMethod(a, diagonalscale, ll, rr); 1592a30f8f8cSSatish Balay 15935e90f9d9SHong Zhang /* right diagonalscale the off-diagonal part */ 15949566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(baij->Mvctx, rr, baij->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1595dbbe0bcdSBarry Smith PetscUseTypeMethod(b, diagonalscale, NULL, baij->lvec); 15963ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1597a30f8f8cSSatish Balay } 1598a30f8f8cSSatish Balay 1599d71ae5a4SJacob Faibussowitsch PetscErrorCode MatSetUnfactored_MPISBAIJ(Mat A) 1600d71ae5a4SJacob Faibussowitsch { 1601f3566a2aSHong Zhang Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data; 1602a30f8f8cSSatish Balay 1603a30f8f8cSSatish Balay PetscFunctionBegin; 16049566063dSJacob Faibussowitsch PetscCall(MatSetUnfactored(a->A)); 16053ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1606a30f8f8cSSatish Balay } 1607a30f8f8cSSatish Balay 16086849ba73SBarry Smith static PetscErrorCode MatDuplicate_MPISBAIJ(Mat, MatDuplicateOption, Mat *); 1609a30f8f8cSSatish Balay 1610d71ae5a4SJacob Faibussowitsch PetscErrorCode MatEqual_MPISBAIJ(Mat A, Mat B, PetscBool *flag) 1611d71ae5a4SJacob Faibussowitsch { 1612a30f8f8cSSatish Balay Mat_MPISBAIJ *matB = (Mat_MPISBAIJ *)B->data, *matA = (Mat_MPISBAIJ *)A->data; 1613a30f8f8cSSatish Balay Mat a, b, c, d; 1614ace3abfcSBarry Smith PetscBool flg; 1615a30f8f8cSSatish Balay 1616a30f8f8cSSatish Balay PetscFunctionBegin; 16179371c9d4SSatish Balay a = matA->A; 16189371c9d4SSatish Balay b = matA->B; 16199371c9d4SSatish Balay c = matB->A; 16209371c9d4SSatish Balay d = matB->B; 1621a30f8f8cSSatish Balay 16229566063dSJacob Faibussowitsch PetscCall(MatEqual(a, c, &flg)); 162348a46eb9SPierre Jolivet if (flg) PetscCall(MatEqual(b, d, &flg)); 16241c2dc1cbSBarry Smith PetscCall(MPIU_Allreduce(&flg, flag, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A))); 16253ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1626a30f8f8cSSatish Balay } 1627a30f8f8cSSatish Balay 1628d71ae5a4SJacob Faibussowitsch PetscErrorCode MatCopy_MPISBAIJ(Mat A, Mat B, MatStructure str) 1629d71ae5a4SJacob Faibussowitsch { 16304c7a3774SStefano Zampini PetscBool isbaij; 16313c896bc6SHong Zhang 16323c896bc6SHong Zhang PetscFunctionBegin; 16339566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompareAny((PetscObject)B, &isbaij, MATSEQSBAIJ, MATMPISBAIJ, "")); 16345f80ce2aSJacob Faibussowitsch PetscCheck(isbaij, PetscObjectComm((PetscObject)B), PETSC_ERR_SUP, "Not for matrix type %s", ((PetscObject)B)->type_name); 16353c896bc6SHong Zhang /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 16363c896bc6SHong Zhang if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 16379566063dSJacob Faibussowitsch PetscCall(MatGetRowUpperTriangular(A)); 16389566063dSJacob Faibussowitsch PetscCall(MatCopy_Basic(A, B, str)); 16399566063dSJacob Faibussowitsch PetscCall(MatRestoreRowUpperTriangular(A)); 16403c896bc6SHong Zhang } else { 16414c7a3774SStefano Zampini Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data; 16424c7a3774SStefano Zampini Mat_MPISBAIJ *b = (Mat_MPISBAIJ *)B->data; 16434c7a3774SStefano Zampini 16449566063dSJacob Faibussowitsch PetscCall(MatCopy(a->A, b->A, str)); 16459566063dSJacob Faibussowitsch PetscCall(MatCopy(a->B, b->B, str)); 16463c896bc6SHong Zhang } 16479566063dSJacob Faibussowitsch PetscCall(PetscObjectStateIncrease((PetscObject)B)); 16483ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 16493c896bc6SHong Zhang } 16503c896bc6SHong Zhang 1651d71ae5a4SJacob Faibussowitsch PetscErrorCode MatSetUp_MPISBAIJ(Mat A) 1652d71ae5a4SJacob Faibussowitsch { 1653273d9f13SBarry Smith PetscFunctionBegin; 16549566063dSJacob Faibussowitsch PetscCall(MatMPISBAIJSetPreallocation(A, A->rmap->bs, PETSC_DEFAULT, NULL, PETSC_DEFAULT, NULL)); 16553ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1656273d9f13SBarry Smith } 1657a5e6ed63SBarry Smith 1658d71ae5a4SJacob Faibussowitsch PetscErrorCode MatAXPY_MPISBAIJ(Mat Y, PetscScalar a, Mat X, MatStructure str) 1659d71ae5a4SJacob Faibussowitsch { 16604fe895cdSHong Zhang Mat_MPISBAIJ *xx = (Mat_MPISBAIJ *)X->data, *yy = (Mat_MPISBAIJ *)Y->data; 16614fe895cdSHong Zhang PetscBLASInt bnz, one = 1; 16624fe895cdSHong Zhang Mat_SeqSBAIJ *xa, *ya; 16634fe895cdSHong Zhang Mat_SeqBAIJ *xb, *yb; 16644fe895cdSHong Zhang 16654fe895cdSHong Zhang PetscFunctionBegin; 16664fe895cdSHong Zhang if (str == SAME_NONZERO_PATTERN) { 16674fe895cdSHong Zhang PetscScalar alpha = a; 16684fe895cdSHong Zhang xa = (Mat_SeqSBAIJ *)xx->A->data; 16694fe895cdSHong Zhang ya = (Mat_SeqSBAIJ *)yy->A->data; 16709566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast(xa->nz, &bnz)); 1671792fecdfSBarry Smith PetscCallBLAS("BLASaxpy", BLASaxpy_(&bnz, &alpha, xa->a, &one, ya->a, &one)); 16724fe895cdSHong Zhang xb = (Mat_SeqBAIJ *)xx->B->data; 16734fe895cdSHong Zhang yb = (Mat_SeqBAIJ *)yy->B->data; 16749566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast(xb->nz, &bnz)); 1675792fecdfSBarry Smith PetscCallBLAS("BLASaxpy", BLASaxpy_(&bnz, &alpha, xb->a, &one, yb->a, &one)); 16769566063dSJacob Faibussowitsch PetscCall(PetscObjectStateIncrease((PetscObject)Y)); 1677ab784542SHong Zhang } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 16789566063dSJacob Faibussowitsch PetscCall(MatSetOption(X, MAT_GETROW_UPPERTRIANGULAR, PETSC_TRUE)); 16799566063dSJacob Faibussowitsch PetscCall(MatAXPY_Basic(Y, a, X, str)); 16809566063dSJacob Faibussowitsch PetscCall(MatSetOption(X, MAT_GETROW_UPPERTRIANGULAR, PETSC_FALSE)); 16814fe895cdSHong Zhang } else { 16824de5dceeSHong Zhang Mat B; 16834de5dceeSHong Zhang PetscInt *nnz_d, *nnz_o, bs = Y->rmap->bs; 16845f80ce2aSJacob Faibussowitsch PetscCheck(bs == X->rmap->bs, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrices must have same block size"); 16859566063dSJacob Faibussowitsch PetscCall(MatGetRowUpperTriangular(X)); 16869566063dSJacob Faibussowitsch PetscCall(MatGetRowUpperTriangular(Y)); 16879566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(yy->A->rmap->N, &nnz_d)); 16889566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(yy->B->rmap->N, &nnz_o)); 16899566063dSJacob Faibussowitsch PetscCall(MatCreate(PetscObjectComm((PetscObject)Y), &B)); 16909566063dSJacob Faibussowitsch PetscCall(PetscObjectSetName((PetscObject)B, ((PetscObject)Y)->name)); 16919566063dSJacob Faibussowitsch PetscCall(MatSetSizes(B, Y->rmap->n, Y->cmap->n, Y->rmap->N, Y->cmap->N)); 16929566063dSJacob Faibussowitsch PetscCall(MatSetBlockSizesFromMats(B, Y, Y)); 16939566063dSJacob Faibussowitsch PetscCall(MatSetType(B, MATMPISBAIJ)); 16949566063dSJacob Faibussowitsch PetscCall(MatAXPYGetPreallocation_SeqSBAIJ(yy->A, xx->A, nnz_d)); 16959566063dSJacob Faibussowitsch PetscCall(MatAXPYGetPreallocation_MPIBAIJ(yy->B, yy->garray, xx->B, xx->garray, nnz_o)); 16969566063dSJacob Faibussowitsch PetscCall(MatMPISBAIJSetPreallocation(B, bs, 0, nnz_d, 0, nnz_o)); 16979566063dSJacob Faibussowitsch PetscCall(MatAXPY_BasicWithPreallocation(B, Y, a, X, str)); 16989566063dSJacob Faibussowitsch PetscCall(MatHeaderMerge(Y, &B)); 16999566063dSJacob Faibussowitsch PetscCall(PetscFree(nnz_d)); 17009566063dSJacob Faibussowitsch PetscCall(PetscFree(nnz_o)); 17019566063dSJacob Faibussowitsch PetscCall(MatRestoreRowUpperTriangular(X)); 17029566063dSJacob Faibussowitsch PetscCall(MatRestoreRowUpperTriangular(Y)); 17034fe895cdSHong Zhang } 17043ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 17054fe895cdSHong Zhang } 17064fe895cdSHong Zhang 1707d71ae5a4SJacob Faibussowitsch PetscErrorCode MatCreateSubMatrices_MPISBAIJ(Mat A, PetscInt n, const IS irow[], const IS icol[], MatReuse scall, Mat *B[]) 1708d71ae5a4SJacob Faibussowitsch { 17091302d50aSBarry Smith PetscInt i; 1710afebec48SHong Zhang PetscBool flg; 1711a5e6ed63SBarry Smith 17126849ba73SBarry Smith PetscFunctionBegin; 17139566063dSJacob Faibussowitsch PetscCall(MatCreateSubMatrices_MPIBAIJ(A, n, irow, icol, scall, B)); /* B[] are sbaij matrices */ 1714a5e6ed63SBarry Smith for (i = 0; i < n; i++) { 17159566063dSJacob Faibussowitsch PetscCall(ISEqual(irow[i], icol[i], &flg)); 171648a46eb9SPierre Jolivet if (!flg) PetscCall(MatSeqSBAIJZeroOps_Private(*B[i])); 17174dcd73b1SHong Zhang } 17183ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1719a5e6ed63SBarry Smith } 1720a5e6ed63SBarry Smith 1721d71ae5a4SJacob Faibussowitsch PetscErrorCode MatShift_MPISBAIJ(Mat Y, PetscScalar a) 1722d71ae5a4SJacob Faibussowitsch { 17237d68702bSBarry Smith Mat_MPISBAIJ *maij = (Mat_MPISBAIJ *)Y->data; 17246f33a894SBarry Smith Mat_SeqSBAIJ *aij = (Mat_SeqSBAIJ *)maij->A->data; 17257d68702bSBarry Smith 17267d68702bSBarry Smith PetscFunctionBegin; 17276f33a894SBarry Smith if (!Y->preallocated) { 17289566063dSJacob Faibussowitsch PetscCall(MatMPISBAIJSetPreallocation(Y, Y->rmap->bs, 1, NULL, 0, NULL)); 17296f33a894SBarry Smith } else if (!aij->nz) { 1730b83222d8SBarry Smith PetscInt nonew = aij->nonew; 17319566063dSJacob Faibussowitsch PetscCall(MatSeqSBAIJSetPreallocation(maij->A, Y->rmap->bs, 1, NULL)); 1732b83222d8SBarry Smith aij->nonew = nonew; 17337d68702bSBarry Smith } 17349566063dSJacob Faibussowitsch PetscCall(MatShift_Basic(Y, a)); 17353ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 17367d68702bSBarry Smith } 17377d68702bSBarry Smith 1738d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMissingDiagonal_MPISBAIJ(Mat A, PetscBool *missing, PetscInt *d) 1739d71ae5a4SJacob Faibussowitsch { 17403b49f96aSBarry Smith Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data; 17413b49f96aSBarry Smith 17423b49f96aSBarry Smith PetscFunctionBegin; 17435f80ce2aSJacob Faibussowitsch PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only works for square matrices"); 17449566063dSJacob Faibussowitsch PetscCall(MatMissingDiagonal(a->A, missing, d)); 17453b49f96aSBarry Smith if (d) { 17463b49f96aSBarry Smith PetscInt rstart; 17479566063dSJacob Faibussowitsch PetscCall(MatGetOwnershipRange(A, &rstart, NULL)); 17483b49f96aSBarry Smith *d += rstart / A->rmap->bs; 17493b49f96aSBarry Smith } 17503ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 17513b49f96aSBarry Smith } 17523b49f96aSBarry Smith 1753d71ae5a4SJacob Faibussowitsch PetscErrorCode MatGetDiagonalBlock_MPISBAIJ(Mat A, Mat *a) 1754d71ae5a4SJacob Faibussowitsch { 1755a5b7ff6bSBarry Smith PetscFunctionBegin; 1756a5b7ff6bSBarry Smith *a = ((Mat_MPISBAIJ *)A->data)->A; 17573ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1758a5b7ff6bSBarry Smith } 17593b49f96aSBarry Smith 1760a30f8f8cSSatish Balay /* -------------------------------------------------------------------*/ 17613964eb88SJed Brown static struct _MatOps MatOps_Values = {MatSetValues_MPISBAIJ, 1762a30f8f8cSSatish Balay MatGetRow_MPISBAIJ, 1763a30f8f8cSSatish Balay MatRestoreRow_MPISBAIJ, 1764a9d4b620SHong Zhang MatMult_MPISBAIJ, 176597304618SKris Buschelman /* 4*/ MatMultAdd_MPISBAIJ, 1766431c96f7SBarry Smith MatMult_MPISBAIJ, /* transpose versions are same as non-transpose */ 1767431c96f7SBarry Smith MatMultAdd_MPISBAIJ, 1768f4259b30SLisandro Dalcin NULL, 1769f4259b30SLisandro Dalcin NULL, 1770f4259b30SLisandro Dalcin NULL, 1771f4259b30SLisandro Dalcin /* 10*/ NULL, 1772f4259b30SLisandro Dalcin NULL, 1773f4259b30SLisandro Dalcin NULL, 177441f059aeSBarry Smith MatSOR_MPISBAIJ, 1775a30f8f8cSSatish Balay MatTranspose_MPISBAIJ, 177697304618SKris Buschelman /* 15*/ MatGetInfo_MPISBAIJ, 1777a30f8f8cSSatish Balay MatEqual_MPISBAIJ, 1778a30f8f8cSSatish Balay MatGetDiagonal_MPISBAIJ, 1779a30f8f8cSSatish Balay MatDiagonalScale_MPISBAIJ, 1780a30f8f8cSSatish Balay MatNorm_MPISBAIJ, 178197304618SKris Buschelman /* 20*/ MatAssemblyBegin_MPISBAIJ, 1782a30f8f8cSSatish Balay MatAssemblyEnd_MPISBAIJ, 1783a30f8f8cSSatish Balay MatSetOption_MPISBAIJ, 1784a30f8f8cSSatish Balay MatZeroEntries_MPISBAIJ, 1785f4259b30SLisandro Dalcin /* 24*/ NULL, 1786f4259b30SLisandro Dalcin NULL, 1787f4259b30SLisandro Dalcin NULL, 1788f4259b30SLisandro Dalcin NULL, 1789f4259b30SLisandro Dalcin NULL, 17904994cf47SJed Brown /* 29*/ MatSetUp_MPISBAIJ, 1791f4259b30SLisandro Dalcin NULL, 1792f4259b30SLisandro Dalcin NULL, 1793a5b7ff6bSBarry Smith MatGetDiagonalBlock_MPISBAIJ, 1794f4259b30SLisandro Dalcin NULL, 1795d519adbfSMatthew Knepley /* 34*/ MatDuplicate_MPISBAIJ, 1796f4259b30SLisandro Dalcin NULL, 1797f4259b30SLisandro Dalcin NULL, 1798f4259b30SLisandro Dalcin NULL, 1799f4259b30SLisandro Dalcin NULL, 1800d519adbfSMatthew Knepley /* 39*/ MatAXPY_MPISBAIJ, 18017dae84e0SHong Zhang MatCreateSubMatrices_MPISBAIJ, 1802d94109b8SHong Zhang MatIncreaseOverlap_MPISBAIJ, 1803a30f8f8cSSatish Balay MatGetValues_MPISBAIJ, 18043c896bc6SHong Zhang MatCopy_MPISBAIJ, 1805f4259b30SLisandro Dalcin /* 44*/ NULL, 1806a30f8f8cSSatish Balay MatScale_MPISBAIJ, 18077d68702bSBarry Smith MatShift_MPISBAIJ, 1808f4259b30SLisandro Dalcin NULL, 1809f4259b30SLisandro Dalcin NULL, 1810f4259b30SLisandro Dalcin /* 49*/ NULL, 1811f4259b30SLisandro Dalcin NULL, 1812f4259b30SLisandro Dalcin NULL, 1813f4259b30SLisandro Dalcin NULL, 1814f4259b30SLisandro Dalcin NULL, 1815f4259b30SLisandro Dalcin /* 54*/ NULL, 1816f4259b30SLisandro Dalcin NULL, 1817a30f8f8cSSatish Balay MatSetUnfactored_MPISBAIJ, 1818f4259b30SLisandro Dalcin NULL, 1819a30f8f8cSSatish Balay MatSetValuesBlocked_MPISBAIJ, 18207dae84e0SHong Zhang /* 59*/ MatCreateSubMatrix_MPISBAIJ, 1821f4259b30SLisandro Dalcin NULL, 1822f4259b30SLisandro Dalcin NULL, 1823f4259b30SLisandro Dalcin NULL, 1824f4259b30SLisandro Dalcin NULL, 1825f4259b30SLisandro Dalcin /* 64*/ NULL, 1826f4259b30SLisandro Dalcin NULL, 1827f4259b30SLisandro Dalcin NULL, 1828f4259b30SLisandro Dalcin NULL, 1829f4259b30SLisandro Dalcin NULL, 1830d519adbfSMatthew Knepley /* 69*/ MatGetRowMaxAbs_MPISBAIJ, 1831f4259b30SLisandro Dalcin NULL, 183228d58a37SPierre Jolivet MatConvert_MPISBAIJ_Basic, 1833f4259b30SLisandro Dalcin NULL, 1834f4259b30SLisandro Dalcin NULL, 1835f4259b30SLisandro Dalcin /* 74*/ NULL, 1836f4259b30SLisandro Dalcin NULL, 1837f4259b30SLisandro Dalcin NULL, 1838f4259b30SLisandro Dalcin NULL, 1839f4259b30SLisandro Dalcin NULL, 1840f4259b30SLisandro Dalcin /* 79*/ NULL, 1841f4259b30SLisandro Dalcin NULL, 1842f4259b30SLisandro Dalcin NULL, 1843f4259b30SLisandro Dalcin NULL, 18445bba2384SShri Abhyankar MatLoad_MPISBAIJ, 1845f4259b30SLisandro Dalcin /* 84*/ NULL, 1846f4259b30SLisandro Dalcin NULL, 1847f4259b30SLisandro Dalcin NULL, 1848f4259b30SLisandro Dalcin NULL, 1849f4259b30SLisandro Dalcin NULL, 1850f4259b30SLisandro Dalcin /* 89*/ NULL, 1851f4259b30SLisandro Dalcin NULL, 1852f4259b30SLisandro Dalcin NULL, 1853f4259b30SLisandro Dalcin NULL, 1854f4259b30SLisandro Dalcin NULL, 1855f4259b30SLisandro Dalcin /* 94*/ NULL, 1856f4259b30SLisandro Dalcin NULL, 1857f4259b30SLisandro Dalcin NULL, 1858f4259b30SLisandro Dalcin NULL, 1859f4259b30SLisandro Dalcin NULL, 1860f4259b30SLisandro Dalcin /* 99*/ NULL, 1861f4259b30SLisandro Dalcin NULL, 1862f4259b30SLisandro Dalcin NULL, 18632726fb6dSPierre Jolivet MatConjugate_MPISBAIJ, 1864f4259b30SLisandro Dalcin NULL, 1865f4259b30SLisandro Dalcin /*104*/ NULL, 186699cafbc1SBarry Smith MatRealPart_MPISBAIJ, 1867d0d4cfc2SHong Zhang MatImaginaryPart_MPISBAIJ, 1868d0d4cfc2SHong Zhang MatGetRowUpperTriangular_MPISBAIJ, 186995936485SShri Abhyankar MatRestoreRowUpperTriangular_MPISBAIJ, 1870f4259b30SLisandro Dalcin /*109*/ NULL, 1871f4259b30SLisandro Dalcin NULL, 1872f4259b30SLisandro Dalcin NULL, 1873f4259b30SLisandro Dalcin NULL, 18743b49f96aSBarry Smith MatMissingDiagonal_MPISBAIJ, 1875f4259b30SLisandro Dalcin /*114*/ NULL, 1876f4259b30SLisandro Dalcin NULL, 1877f4259b30SLisandro Dalcin NULL, 1878f4259b30SLisandro Dalcin NULL, 1879f4259b30SLisandro Dalcin NULL, 1880f4259b30SLisandro Dalcin /*119*/ NULL, 1881f4259b30SLisandro Dalcin NULL, 1882f4259b30SLisandro Dalcin NULL, 1883f4259b30SLisandro Dalcin NULL, 1884f4259b30SLisandro Dalcin NULL, 1885f4259b30SLisandro Dalcin /*124*/ NULL, 1886f4259b30SLisandro Dalcin NULL, 1887f4259b30SLisandro Dalcin NULL, 1888f4259b30SLisandro Dalcin NULL, 1889f4259b30SLisandro Dalcin NULL, 1890f4259b30SLisandro Dalcin /*129*/ NULL, 1891f4259b30SLisandro Dalcin NULL, 1892f4259b30SLisandro Dalcin NULL, 1893f4259b30SLisandro Dalcin NULL, 1894f4259b30SLisandro Dalcin NULL, 1895f4259b30SLisandro Dalcin /*134*/ NULL, 1896f4259b30SLisandro Dalcin NULL, 1897f4259b30SLisandro Dalcin NULL, 1898f4259b30SLisandro Dalcin NULL, 1899f4259b30SLisandro Dalcin NULL, 190046533700Sstefano_zampini /*139*/ MatSetBlockSizes_Default, 1901f4259b30SLisandro Dalcin NULL, 1902f4259b30SLisandro Dalcin NULL, 1903f4259b30SLisandro Dalcin NULL, 1904f4259b30SLisandro Dalcin NULL, 1905d70f29a3SPierre Jolivet /*144*/ MatCreateMPIMatConcatenateSeqMat_MPISBAIJ, 1906d70f29a3SPierre Jolivet NULL, 1907d70f29a3SPierre Jolivet NULL, 190899a7f59eSMark Adams NULL, 190999a7f59eSMark Adams NULL, 19107fb60732SBarry Smith NULL, 1911dec0b466SHong Zhang /*150*/ NULL, 1912dec0b466SHong Zhang NULL}; 1913a30f8f8cSSatish Balay 1914d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMPISBAIJSetPreallocation_MPISBAIJ(Mat B, PetscInt bs, PetscInt d_nz, const PetscInt *d_nnz, PetscInt o_nz, const PetscInt *o_nnz) 1915d71ae5a4SJacob Faibussowitsch { 1916476417e5SBarry Smith Mat_MPISBAIJ *b = (Mat_MPISBAIJ *)B->data; 1917535b19f3SBarry Smith PetscInt i, mbs, Mbs; 19185d2a9ed1SStefano Zampini PetscMPIInt size; 1919a23d5eceSKris Buschelman 1920a23d5eceSKris Buschelman PetscFunctionBegin; 19219566063dSJacob Faibussowitsch PetscCall(MatSetBlockSize(B, PetscAbs(bs))); 19229566063dSJacob Faibussowitsch PetscCall(PetscLayoutSetUp(B->rmap)); 19239566063dSJacob Faibussowitsch PetscCall(PetscLayoutSetUp(B->cmap)); 19249566063dSJacob Faibussowitsch PetscCall(PetscLayoutGetBlockSize(B->rmap, &bs)); 19255f80ce2aSJacob Faibussowitsch PetscCheck(B->rmap->N <= B->cmap->N, PetscObjectComm((PetscObject)B), PETSC_ERR_SUP, "MPISBAIJ matrix cannot have more rows %" PetscInt_FMT " than columns %" PetscInt_FMT, B->rmap->N, B->cmap->N); 19265f80ce2aSJacob Faibussowitsch PetscCheck(B->rmap->n <= B->cmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "MPISBAIJ matrix cannot have more local rows %" PetscInt_FMT " than columns %" PetscInt_FMT, B->rmap->n, B->cmap->n); 1927899cda47SBarry Smith 1928d0f46423SBarry Smith mbs = B->rmap->n / bs; 1929d0f46423SBarry Smith Mbs = B->rmap->N / bs; 19305f80ce2aSJacob Faibussowitsch PetscCheck(mbs * bs == B->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "No of local rows %" PetscInt_FMT " must be divisible by blocksize %" PetscInt_FMT, B->rmap->N, bs); 1931a23d5eceSKris Buschelman 1932d0f46423SBarry Smith B->rmap->bs = bs; 1933a23d5eceSKris Buschelman b->bs2 = bs * bs; 1934a23d5eceSKris Buschelman b->mbs = mbs; 1935a23d5eceSKris Buschelman b->Mbs = Mbs; 1936de64b629SHong Zhang b->nbs = B->cmap->n / bs; 1937de64b629SHong Zhang b->Nbs = B->cmap->N / bs; 1938a23d5eceSKris Buschelman 1939ad540459SPierre Jolivet for (i = 0; i <= b->size; i++) b->rangebs[i] = B->rmap->range[i] / bs; 1940d0f46423SBarry Smith b->rstartbs = B->rmap->rstart / bs; 1941d0f46423SBarry Smith b->rendbs = B->rmap->rend / bs; 1942a23d5eceSKris Buschelman 1943d0f46423SBarry Smith b->cstartbs = B->cmap->rstart / bs; 1944d0f46423SBarry Smith b->cendbs = B->cmap->rend / bs; 1945a23d5eceSKris Buschelman 1946cb7b82ddSBarry Smith #if defined(PETSC_USE_CTABLE) 1947eec179cfSJacob Faibussowitsch PetscCall(PetscHMapIDestroy(&b->colmap)); 1948cb7b82ddSBarry Smith #else 19499566063dSJacob Faibussowitsch PetscCall(PetscFree(b->colmap)); 1950cb7b82ddSBarry Smith #endif 19519566063dSJacob Faibussowitsch PetscCall(PetscFree(b->garray)); 19529566063dSJacob Faibussowitsch PetscCall(VecDestroy(&b->lvec)); 19539566063dSJacob Faibussowitsch PetscCall(VecScatterDestroy(&b->Mvctx)); 19549566063dSJacob Faibussowitsch PetscCall(VecDestroy(&b->slvec0)); 19559566063dSJacob Faibussowitsch PetscCall(VecDestroy(&b->slvec0b)); 19569566063dSJacob Faibussowitsch PetscCall(VecDestroy(&b->slvec1)); 19579566063dSJacob Faibussowitsch PetscCall(VecDestroy(&b->slvec1a)); 19589566063dSJacob Faibussowitsch PetscCall(VecDestroy(&b->slvec1b)); 19599566063dSJacob Faibussowitsch PetscCall(VecScatterDestroy(&b->sMvctx)); 1960cb7b82ddSBarry Smith 1961cb7b82ddSBarry Smith /* Because the B will have been resized we simply destroy it and create a new one each time */ 19629566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size)); 19639566063dSJacob Faibussowitsch PetscCall(MatDestroy(&b->B)); 19649566063dSJacob Faibussowitsch PetscCall(MatCreate(PETSC_COMM_SELF, &b->B)); 19659566063dSJacob Faibussowitsch PetscCall(MatSetSizes(b->B, B->rmap->n, size > 1 ? B->cmap->N : 0, B->rmap->n, size > 1 ? B->cmap->N : 0)); 19669566063dSJacob Faibussowitsch PetscCall(MatSetType(b->B, MATSEQBAIJ)); 1967cb7b82ddSBarry Smith 1968526dfc15SBarry Smith if (!B->preallocated) { 19699566063dSJacob Faibussowitsch PetscCall(MatCreate(PETSC_COMM_SELF, &b->A)); 19709566063dSJacob Faibussowitsch PetscCall(MatSetSizes(b->A, B->rmap->n, B->cmap->n, B->rmap->n, B->cmap->n)); 19719566063dSJacob Faibussowitsch PetscCall(MatSetType(b->A, MATSEQSBAIJ)); 19729566063dSJacob Faibussowitsch PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B), bs, &B->bstash)); 1973526dfc15SBarry Smith } 1974a23d5eceSKris Buschelman 19759566063dSJacob Faibussowitsch PetscCall(MatSeqSBAIJSetPreallocation(b->A, bs, d_nz, d_nnz)); 19769566063dSJacob Faibussowitsch PetscCall(MatSeqBAIJSetPreallocation(b->B, bs, o_nz, o_nnz)); 197726fbe8dcSKarl Rupp 1978526dfc15SBarry Smith B->preallocated = PETSC_TRUE; 1979cb7b82ddSBarry Smith B->was_assembled = PETSC_FALSE; 1980cb7b82ddSBarry Smith B->assembled = PETSC_FALSE; 19813ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1982a23d5eceSKris Buschelman } 1983a23d5eceSKris Buschelman 1984d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMPISBAIJSetPreallocationCSR_MPISBAIJ(Mat B, PetscInt bs, const PetscInt ii[], const PetscInt jj[], const PetscScalar V[]) 1985d71ae5a4SJacob Faibussowitsch { 198602106b30SBarry Smith PetscInt m, rstart, cend; 1987f4259b30SLisandro Dalcin PetscInt i, j, d, nz, bd, nz_max = 0, *d_nnz = NULL, *o_nnz = NULL; 1988f4259b30SLisandro Dalcin const PetscInt *JJ = NULL; 1989f4259b30SLisandro Dalcin PetscScalar *values = NULL; 1990bb80cfbbSStefano Zampini PetscBool roworiented = ((Mat_MPISBAIJ *)B->data)->roworiented; 19913bd0feecSPierre Jolivet PetscBool nooffprocentries; 1992dfb205c3SBarry Smith 1993dfb205c3SBarry Smith PetscFunctionBegin; 19945f80ce2aSJacob Faibussowitsch PetscCheck(bs >= 1, PetscObjectComm((PetscObject)B), PETSC_ERR_ARG_OUTOFRANGE, "Invalid block size specified, must be positive but it is %" PetscInt_FMT, bs); 19959566063dSJacob Faibussowitsch PetscCall(PetscLayoutSetBlockSize(B->rmap, bs)); 19969566063dSJacob Faibussowitsch PetscCall(PetscLayoutSetBlockSize(B->cmap, bs)); 19979566063dSJacob Faibussowitsch PetscCall(PetscLayoutSetUp(B->rmap)); 19989566063dSJacob Faibussowitsch PetscCall(PetscLayoutSetUp(B->cmap)); 19999566063dSJacob Faibussowitsch PetscCall(PetscLayoutGetBlockSize(B->rmap, &bs)); 2000dfb205c3SBarry Smith m = B->rmap->n / bs; 2001dfb205c3SBarry Smith rstart = B->rmap->rstart / bs; 2002dfb205c3SBarry Smith cend = B->cmap->rend / bs; 2003dfb205c3SBarry Smith 20045f80ce2aSJacob Faibussowitsch PetscCheck(!ii[0], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %" PetscInt_FMT, ii[0]); 20059566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(m, &d_nnz, m, &o_nnz)); 2006dfb205c3SBarry Smith for (i = 0; i < m; i++) { 2007dfb205c3SBarry Smith nz = ii[i + 1] - ii[i]; 20085f80ce2aSJacob Faibussowitsch PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Local row %" PetscInt_FMT " has a negative number of columns %" PetscInt_FMT, i, nz); 20090cd7f59aSBarry Smith /* count the ones on the diagonal and above, split into diagonal and off diagonal portions. */ 2010dfb205c3SBarry Smith JJ = jj + ii[i]; 20110cd7f59aSBarry Smith bd = 0; 2012dfb205c3SBarry Smith for (j = 0; j < nz; j++) { 20130cd7f59aSBarry Smith if (*JJ >= i + rstart) break; 2014dfb205c3SBarry Smith JJ++; 20150cd7f59aSBarry Smith bd++; 2016dfb205c3SBarry Smith } 2017dfb205c3SBarry Smith d = 0; 2018dfb205c3SBarry Smith for (; j < nz; j++) { 2019dfb205c3SBarry Smith if (*JJ++ >= cend) break; 2020dfb205c3SBarry Smith d++; 2021dfb205c3SBarry Smith } 2022dfb205c3SBarry Smith d_nnz[i] = d; 20230cd7f59aSBarry Smith o_nnz[i] = nz - d - bd; 20240cd7f59aSBarry Smith nz = nz - bd; 20250cd7f59aSBarry Smith nz_max = PetscMax(nz_max, nz); 2026dfb205c3SBarry Smith } 20279566063dSJacob Faibussowitsch PetscCall(MatMPISBAIJSetPreallocation(B, bs, 0, d_nnz, 0, o_nnz)); 20289566063dSJacob Faibussowitsch PetscCall(MatSetOption(B, MAT_IGNORE_LOWER_TRIANGULAR, PETSC_TRUE)); 20299566063dSJacob Faibussowitsch PetscCall(PetscFree2(d_nnz, o_nnz)); 2030dfb205c3SBarry Smith 2031dfb205c3SBarry Smith values = (PetscScalar *)V; 203248a46eb9SPierre Jolivet if (!values) PetscCall(PetscCalloc1(bs * bs * nz_max, &values)); 2033dfb205c3SBarry Smith for (i = 0; i < m; i++) { 2034dfb205c3SBarry Smith PetscInt row = i + rstart; 2035dfb205c3SBarry Smith PetscInt ncols = ii[i + 1] - ii[i]; 2036dfb205c3SBarry Smith const PetscInt *icols = jj + ii[i]; 2037bb80cfbbSStefano Zampini if (bs == 1 || !roworiented) { /* block ordering matches the non-nested layout of MatSetValues so we can insert entire rows */ 2038dfb205c3SBarry Smith const PetscScalar *svals = values + (V ? (bs * bs * ii[i]) : 0); 20399566063dSJacob Faibussowitsch PetscCall(MatSetValuesBlocked_MPISBAIJ(B, 1, &row, ncols, icols, svals, INSERT_VALUES)); 2040bb80cfbbSStefano Zampini } else { /* block ordering does not match so we can only insert one block at a time. */ 2041bb80cfbbSStefano Zampini PetscInt j; 20420cd7f59aSBarry Smith for (j = 0; j < ncols; j++) { 20430cd7f59aSBarry Smith const PetscScalar *svals = values + (V ? (bs * bs * (ii[i] + j)) : 0); 20449566063dSJacob Faibussowitsch PetscCall(MatSetValuesBlocked_MPISBAIJ(B, 1, &row, 1, &icols[j], svals, INSERT_VALUES)); 20450cd7f59aSBarry Smith } 20460cd7f59aSBarry Smith } 2047dfb205c3SBarry Smith } 2048dfb205c3SBarry Smith 20499566063dSJacob Faibussowitsch if (!V) PetscCall(PetscFree(values)); 20503bd0feecSPierre Jolivet nooffprocentries = B->nooffprocentries; 20513bd0feecSPierre Jolivet B->nooffprocentries = PETSC_TRUE; 20529566063dSJacob Faibussowitsch PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 20539566063dSJacob Faibussowitsch PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 20543bd0feecSPierre Jolivet B->nooffprocentries = nooffprocentries; 20553bd0feecSPierre Jolivet 20569566063dSJacob Faibussowitsch PetscCall(MatSetOption(B, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE)); 20573ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 2058dfb205c3SBarry Smith } 2059dfb205c3SBarry Smith 20600bad9183SKris Buschelman /*MC 2061fafad747SKris Buschelman MATMPISBAIJ - MATMPISBAIJ = "mpisbaij" - A matrix type to be used for distributed symmetric sparse block matrices, 2062828413b8SBarry Smith based on block compressed sparse row format. Only the upper triangular portion of the "diagonal" portion of 2063828413b8SBarry Smith the matrix is stored. 2064828413b8SBarry Smith 2065828413b8SBarry Smith For complex numbers by default this matrix is symmetric, NOT Hermitian symmetric. To make it Hermitian symmetric you 206611a5261eSBarry Smith can call `MatSetOption`(`Mat`, `MAT_HERMITIAN`); 20670bad9183SKris Buschelman 20680bad9183SKris Buschelman Options Database Keys: 206911a5261eSBarry Smith . -mat_type mpisbaij - sets the matrix type to "mpisbaij" during a call to `MatSetFromOptions()` 20700bad9183SKris Buschelman 207111a5261eSBarry Smith Note: 2072476417e5SBarry Smith The number of rows in the matrix must be less than or equal to the number of columns. Similarly the number of rows in the 2073476417e5SBarry Smith diagonal portion of the matrix of each process has to less than or equal the number of columns. 2074476417e5SBarry Smith 20750bad9183SKris Buschelman Level: beginner 20760bad9183SKris Buschelman 207711a5261eSBarry Smith .seealso: `MATSBAIJ`, `MATBAIJ`, `MatCreateBAIJ()`, `MATSEQSBAIJ`, `MatType` 20780bad9183SKris Buschelman M*/ 20790bad9183SKris Buschelman 2080d71ae5a4SJacob Faibussowitsch PETSC_EXTERN PetscErrorCode MatCreate_MPISBAIJ(Mat B) 2081d71ae5a4SJacob Faibussowitsch { 2082b5df2d14SHong Zhang Mat_MPISBAIJ *b; 208394ae4db5SBarry Smith PetscBool flg = PETSC_FALSE; 2084b5df2d14SHong Zhang 2085b5df2d14SHong Zhang PetscFunctionBegin; 20864dfa11a4SJacob Faibussowitsch PetscCall(PetscNew(&b)); 2087b0a32e0cSBarry Smith B->data = (void *)b; 20889566063dSJacob Faibussowitsch PetscCall(PetscMemcpy(B->ops, &MatOps_Values, sizeof(struct _MatOps))); 2089b5df2d14SHong Zhang 2090b5df2d14SHong Zhang B->ops->destroy = MatDestroy_MPISBAIJ; 2091b5df2d14SHong Zhang B->ops->view = MatView_MPISBAIJ; 2092b5df2d14SHong Zhang B->assembled = PETSC_FALSE; 2093b5df2d14SHong Zhang B->insertmode = NOT_SET_VALUES; 209426fbe8dcSKarl Rupp 20959566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)B), &b->rank)); 20969566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &b->size)); 2097b5df2d14SHong Zhang 2098b5df2d14SHong Zhang /* build local table of row and column ownerships */ 20999566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(b->size + 2, &b->rangebs)); 2100b5df2d14SHong Zhang 2101b5df2d14SHong Zhang /* build cache for off array entries formed */ 21029566063dSJacob Faibussowitsch PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B), 1, &B->stash)); 210326fbe8dcSKarl Rupp 2104b5df2d14SHong Zhang b->donotstash = PETSC_FALSE; 21050298fd71SBarry Smith b->colmap = NULL; 21060298fd71SBarry Smith b->garray = NULL; 2107b5df2d14SHong Zhang b->roworiented = PETSC_TRUE; 2108b5df2d14SHong Zhang 2109b5df2d14SHong Zhang /* stuff used in block assembly */ 2110f4259b30SLisandro Dalcin b->barray = NULL; 2111b5df2d14SHong Zhang 2112b5df2d14SHong Zhang /* stuff used for matrix vector multiply */ 2113f4259b30SLisandro Dalcin b->lvec = NULL; 2114f4259b30SLisandro Dalcin b->Mvctx = NULL; 2115f4259b30SLisandro Dalcin b->slvec0 = NULL; 2116f4259b30SLisandro Dalcin b->slvec0b = NULL; 2117f4259b30SLisandro Dalcin b->slvec1 = NULL; 2118f4259b30SLisandro Dalcin b->slvec1a = NULL; 2119f4259b30SLisandro Dalcin b->slvec1b = NULL; 2120f4259b30SLisandro Dalcin b->sMvctx = NULL; 2121b5df2d14SHong Zhang 2122b5df2d14SHong Zhang /* stuff for MatGetRow() */ 2123f4259b30SLisandro Dalcin b->rowindices = NULL; 2124f4259b30SLisandro Dalcin b->rowvalues = NULL; 2125b5df2d14SHong Zhang b->getrowactive = PETSC_FALSE; 2126b5df2d14SHong Zhang 2127b5df2d14SHong Zhang /* hash table stuff */ 2128f4259b30SLisandro Dalcin b->ht = NULL; 2129f4259b30SLisandro Dalcin b->hd = NULL; 2130b5df2d14SHong Zhang b->ht_size = 0; 2131b5df2d14SHong Zhang b->ht_flag = PETSC_FALSE; 2132b5df2d14SHong Zhang b->ht_fact = 0; 2133b5df2d14SHong Zhang b->ht_total_ct = 0; 2134b5df2d14SHong Zhang b->ht_insert_ct = 0; 2135b5df2d14SHong Zhang 21367dae84e0SHong Zhang /* stuff for MatCreateSubMatrices_MPIBAIJ_local() */ 21377a868f3eSHong Zhang b->ijonly = PETSC_FALSE; 21387a868f3eSHong Zhang 2139f4259b30SLisandro Dalcin b->in_loc = NULL; 2140f4259b30SLisandro Dalcin b->v_loc = NULL; 214159ffdab8SBarry Smith b->n_loc = 0; 214294ae4db5SBarry Smith 21439566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatStoreValues_C", MatStoreValues_MPISBAIJ)); 21449566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatRetrieveValues_C", MatRetrieveValues_MPISBAIJ)); 21459566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPISBAIJSetPreallocation_C", MatMPISBAIJSetPreallocation_MPISBAIJ)); 21469566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPISBAIJSetPreallocationCSR_C", MatMPISBAIJSetPreallocationCSR_MPISBAIJ)); 21476214f412SHong Zhang #if defined(PETSC_HAVE_ELEMENTAL) 21489566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpisbaij_elemental_C", MatConvert_MPISBAIJ_Elemental)); 21496214f412SHong Zhang #endif 2150d24d4204SJose E. Roman #if defined(PETSC_HAVE_SCALAPACK) 21519566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpisbaij_scalapack_C", MatConvert_SBAIJ_ScaLAPACK)); 2152d24d4204SJose E. Roman #endif 21539566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpisbaij_mpiaij_C", MatConvert_MPISBAIJ_Basic)); 21549566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpisbaij_mpibaij_C", MatConvert_MPISBAIJ_Basic)); 2155aa5a9175SDahai Guo 2156b94d7dedSBarry Smith B->symmetric = PETSC_BOOL3_TRUE; 2157b94d7dedSBarry Smith B->structurally_symmetric = PETSC_BOOL3_TRUE; 2158b94d7dedSBarry Smith B->symmetry_eternal = PETSC_TRUE; 2159b94d7dedSBarry Smith B->structural_symmetry_eternal = PETSC_TRUE; 2160eb1ec7c1SStefano Zampini #if defined(PETSC_USE_COMPLEX) 2161b94d7dedSBarry Smith B->hermitian = PETSC_BOOL3_FALSE; 2162eb1ec7c1SStefano Zampini #else 2163b94d7dedSBarry Smith B->hermitian = PETSC_BOOL3_TRUE; 2164eb1ec7c1SStefano Zampini #endif 216513647f61SHong Zhang 21669566063dSJacob Faibussowitsch PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATMPISBAIJ)); 2167d0609cedSBarry Smith PetscOptionsBegin(PetscObjectComm((PetscObject)B), NULL, "Options for loading MPISBAIJ matrix 1", "Mat"); 21689566063dSJacob Faibussowitsch PetscCall(PetscOptionsBool("-mat_use_hash_table", "Use hash table to save memory in constructing matrix", "MatSetOption", flg, &flg, NULL)); 216994ae4db5SBarry Smith if (flg) { 217094ae4db5SBarry Smith PetscReal fact = 1.39; 21719566063dSJacob Faibussowitsch PetscCall(MatSetOption(B, MAT_USE_HASH_TABLE, PETSC_TRUE)); 21729566063dSJacob Faibussowitsch PetscCall(PetscOptionsReal("-mat_use_hash_table", "Use hash table factor", "MatMPIBAIJSetHashTableFactor", fact, &fact, NULL)); 217394ae4db5SBarry Smith if (fact <= 1.0) fact = 1.39; 21749566063dSJacob Faibussowitsch PetscCall(MatMPIBAIJSetHashTableFactor(B, fact)); 21759566063dSJacob Faibussowitsch PetscCall(PetscInfo(B, "Hash table Factor used %5.2g\n", (double)fact)); 217694ae4db5SBarry Smith } 2177d0609cedSBarry Smith PetscOptionsEnd(); 21783ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 2179b5df2d14SHong Zhang } 2180b5df2d14SHong Zhang 2181209238afSKris Buschelman /*MC 2182002d173eSKris Buschelman MATSBAIJ - MATSBAIJ = "sbaij" - A matrix type to be used for symmetric block sparse matrices. 2183209238afSKris Buschelman 218411a5261eSBarry Smith This matrix type is identical to `MATSEQSBAIJ` when constructed with a single process communicator, 218511a5261eSBarry Smith and `MATMPISBAIJ` otherwise. 2186209238afSKris Buschelman 218711a5261eSBarry Smith Options Database Key: 2188c5dec841SPierre Jolivet . -mat_type sbaij - sets the matrix type to `MATSBAIJ` during a call to `MatSetFromOptions()` 2189209238afSKris Buschelman 2190209238afSKris Buschelman Level: beginner 2191209238afSKris Buschelman 2192c5dec841SPierre Jolivet .seealso: `MATSEQSBAIJ`, `MATMPISBAIJ`, `MatCreateSBAIJ()`, `MATSEQSBAIJ`, `MATMPISBAIJ` 2193209238afSKris Buschelman M*/ 2194209238afSKris Buschelman 2195b5df2d14SHong Zhang /*@C 2196b5df2d14SHong Zhang MatMPISBAIJSetPreallocation - For good matrix assembly performance 2197b5df2d14SHong Zhang the user should preallocate the matrix storage by setting the parameters 2198b5df2d14SHong Zhang d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 2199b5df2d14SHong Zhang performance can be increased by more than a factor of 50. 2200b5df2d14SHong Zhang 2201c3339decSBarry Smith Collective 2202b5df2d14SHong Zhang 2203b5df2d14SHong Zhang Input Parameters: 22041c4f3114SJed Brown + B - the matrix 2205bb7ae925SBarry Smith . bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row 2206bb7ae925SBarry Smith blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs() 2207b5df2d14SHong Zhang . d_nz - number of block nonzeros per block row in diagonal portion of local 2208b5df2d14SHong Zhang submatrix (same for all local rows) 2209b5df2d14SHong Zhang . d_nnz - array containing the number of block nonzeros in the various block rows 22106d10fdaeSSatish Balay in the upper triangular and diagonal part of the in diagonal portion of the local 22110298fd71SBarry Smith (possibly different for each block row) or NULL. If you plan to factor the matrix you must leave room 221295742e49SBarry Smith for the diagonal entry and set a value even if it is zero. 2213b5df2d14SHong Zhang . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 2214b5df2d14SHong Zhang submatrix (same for all local rows). 2215b5df2d14SHong Zhang - o_nnz - array containing the number of nonzeros in the various block rows of the 2216c2fc9fa9SBarry Smith off-diagonal portion of the local submatrix that is right of the diagonal 22170298fd71SBarry Smith (possibly different for each block row) or NULL. 2218b5df2d14SHong Zhang 2219b5df2d14SHong Zhang Options Database Keys: 2220a2b725a8SWilliam Gropp + -mat_no_unroll - uses code that does not unroll the loops in the 2221b5df2d14SHong Zhang block calculations (much slower) 2222a2b725a8SWilliam Gropp - -mat_block_size - size of the blocks to use 2223b5df2d14SHong Zhang 2224b5df2d14SHong Zhang Notes: 2225b5df2d14SHong Zhang 222611a5261eSBarry Smith If `PETSC_DECIDE` or `PETSC_DETERMINE` is used for a particular argument on one processor 2227b5df2d14SHong Zhang than it must be used on all processors that share the object for that argument. 2228b5df2d14SHong Zhang 222949a6f317SBarry Smith If the *_nnz parameter is given then the *_nz parameter is ignored 223049a6f317SBarry Smith 2231b5df2d14SHong Zhang Storage Information: 2232b5df2d14SHong Zhang For a square global matrix we define each processor's diagonal portion 2233b5df2d14SHong Zhang to be its local rows and the corresponding columns (a square submatrix); 2234b5df2d14SHong Zhang each processor's off-diagonal portion encompasses the remainder of the 2235b5df2d14SHong Zhang local matrix (a rectangular submatrix). 2236b5df2d14SHong Zhang 2237b5df2d14SHong Zhang The user can specify preallocated storage for the diagonal part of 2238b5df2d14SHong Zhang the local submatrix with either d_nz or d_nnz (not both). Set 223911a5261eSBarry Smith d_nz = `PETSC_DEFAULT` and d_nnz = NULL for PETSc to control dynamic 2240b5df2d14SHong Zhang memory allocation. Likewise, specify preallocated storage for the 2241b5df2d14SHong Zhang off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 2242b5df2d14SHong Zhang 224311a5261eSBarry Smith You can call `MatGetInfo()` to get information on how effective the preallocation was; 2244aa95bbe8SBarry Smith for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 2245aa95bbe8SBarry Smith You can also run with the option -info and look for messages with the string 2246aa95bbe8SBarry Smith malloc in them to see if additional memory allocation was needed. 2247aa95bbe8SBarry Smith 2248b5df2d14SHong Zhang Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 2249b5df2d14SHong Zhang the figure below we depict these three local rows and all columns (0-11). 2250b5df2d14SHong Zhang 2251b5df2d14SHong Zhang .vb 2252b5df2d14SHong Zhang 0 1 2 3 4 5 6 7 8 9 10 11 2253a4b1a0f6SJed Brown -------------------------- 2254c2fc9fa9SBarry Smith row 3 |. . . d d d o o o o o o 2255c2fc9fa9SBarry Smith row 4 |. . . d d d o o o o o o 2256c2fc9fa9SBarry Smith row 5 |. . . d d d o o o o o o 2257a4b1a0f6SJed Brown -------------------------- 2258b5df2d14SHong Zhang .ve 2259b5df2d14SHong Zhang 2260b5df2d14SHong Zhang Thus, any entries in the d locations are stored in the d (diagonal) 2261b5df2d14SHong Zhang submatrix, and any entries in the o locations are stored in the 22626d10fdaeSSatish Balay o (off-diagonal) submatrix. Note that the d matrix is stored in 226311a5261eSBarry Smith `MATSEQSBAIJ` format and the o submatrix in `MATSEQBAIJ` format. 2264b5df2d14SHong Zhang 22656d10fdaeSSatish Balay Now d_nz should indicate the number of block nonzeros per row in the upper triangular 22666d10fdaeSSatish Balay plus the diagonal part of the d matrix, 2267c2fc9fa9SBarry Smith and o_nz should indicate the number of block nonzeros per row in the o matrix 2268c2fc9fa9SBarry Smith 2269b5df2d14SHong Zhang In general, for PDE problems in which most nonzeros are near the diagonal, 2270b5df2d14SHong Zhang one expects d_nz >> o_nz. For large problems you MUST preallocate memory 2271b5df2d14SHong Zhang or you will get TERRIBLE performance; see the users' manual chapter on 2272b5df2d14SHong Zhang matrices. 2273b5df2d14SHong Zhang 2274b5df2d14SHong Zhang Level: intermediate 2275b5df2d14SHong Zhang 227611a5261eSBarry Smith .seealso: `MATMPISBAIJ`, `MATSBAIJ`, `MatCreate()`, `MatCreateSeqSBAIJ()`, `MatSetValues()`, `MatCreateBAIJ()`, `PetscSplitOwnership()` 2277b5df2d14SHong Zhang @*/ 2278d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMPISBAIJSetPreallocation(Mat B, PetscInt bs, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[]) 2279d71ae5a4SJacob Faibussowitsch { 2280b5df2d14SHong Zhang PetscFunctionBegin; 22816ba663aaSJed Brown PetscValidHeaderSpecific(B, MAT_CLASSID, 1); 22826ba663aaSJed Brown PetscValidType(B, 1); 22836ba663aaSJed Brown PetscValidLogicalCollectiveInt(B, bs, 2); 2284cac4c232SBarry Smith PetscTryMethod(B, "MatMPISBAIJSetPreallocation_C", (Mat, PetscInt, PetscInt, const PetscInt[], PetscInt, const PetscInt[]), (B, bs, d_nz, d_nnz, o_nz, o_nnz)); 22853ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 2286b5df2d14SHong Zhang } 2287b5df2d14SHong Zhang 2288a30f8f8cSSatish Balay /*@C 228911a5261eSBarry Smith MatCreateSBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format, `MATSBAIJ`, 2290a30f8f8cSSatish Balay (block compressed row). For good matrix assembly performance 2291a30f8f8cSSatish Balay the user should preallocate the matrix storage by setting the parameters 2292a30f8f8cSSatish Balay d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 2293a30f8f8cSSatish Balay performance can be increased by more than a factor of 50. 2294a30f8f8cSSatish Balay 2295d083f849SBarry Smith Collective 2296a30f8f8cSSatish Balay 2297a30f8f8cSSatish Balay Input Parameters: 2298a30f8f8cSSatish Balay + comm - MPI communicator 229911a5261eSBarry Smith . bs - size of block, the blocks are ALWAYS square. One can use `MatSetBlockSizes()` to set a different row and column blocksize but the row 2300bb7ae925SBarry Smith blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs() 230111a5261eSBarry Smith . m - number of local rows (or `PETSC_DECIDE` to have calculated if M is given) 2302a30f8f8cSSatish Balay This value should be the same as the local size used in creating the 2303a30f8f8cSSatish Balay y vector for the matrix-vector product y = Ax. 230411a5261eSBarry Smith . n - number of local columns (or `PETSC_DECIDE` to have calculated if N is given) 2305a30f8f8cSSatish Balay This value should be the same as the local size used in creating the 2306a30f8f8cSSatish Balay x vector for the matrix-vector product y = Ax. 230711a5261eSBarry Smith . M - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given) 230811a5261eSBarry Smith . N - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given) 2309a30f8f8cSSatish Balay . d_nz - number of block nonzeros per block row in diagonal portion of local 2310a30f8f8cSSatish Balay submatrix (same for all local rows) 2311a30f8f8cSSatish Balay . d_nnz - array containing the number of block nonzeros in the various block rows 23126d10fdaeSSatish Balay in the upper triangular portion of the in diagonal portion of the local 23130298fd71SBarry Smith (possibly different for each block block row) or NULL. 231495742e49SBarry Smith If you plan to factor the matrix you must leave room for the diagonal entry and 231595742e49SBarry Smith set its value even if it is zero. 2316a30f8f8cSSatish Balay . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 2317a30f8f8cSSatish Balay submatrix (same for all local rows). 2318a30f8f8cSSatish Balay - o_nnz - array containing the number of nonzeros in the various block rows of the 2319a30f8f8cSSatish Balay off-diagonal portion of the local submatrix (possibly different for 23200298fd71SBarry Smith each block row) or NULL. 2321a30f8f8cSSatish Balay 2322a30f8f8cSSatish Balay Output Parameter: 2323a30f8f8cSSatish Balay . A - the matrix 2324a30f8f8cSSatish Balay 2325a30f8f8cSSatish Balay Options Database Keys: 2326a2b725a8SWilliam Gropp + -mat_no_unroll - uses code that does not unroll the loops in the 2327a30f8f8cSSatish Balay block calculations (much slower) 2328a30f8f8cSSatish Balay . -mat_block_size - size of the blocks to use 2329a2b725a8SWilliam Gropp - -mat_mpi - use the parallel matrix data structures even on one processor 2330a30f8f8cSSatish Balay (defaults to using SeqBAIJ format on one processor) 2331a30f8f8cSSatish Balay 233211a5261eSBarry Smith It is recommended that one use the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`, 2333f6f02116SRichard Tran Mills MatXXXXSetPreallocation() paradigm instead of this routine directly. 233411a5261eSBarry Smith [MatXXXXSetPreallocation() is, for example, `MatSeqAIJSetPreallocation()`] 2335175b88e8SBarry Smith 2336a30f8f8cSSatish Balay Notes: 2337d1be2dadSMatthew Knepley The number of rows and columns must be divisible by blocksize. 23386d6d819aSHong Zhang This matrix type does not support complex Hermitian operation. 2339d1be2dadSMatthew Knepley 2340a30f8f8cSSatish Balay The user MUST specify either the local or global matrix dimensions 2341a30f8f8cSSatish Balay (possibly both). 2342a30f8f8cSSatish Balay 234311a5261eSBarry Smith If `PETSC_DECIDE` or `PETSC_DETERMINE` is used for a particular argument on one processor 2344a30f8f8cSSatish Balay than it must be used on all processors that share the object for that argument. 2345a30f8f8cSSatish Balay 234649a6f317SBarry Smith If the *_nnz parameter is given then the *_nz parameter is ignored 234749a6f317SBarry Smith 2348a30f8f8cSSatish Balay Storage Information: 2349a30f8f8cSSatish Balay For a square global matrix we define each processor's diagonal portion 2350a30f8f8cSSatish Balay to be its local rows and the corresponding columns (a square submatrix); 2351a30f8f8cSSatish Balay each processor's off-diagonal portion encompasses the remainder of the 2352a30f8f8cSSatish Balay local matrix (a rectangular submatrix). 2353a30f8f8cSSatish Balay 2354a30f8f8cSSatish Balay The user can specify preallocated storage for the diagonal part of 2355a30f8f8cSSatish Balay the local submatrix with either d_nz or d_nnz (not both). Set 23560298fd71SBarry Smith d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic 2357a30f8f8cSSatish Balay memory allocation. Likewise, specify preallocated storage for the 2358a30f8f8cSSatish Balay off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 2359a30f8f8cSSatish Balay 2360a30f8f8cSSatish Balay Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 2361a30f8f8cSSatish Balay the figure below we depict these three local rows and all columns (0-11). 2362a30f8f8cSSatish Balay 2363a30f8f8cSSatish Balay .vb 2364a30f8f8cSSatish Balay 0 1 2 3 4 5 6 7 8 9 10 11 2365a4b1a0f6SJed Brown -------------------------- 2366c2fc9fa9SBarry Smith row 3 |. . . d d d o o o o o o 2367c2fc9fa9SBarry Smith row 4 |. . . d d d o o o o o o 2368c2fc9fa9SBarry Smith row 5 |. . . d d d o o o o o o 2369a4b1a0f6SJed Brown -------------------------- 2370a30f8f8cSSatish Balay .ve 2371a30f8f8cSSatish Balay 2372a30f8f8cSSatish Balay Thus, any entries in the d locations are stored in the d (diagonal) 2373a30f8f8cSSatish Balay submatrix, and any entries in the o locations are stored in the 23746d10fdaeSSatish Balay o (off-diagonal) submatrix. Note that the d matrix is stored in 237511a5261eSBarry Smith MatSeqSBAIJ format and the o submatrix in `MATSEQBAIJ` format. 2376a30f8f8cSSatish Balay 23776d10fdaeSSatish Balay Now d_nz should indicate the number of block nonzeros per row in the upper triangular 23786d10fdaeSSatish Balay plus the diagonal part of the d matrix, 2379a30f8f8cSSatish Balay and o_nz should indicate the number of block nonzeros per row in the o matrix. 2380a30f8f8cSSatish Balay In general, for PDE problems in which most nonzeros are near the diagonal, 2381a30f8f8cSSatish Balay one expects d_nz >> o_nz. For large problems you MUST preallocate memory 2382a30f8f8cSSatish Balay or you will get TERRIBLE performance; see the users' manual chapter on 2383a30f8f8cSSatish Balay matrices. 2384a30f8f8cSSatish Balay 2385a30f8f8cSSatish Balay Level: intermediate 2386a30f8f8cSSatish Balay 238711a5261eSBarry Smith .seealso: `MATSBAIJ`, `MatCreate()`, `MatCreateSeqSBAIJ()`, `MatSetValues()`, `MatCreateBAIJ()` 2388a30f8f8cSSatish Balay @*/ 2389a30f8f8cSSatish Balay 2390d71ae5a4SJacob Faibussowitsch PetscErrorCode MatCreateSBAIJ(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[], Mat *A) 2391d71ae5a4SJacob Faibussowitsch { 23921302d50aSBarry Smith PetscMPIInt size; 2393a30f8f8cSSatish Balay 2394a30f8f8cSSatish Balay PetscFunctionBegin; 23959566063dSJacob Faibussowitsch PetscCall(MatCreate(comm, A)); 23969566063dSJacob Faibussowitsch PetscCall(MatSetSizes(*A, m, n, M, N)); 23979566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_size(comm, &size)); 2398273d9f13SBarry Smith if (size > 1) { 23999566063dSJacob Faibussowitsch PetscCall(MatSetType(*A, MATMPISBAIJ)); 24009566063dSJacob Faibussowitsch PetscCall(MatMPISBAIJSetPreallocation(*A, bs, d_nz, d_nnz, o_nz, o_nnz)); 2401273d9f13SBarry Smith } else { 24029566063dSJacob Faibussowitsch PetscCall(MatSetType(*A, MATSEQSBAIJ)); 24039566063dSJacob Faibussowitsch PetscCall(MatSeqSBAIJSetPreallocation(*A, bs, d_nz, d_nnz)); 2404273d9f13SBarry Smith } 24053ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 2406a30f8f8cSSatish Balay } 2407a30f8f8cSSatish Balay 2408d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatDuplicate_MPISBAIJ(Mat matin, MatDuplicateOption cpvalues, Mat *newmat) 2409d71ae5a4SJacob Faibussowitsch { 2410a30f8f8cSSatish Balay Mat mat; 2411a30f8f8cSSatish Balay Mat_MPISBAIJ *a, *oldmat = (Mat_MPISBAIJ *)matin->data; 2412d0f46423SBarry Smith PetscInt len = 0, nt, bs = matin->rmap->bs, mbs = oldmat->mbs; 2413387bc808SHong Zhang PetscScalar *array; 2414a30f8f8cSSatish Balay 2415a30f8f8cSSatish Balay PetscFunctionBegin; 2416f4259b30SLisandro Dalcin *newmat = NULL; 241726fbe8dcSKarl Rupp 24189566063dSJacob Faibussowitsch PetscCall(MatCreate(PetscObjectComm((PetscObject)matin), &mat)); 24199566063dSJacob Faibussowitsch PetscCall(MatSetSizes(mat, matin->rmap->n, matin->cmap->n, matin->rmap->N, matin->cmap->N)); 24209566063dSJacob Faibussowitsch PetscCall(MatSetType(mat, ((PetscObject)matin)->type_name)); 24219566063dSJacob Faibussowitsch PetscCall(PetscLayoutReference(matin->rmap, &mat->rmap)); 24229566063dSJacob Faibussowitsch PetscCall(PetscLayoutReference(matin->cmap, &mat->cmap)); 2423e1b6402fSHong Zhang 2424d5f3da31SBarry Smith mat->factortype = matin->factortype; 2425273d9f13SBarry Smith mat->preallocated = PETSC_TRUE; 242682327fa8SHong Zhang mat->assembled = PETSC_TRUE; 24277fff6886SHong Zhang mat->insertmode = NOT_SET_VALUES; 24287fff6886SHong Zhang 2429b5df2d14SHong Zhang a = (Mat_MPISBAIJ *)mat->data; 2430a30f8f8cSSatish Balay a->bs2 = oldmat->bs2; 2431a30f8f8cSSatish Balay a->mbs = oldmat->mbs; 2432a30f8f8cSSatish Balay a->nbs = oldmat->nbs; 2433a30f8f8cSSatish Balay a->Mbs = oldmat->Mbs; 2434a30f8f8cSSatish Balay a->Nbs = oldmat->Nbs; 2435a30f8f8cSSatish Balay 2436a30f8f8cSSatish Balay a->size = oldmat->size; 2437a30f8f8cSSatish Balay a->rank = oldmat->rank; 2438a30f8f8cSSatish Balay a->donotstash = oldmat->donotstash; 2439a30f8f8cSSatish Balay a->roworiented = oldmat->roworiented; 2440f4259b30SLisandro Dalcin a->rowindices = NULL; 2441f4259b30SLisandro Dalcin a->rowvalues = NULL; 2442a30f8f8cSSatish Balay a->getrowactive = PETSC_FALSE; 2443f4259b30SLisandro Dalcin a->barray = NULL; 2444899cda47SBarry Smith a->rstartbs = oldmat->rstartbs; 2445899cda47SBarry Smith a->rendbs = oldmat->rendbs; 2446899cda47SBarry Smith a->cstartbs = oldmat->cstartbs; 2447899cda47SBarry Smith a->cendbs = oldmat->cendbs; 2448a30f8f8cSSatish Balay 2449a30f8f8cSSatish Balay /* hash table stuff */ 2450f4259b30SLisandro Dalcin a->ht = NULL; 2451f4259b30SLisandro Dalcin a->hd = NULL; 2452a30f8f8cSSatish Balay a->ht_size = 0; 2453a30f8f8cSSatish Balay a->ht_flag = oldmat->ht_flag; 2454a30f8f8cSSatish Balay a->ht_fact = oldmat->ht_fact; 2455a30f8f8cSSatish Balay a->ht_total_ct = 0; 2456a30f8f8cSSatish Balay a->ht_insert_ct = 0; 2457a30f8f8cSSatish Balay 24589566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(a->rangebs, oldmat->rangebs, a->size + 2)); 2459a30f8f8cSSatish Balay if (oldmat->colmap) { 2460a30f8f8cSSatish Balay #if defined(PETSC_USE_CTABLE) 2461eec179cfSJacob Faibussowitsch PetscCall(PetscHMapIDuplicate(oldmat->colmap, &a->colmap)); 2462a30f8f8cSSatish Balay #else 24639566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(a->Nbs, &a->colmap)); 24649566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(a->colmap, oldmat->colmap, a->Nbs)); 2465a30f8f8cSSatish Balay #endif 2466f4259b30SLisandro Dalcin } else a->colmap = NULL; 2467387bc808SHong Zhang 2468a30f8f8cSSatish Balay if (oldmat->garray && (len = ((Mat_SeqBAIJ *)(oldmat->B->data))->nbs)) { 24699566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(len, &a->garray)); 24709566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(a->garray, oldmat->garray, len)); 2471f4259b30SLisandro Dalcin } else a->garray = NULL; 2472a30f8f8cSSatish Balay 24739566063dSJacob Faibussowitsch PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)matin), matin->rmap->bs, &mat->bstash)); 24749566063dSJacob Faibussowitsch PetscCall(VecDuplicate(oldmat->lvec, &a->lvec)); 24759566063dSJacob Faibussowitsch PetscCall(VecScatterCopy(oldmat->Mvctx, &a->Mvctx)); 247682327fa8SHong Zhang 24779566063dSJacob Faibussowitsch PetscCall(VecDuplicate(oldmat->slvec0, &a->slvec0)); 24789566063dSJacob Faibussowitsch PetscCall(VecDuplicate(oldmat->slvec1, &a->slvec1)); 2479387bc808SHong Zhang 24809566063dSJacob Faibussowitsch PetscCall(VecGetLocalSize(a->slvec1, &nt)); 24819566063dSJacob Faibussowitsch PetscCall(VecGetArray(a->slvec1, &array)); 24829566063dSJacob Faibussowitsch PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, bs * mbs, array, &a->slvec1a)); 24839566063dSJacob Faibussowitsch PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, nt - bs * mbs, array + bs * mbs, &a->slvec1b)); 24849566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(a->slvec1, &array)); 24859566063dSJacob Faibussowitsch PetscCall(VecGetArray(a->slvec0, &array)); 24869566063dSJacob Faibussowitsch PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, nt - bs * mbs, array + bs * mbs, &a->slvec0b)); 24879566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(a->slvec0, &array)); 2488387bc808SHong Zhang 2489387bc808SHong Zhang /* ierr = VecScatterCopy(oldmat->sMvctx,&a->sMvctx); - not written yet, replaced by the lazy trick: */ 24909566063dSJacob Faibussowitsch PetscCall(PetscObjectReference((PetscObject)oldmat->sMvctx)); 2491387bc808SHong Zhang a->sMvctx = oldmat->sMvctx; 249282327fa8SHong Zhang 24939566063dSJacob Faibussowitsch PetscCall(MatDuplicate(oldmat->A, cpvalues, &a->A)); 24949566063dSJacob Faibussowitsch PetscCall(MatDuplicate(oldmat->B, cpvalues, &a->B)); 24959566063dSJacob Faibussowitsch PetscCall(PetscFunctionListDuplicate(((PetscObject)matin)->qlist, &((PetscObject)mat)->qlist)); 2496a30f8f8cSSatish Balay *newmat = mat; 24973ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 2498a30f8f8cSSatish Balay } 2499a30f8f8cSSatish Balay 2500618cc2edSLisandro Dalcin /* Used for both MPIBAIJ and MPISBAIJ matrices */ 2501618cc2edSLisandro Dalcin #define MatLoad_MPISBAIJ_Binary MatLoad_MPIBAIJ_Binary 2502618cc2edSLisandro Dalcin 2503d71ae5a4SJacob Faibussowitsch PetscErrorCode MatLoad_MPISBAIJ(Mat mat, PetscViewer viewer) 2504d71ae5a4SJacob Faibussowitsch { 25057f489da9SVaclav Hapla PetscBool isbinary; 250695936485SShri Abhyankar 250795936485SShri Abhyankar PetscFunctionBegin; 25089566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary)); 25095f80ce2aSJacob Faibussowitsch PetscCheck(isbinary, PetscObjectComm((PetscObject)viewer), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)mat)->type_name); 25109566063dSJacob Faibussowitsch PetscCall(MatLoad_MPISBAIJ_Binary(mat, viewer)); 25113ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 251295936485SShri Abhyankar } 251395936485SShri Abhyankar 2514d71ae5a4SJacob Faibussowitsch PetscErrorCode MatGetRowMaxAbs_MPISBAIJ(Mat A, Vec v, PetscInt idx[]) 2515d71ae5a4SJacob Faibussowitsch { 251624d5174aSHong Zhang Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data; 2517f4c0e9e4SHong Zhang Mat_SeqBAIJ *b = (Mat_SeqBAIJ *)(a->B)->data; 2518ca54ac64SHong Zhang PetscReal atmp; 251987828ca2SBarry Smith PetscReal *work, *svalues, *rvalues; 25201302d50aSBarry Smith PetscInt i, bs, mbs, *bi, *bj, brow, j, ncols, krow, kcol, col, row, Mbs, bcol; 25211302d50aSBarry Smith PetscMPIInt rank, size; 25221302d50aSBarry Smith PetscInt *rowners_bs, dest, count, source; 252387828ca2SBarry Smith PetscScalar *va; 25248a1c53f2SBarry Smith MatScalar *ba; 2525f4c0e9e4SHong Zhang MPI_Status stat; 252624d5174aSHong Zhang 252724d5174aSHong Zhang PetscFunctionBegin; 25285f80ce2aSJacob Faibussowitsch PetscCheck(!idx, PETSC_COMM_SELF, PETSC_ERR_SUP, "Send email to petsc-maint@mcs.anl.gov"); 25299566063dSJacob Faibussowitsch PetscCall(MatGetRowMaxAbs(a->A, v, NULL)); 25309566063dSJacob Faibussowitsch PetscCall(VecGetArray(v, &va)); 2531f4c0e9e4SHong Zhang 25329566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 25339566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)A), &rank)); 2534f4c0e9e4SHong Zhang 2535d0f46423SBarry Smith bs = A->rmap->bs; 2536f4c0e9e4SHong Zhang mbs = a->mbs; 2537f4c0e9e4SHong Zhang Mbs = a->Mbs; 2538f4c0e9e4SHong Zhang ba = b->a; 2539f4c0e9e4SHong Zhang bi = b->i; 2540f4c0e9e4SHong Zhang bj = b->j; 2541f4c0e9e4SHong Zhang 2542f4c0e9e4SHong Zhang /* find ownerships */ 2543d0f46423SBarry Smith rowners_bs = A->rmap->range; 2544f4c0e9e4SHong Zhang 2545f4c0e9e4SHong Zhang /* each proc creates an array to be distributed */ 25469566063dSJacob Faibussowitsch PetscCall(PetscCalloc1(bs * Mbs, &work)); 2547f4c0e9e4SHong Zhang 2548f4c0e9e4SHong Zhang /* row_max for B */ 2549b8475685SHong Zhang if (rank != size - 1) { 2550f4c0e9e4SHong Zhang for (i = 0; i < mbs; i++) { 25519371c9d4SSatish Balay ncols = bi[1] - bi[0]; 25529371c9d4SSatish Balay bi++; 2553f4c0e9e4SHong Zhang brow = bs * i; 2554f4c0e9e4SHong Zhang for (j = 0; j < ncols; j++) { 2555f4c0e9e4SHong Zhang bcol = bs * (*bj); 2556f4c0e9e4SHong Zhang for (kcol = 0; kcol < bs; kcol++) { 2557ca54ac64SHong Zhang col = bcol + kcol; /* local col index */ 255804d41228SHong Zhang col += rowners_bs[rank + 1]; /* global col index */ 2559f4c0e9e4SHong Zhang for (krow = 0; krow < bs; krow++) { 25609371c9d4SSatish Balay atmp = PetscAbsScalar(*ba); 25619371c9d4SSatish Balay ba++; 2562ca54ac64SHong Zhang row = brow + krow; /* local row index */ 2563ca54ac64SHong Zhang if (PetscRealPart(va[row]) < atmp) va[row] = atmp; 2564f4c0e9e4SHong Zhang if (work[col] < atmp) work[col] = atmp; 2565f4c0e9e4SHong Zhang } 2566f4c0e9e4SHong Zhang } 2567f4c0e9e4SHong Zhang bj++; 2568f4c0e9e4SHong Zhang } 2569f4c0e9e4SHong Zhang } 2570f4c0e9e4SHong Zhang 2571f4c0e9e4SHong Zhang /* send values to its owners */ 2572f4c0e9e4SHong Zhang for (dest = rank + 1; dest < size; dest++) { 2573f4c0e9e4SHong Zhang svalues = work + rowners_bs[dest]; 2574ca54ac64SHong Zhang count = rowners_bs[dest + 1] - rowners_bs[dest]; 25759566063dSJacob Faibussowitsch PetscCallMPI(MPI_Send(svalues, count, MPIU_REAL, dest, rank, PetscObjectComm((PetscObject)A))); 2576ca54ac64SHong Zhang } 2577f4c0e9e4SHong Zhang } 2578f4c0e9e4SHong Zhang 2579f4c0e9e4SHong Zhang /* receive values */ 2580ca54ac64SHong Zhang if (rank) { 2581f4c0e9e4SHong Zhang rvalues = work; 2582ca54ac64SHong Zhang count = rowners_bs[rank + 1] - rowners_bs[rank]; 2583f4c0e9e4SHong Zhang for (source = 0; source < rank; source++) { 25849566063dSJacob Faibussowitsch PetscCallMPI(MPI_Recv(rvalues, count, MPIU_REAL, MPI_ANY_SOURCE, MPI_ANY_TAG, PetscObjectComm((PetscObject)A), &stat)); 2585f4c0e9e4SHong Zhang /* process values */ 2586f4c0e9e4SHong Zhang for (i = 0; i < count; i++) { 2587ca54ac64SHong Zhang if (PetscRealPart(va[i]) < rvalues[i]) va[i] = rvalues[i]; 2588f4c0e9e4SHong Zhang } 2589f4c0e9e4SHong Zhang } 2590ca54ac64SHong Zhang } 2591f4c0e9e4SHong Zhang 25929566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(v, &va)); 25939566063dSJacob Faibussowitsch PetscCall(PetscFree(work)); 25943ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 259524d5174aSHong Zhang } 25962798e883SHong Zhang 2597d71ae5a4SJacob Faibussowitsch PetscErrorCode MatSOR_MPISBAIJ(Mat matin, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx) 2598d71ae5a4SJacob Faibussowitsch { 25992798e883SHong Zhang Mat_MPISBAIJ *mat = (Mat_MPISBAIJ *)matin->data; 2600d0f46423SBarry Smith PetscInt mbs = mat->mbs, bs = matin->rmap->bs; 26013649974fSBarry Smith PetscScalar *x, *ptr, *from; 2602ffe4fb16SHong Zhang Vec bb1; 26033649974fSBarry Smith const PetscScalar *b; 2604ffe4fb16SHong Zhang 2605ffe4fb16SHong Zhang PetscFunctionBegin; 26065f80ce2aSJacob Faibussowitsch PetscCheck(its > 0 && lits > 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Relaxation requires global its %" PetscInt_FMT " and local its %" PetscInt_FMT " both positive", its, lits); 26075f80ce2aSJacob Faibussowitsch PetscCheck(bs <= 1, PETSC_COMM_SELF, PETSC_ERR_SUP, "SSOR for block size > 1 is not yet implemented"); 2608ffe4fb16SHong Zhang 2609a2b30743SBarry Smith if (flag == SOR_APPLY_UPPER) { 26109566063dSJacob Faibussowitsch PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx)); 26113ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 2612a2b30743SBarry Smith } 2613a2b30743SBarry Smith 2614ffe4fb16SHong Zhang if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 2615ffe4fb16SHong Zhang if (flag & SOR_ZERO_INITIAL_GUESS) { 26169566063dSJacob Faibussowitsch PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, lits, xx)); 2617ffe4fb16SHong Zhang its--; 2618ffe4fb16SHong Zhang } 2619ffe4fb16SHong Zhang 26209566063dSJacob Faibussowitsch PetscCall(VecDuplicate(bb, &bb1)); 2621ffe4fb16SHong Zhang while (its--) { 2622ffe4fb16SHong Zhang /* lower triangular part: slvec0b = - B^T*xx */ 26239566063dSJacob Faibussowitsch PetscCall((*mat->B->ops->multtranspose)(mat->B, xx, mat->slvec0b)); 2624ffe4fb16SHong Zhang 2625ffe4fb16SHong Zhang /* copy xx into slvec0a */ 26269566063dSJacob Faibussowitsch PetscCall(VecGetArray(mat->slvec0, &ptr)); 26279566063dSJacob Faibussowitsch PetscCall(VecGetArray(xx, &x)); 26289566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(ptr, x, bs * mbs)); 26299566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(mat->slvec0, &ptr)); 2630ffe4fb16SHong Zhang 26319566063dSJacob Faibussowitsch PetscCall(VecScale(mat->slvec0, -1.0)); 2632ffe4fb16SHong Zhang 2633ffe4fb16SHong Zhang /* copy bb into slvec1a */ 26349566063dSJacob Faibussowitsch PetscCall(VecGetArray(mat->slvec1, &ptr)); 26359566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(bb, &b)); 26369566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(ptr, b, bs * mbs)); 26379566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(mat->slvec1, &ptr)); 2638ffe4fb16SHong Zhang 2639ffe4fb16SHong Zhang /* set slvec1b = 0 */ 26409566063dSJacob Faibussowitsch PetscCall(VecSet(mat->slvec1b, 0.0)); 2641ffe4fb16SHong Zhang 26429566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(mat->sMvctx, mat->slvec0, mat->slvec1, ADD_VALUES, SCATTER_FORWARD)); 26439566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(xx, &x)); 26449566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(bb, &b)); 26459566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(mat->sMvctx, mat->slvec0, mat->slvec1, ADD_VALUES, SCATTER_FORWARD)); 2646ffe4fb16SHong Zhang 2647ffe4fb16SHong Zhang /* upper triangular part: bb1 = bb1 - B*x */ 26489566063dSJacob Faibussowitsch PetscCall((*mat->B->ops->multadd)(mat->B, mat->slvec1b, mat->slvec1a, bb1)); 2649ffe4fb16SHong Zhang 2650ffe4fb16SHong Zhang /* local diagonal sweep */ 26519566063dSJacob Faibussowitsch PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_SYMMETRIC_SWEEP, fshift, lits, lits, xx)); 2652ffe4fb16SHong Zhang } 26539566063dSJacob Faibussowitsch PetscCall(VecDestroy(&bb1)); 2654fa22f6d0SBarry Smith } else if ((flag & SOR_LOCAL_FORWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) { 26559566063dSJacob Faibussowitsch PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx)); 2656fa22f6d0SBarry Smith } else if ((flag & SOR_LOCAL_BACKWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) { 26579566063dSJacob Faibussowitsch PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx)); 2658fa22f6d0SBarry Smith } else if (flag & SOR_EISENSTAT) { 2659fa22f6d0SBarry Smith Vec xx1; 2660ace3abfcSBarry Smith PetscBool hasop; 266120f1ed55SBarry Smith const PetscScalar *diag; 2662887ee2caSBarry Smith PetscScalar *sl, scale = (omega - 2.0) / omega; 266320f1ed55SBarry Smith PetscInt i, n; 2664fa22f6d0SBarry Smith 2665fa22f6d0SBarry Smith if (!mat->xx1) { 26669566063dSJacob Faibussowitsch PetscCall(VecDuplicate(bb, &mat->xx1)); 26679566063dSJacob Faibussowitsch PetscCall(VecDuplicate(bb, &mat->bb1)); 2668fa22f6d0SBarry Smith } 2669fa22f6d0SBarry Smith xx1 = mat->xx1; 2670fa22f6d0SBarry Smith bb1 = mat->bb1; 2671fa22f6d0SBarry Smith 26729566063dSJacob Faibussowitsch PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, (MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP), fshift, lits, 1, xx)); 2673fa22f6d0SBarry Smith 2674fa22f6d0SBarry Smith if (!mat->diag) { 2675effcda25SBarry Smith /* this is wrong for same matrix with new nonzero values */ 26769566063dSJacob Faibussowitsch PetscCall(MatCreateVecs(matin, &mat->diag, NULL)); 26779566063dSJacob Faibussowitsch PetscCall(MatGetDiagonal(matin, mat->diag)); 2678fa22f6d0SBarry Smith } 26799566063dSJacob Faibussowitsch PetscCall(MatHasOperation(matin, MATOP_MULT_DIAGONAL_BLOCK, &hasop)); 2680fa22f6d0SBarry Smith 2681fa22f6d0SBarry Smith if (hasop) { 26829566063dSJacob Faibussowitsch PetscCall(MatMultDiagonalBlock(matin, xx, bb1)); 26839566063dSJacob Faibussowitsch PetscCall(VecAYPX(mat->slvec1a, scale, bb)); 268420f1ed55SBarry Smith } else { 268520f1ed55SBarry Smith /* 268620f1ed55SBarry Smith These two lines are replaced by code that may be a bit faster for a good compiler 26879566063dSJacob Faibussowitsch PetscCall(VecPointwiseMult(mat->slvec1a,mat->diag,xx)); 26889566063dSJacob Faibussowitsch PetscCall(VecAYPX(mat->slvec1a,scale,bb)); 268920f1ed55SBarry Smith */ 26909566063dSJacob Faibussowitsch PetscCall(VecGetArray(mat->slvec1a, &sl)); 26919566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(mat->diag, &diag)); 26929566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(bb, &b)); 26939566063dSJacob Faibussowitsch PetscCall(VecGetArray(xx, &x)); 26949566063dSJacob Faibussowitsch PetscCall(VecGetLocalSize(xx, &n)); 2695887ee2caSBarry Smith if (omega == 1.0) { 269626fbe8dcSKarl Rupp for (i = 0; i < n; i++) sl[i] = b[i] - diag[i] * x[i]; 26979566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(2.0 * n)); 2698887ee2caSBarry Smith } else { 269926fbe8dcSKarl Rupp for (i = 0; i < n; i++) sl[i] = b[i] + scale * diag[i] * x[i]; 27009566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(3.0 * n)); 2701887ee2caSBarry Smith } 27029566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(mat->slvec1a, &sl)); 27039566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(mat->diag, &diag)); 27049566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(bb, &b)); 27059566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(xx, &x)); 270620f1ed55SBarry Smith } 2707fa22f6d0SBarry Smith 2708fa22f6d0SBarry Smith /* multiply off-diagonal portion of matrix */ 27099566063dSJacob Faibussowitsch PetscCall(VecSet(mat->slvec1b, 0.0)); 27109566063dSJacob Faibussowitsch PetscCall((*mat->B->ops->multtranspose)(mat->B, xx, mat->slvec0b)); 27119566063dSJacob Faibussowitsch PetscCall(VecGetArray(mat->slvec0, &from)); 27129566063dSJacob Faibussowitsch PetscCall(VecGetArray(xx, &x)); 27139566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(from, x, bs * mbs)); 27149566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(mat->slvec0, &from)); 27159566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(xx, &x)); 27169566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(mat->sMvctx, mat->slvec0, mat->slvec1, ADD_VALUES, SCATTER_FORWARD)); 27179566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(mat->sMvctx, mat->slvec0, mat->slvec1, ADD_VALUES, SCATTER_FORWARD)); 27189566063dSJacob Faibussowitsch PetscCall((*mat->B->ops->multadd)(mat->B, mat->slvec1b, mat->slvec1a, mat->slvec1a)); 2719fa22f6d0SBarry Smith 2720fa22f6d0SBarry Smith /* local sweep */ 27219566063dSJacob Faibussowitsch PetscCall((*mat->A->ops->sor)(mat->A, mat->slvec1a, omega, (MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP), fshift, lits, 1, xx1)); 27229566063dSJacob Faibussowitsch PetscCall(VecAXPY(xx, 1.0, xx1)); 2723f23aa3ddSBarry Smith } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "MatSORType is not supported for SBAIJ matrix format"); 27243ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 2725ffe4fb16SHong Zhang } 2726ffe4fb16SHong Zhang 2727dfb205c3SBarry Smith /*@ 272811a5261eSBarry Smith MatCreateMPISBAIJWithArrays - creates a `MATMPISBAIJ` matrix using arrays that contain in standard 2729dfb205c3SBarry Smith CSR format the local rows. 2730dfb205c3SBarry Smith 2731d083f849SBarry Smith Collective 2732dfb205c3SBarry Smith 2733dfb205c3SBarry Smith Input Parameters: 2734dfb205c3SBarry Smith + comm - MPI communicator 2735dfb205c3SBarry Smith . bs - the block size, only a block size of 1 is supported 273611a5261eSBarry Smith . m - number of local rows (Cannot be `PETSC_DECIDE`) 2737dfb205c3SBarry Smith . n - This value should be the same as the local size used in creating the 273811a5261eSBarry Smith x vector for the matrix-vector product y = Ax. (or `PETSC_DECIDE` to have 2739dfb205c3SBarry Smith calculated if N is given) For square matrices n is almost always m. 274011a5261eSBarry Smith . M - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given) 274111a5261eSBarry Smith . N - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given) 2742483a2f95SBarry Smith . i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of block elements in that row block row of the matrix 2743dfb205c3SBarry Smith . j - column indices 2744dfb205c3SBarry Smith - a - matrix values 2745dfb205c3SBarry Smith 2746dfb205c3SBarry Smith Output Parameter: 2747dfb205c3SBarry Smith . mat - the matrix 2748dfb205c3SBarry Smith 2749dfb205c3SBarry Smith Level: intermediate 2750dfb205c3SBarry Smith 2751dfb205c3SBarry Smith Notes: 2752dfb205c3SBarry Smith The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 2753dfb205c3SBarry Smith thus you CANNOT change the matrix entries by changing the values of a[] after you have 2754dfb205c3SBarry Smith called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 2755dfb205c3SBarry Smith 2756dfb205c3SBarry Smith The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 2757dfb205c3SBarry Smith 275811a5261eSBarry Smith .seealso: `MATMPISBAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`, 2759db781477SPatrick Sanan `MPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()` 2760dfb205c3SBarry Smith @*/ 2761d71ae5a4SJacob Faibussowitsch PetscErrorCode MatCreateMPISBAIJWithArrays(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt M, PetscInt N, const PetscInt i[], const PetscInt j[], const PetscScalar a[], Mat *mat) 2762d71ae5a4SJacob Faibussowitsch { 2763dfb205c3SBarry Smith PetscFunctionBegin; 27645f80ce2aSJacob Faibussowitsch PetscCheck(!i[0], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0"); 27655f80ce2aSJacob Faibussowitsch PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative"); 27669566063dSJacob Faibussowitsch PetscCall(MatCreate(comm, mat)); 27679566063dSJacob Faibussowitsch PetscCall(MatSetSizes(*mat, m, n, M, N)); 27689566063dSJacob Faibussowitsch PetscCall(MatSetType(*mat, MATMPISBAIJ)); 27699566063dSJacob Faibussowitsch PetscCall(MatMPISBAIJSetPreallocationCSR(*mat, bs, i, j, a)); 27703ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 2771dfb205c3SBarry Smith } 2772dfb205c3SBarry Smith 2773dfb205c3SBarry Smith /*@C 277411a5261eSBarry Smith MatMPISBAIJSetPreallocationCSR - Creates a sparse parallel matrix in `MATMPISBAIJ` format using the given nonzero structure and (optional) numerical values 2775dfb205c3SBarry Smith 2776d083f849SBarry Smith Collective 2777dfb205c3SBarry Smith 2778dfb205c3SBarry Smith Input Parameters: 27791c4f3114SJed Brown + B - the matrix 2780dfb205c3SBarry Smith . bs - the block size 2781dfb205c3SBarry Smith . i - the indices into j for the start of each local row (starts with zero) 2782dfb205c3SBarry Smith . j - the column indices for each local row (starts with zero) these must be sorted for each row 2783dfb205c3SBarry Smith - v - optional values in the matrix 2784dfb205c3SBarry Smith 2785664954b6SBarry Smith Level: advanced 2786664954b6SBarry Smith 2787664954b6SBarry Smith Notes: 27880cd7f59aSBarry Smith Though this routine has Preallocation() in the name it also sets the exact nonzero locations of the matrix entries 27890cd7f59aSBarry Smith and usually the numerical values as well 27900cd7f59aSBarry Smith 279150c5228eSBarry Smith Any entries below the diagonal are ignored 2792dfb205c3SBarry Smith 279311a5261eSBarry Smith .seealso: `MATMPISBAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIBAIJSetPreallocation()`, `MatCreateAIJ()`, `MPIAIJ` 2794dfb205c3SBarry Smith @*/ 2795d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMPISBAIJSetPreallocationCSR(Mat B, PetscInt bs, const PetscInt i[], const PetscInt j[], const PetscScalar v[]) 2796d71ae5a4SJacob Faibussowitsch { 2797dfb205c3SBarry Smith PetscFunctionBegin; 2798cac4c232SBarry Smith PetscTryMethod(B, "MatMPISBAIJSetPreallocationCSR_C", (Mat, PetscInt, const PetscInt[], const PetscInt[], const PetscScalar[]), (B, bs, i, j, v)); 27993ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 2800dfb205c3SBarry Smith } 2801dfb205c3SBarry Smith 2802d71ae5a4SJacob Faibussowitsch PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPISBAIJ(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat) 2803d71ae5a4SJacob Faibussowitsch { 280410c56fdeSHong Zhang PetscInt m, N, i, rstart, nnz, Ii, bs, cbs; 280510c56fdeSHong Zhang PetscInt *indx; 280610c56fdeSHong Zhang PetscScalar *values; 2807dfb205c3SBarry Smith 28084dcd73b1SHong Zhang PetscFunctionBegin; 28099566063dSJacob Faibussowitsch PetscCall(MatGetSize(inmat, &m, &N)); 281010c56fdeSHong Zhang if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */ 281110c56fdeSHong Zhang Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)inmat->data; 2812de25e9cbSPierre Jolivet PetscInt *dnz, *onz, mbs, Nbs, nbs; 281310c56fdeSHong Zhang PetscInt *bindx, rmax = a->rmax, j; 2814de25e9cbSPierre Jolivet PetscMPIInt rank, size; 28154dcd73b1SHong Zhang 28169566063dSJacob Faibussowitsch PetscCall(MatGetBlockSizes(inmat, &bs, &cbs)); 28179371c9d4SSatish Balay mbs = m / bs; 28189371c9d4SSatish Balay Nbs = N / cbs; 281948a46eb9SPierre Jolivet if (n == PETSC_DECIDE) PetscCall(PetscSplitOwnershipBlock(comm, cbs, &n, &N)); 2820da91a574SPierre Jolivet nbs = n / cbs; 28214dcd73b1SHong Zhang 28229566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(rmax, &bindx)); 2823d0609cedSBarry Smith MatPreallocateBegin(comm, mbs, nbs, dnz, onz); /* inline function, output __end and __rstart are used below */ 2824de25e9cbSPierre Jolivet 28259566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_rank(comm, &rank)); 28269566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_rank(comm, &size)); 2827de25e9cbSPierre Jolivet if (rank == size - 1) { 2828de25e9cbSPierre Jolivet /* Check sum(nbs) = Nbs */ 28295f80ce2aSJacob Faibussowitsch PetscCheck(__end == Nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Sum of local block columns %" PetscInt_FMT " != global block columns %" PetscInt_FMT, __end, Nbs); 2830de25e9cbSPierre Jolivet } 2831de25e9cbSPierre Jolivet 2832d0609cedSBarry Smith rstart = __rstart; /* block rstart of *outmat; see inline function MatPreallocateBegin */ 28339566063dSJacob Faibussowitsch PetscCall(MatSetOption(inmat, MAT_GETROW_UPPERTRIANGULAR, PETSC_TRUE)); 283410c56fdeSHong Zhang for (i = 0; i < mbs; i++) { 28359566063dSJacob Faibussowitsch PetscCall(MatGetRow_SeqSBAIJ(inmat, i * bs, &nnz, &indx, NULL)); /* non-blocked nnz and indx */ 28364dcd73b1SHong Zhang nnz = nnz / bs; 28374dcd73b1SHong Zhang for (j = 0; j < nnz; j++) bindx[j] = indx[j * bs] / bs; 28389566063dSJacob Faibussowitsch PetscCall(MatPreallocateSet(i + rstart, nnz, bindx, dnz, onz)); 28399566063dSJacob Faibussowitsch PetscCall(MatRestoreRow_SeqSBAIJ(inmat, i * bs, &nnz, &indx, NULL)); 28404dcd73b1SHong Zhang } 28419566063dSJacob Faibussowitsch PetscCall(MatSetOption(inmat, MAT_GETROW_UPPERTRIANGULAR, PETSC_FALSE)); 28429566063dSJacob Faibussowitsch PetscCall(PetscFree(bindx)); 28434dcd73b1SHong Zhang 28449566063dSJacob Faibussowitsch PetscCall(MatCreate(comm, outmat)); 28459566063dSJacob Faibussowitsch PetscCall(MatSetSizes(*outmat, m, n, PETSC_DETERMINE, PETSC_DETERMINE)); 28469566063dSJacob Faibussowitsch PetscCall(MatSetBlockSizes(*outmat, bs, cbs)); 28479566063dSJacob Faibussowitsch PetscCall(MatSetType(*outmat, MATSBAIJ)); 28489566063dSJacob Faibussowitsch PetscCall(MatSeqSBAIJSetPreallocation(*outmat, bs, 0, dnz)); 28499566063dSJacob Faibussowitsch PetscCall(MatMPISBAIJSetPreallocation(*outmat, bs, 0, dnz, 0, onz)); 2850d0609cedSBarry Smith MatPreallocateEnd(dnz, onz); 28514dcd73b1SHong Zhang } 28524dcd73b1SHong Zhang 285310c56fdeSHong Zhang /* numeric phase */ 28549566063dSJacob Faibussowitsch PetscCall(MatGetBlockSizes(inmat, &bs, &cbs)); 28559566063dSJacob Faibussowitsch PetscCall(MatGetOwnershipRange(*outmat, &rstart, NULL)); 28564dcd73b1SHong Zhang 28579566063dSJacob Faibussowitsch PetscCall(MatSetOption(inmat, MAT_GETROW_UPPERTRIANGULAR, PETSC_TRUE)); 28584dcd73b1SHong Zhang for (i = 0; i < m; i++) { 28599566063dSJacob Faibussowitsch PetscCall(MatGetRow_SeqSBAIJ(inmat, i, &nnz, &indx, &values)); 28604dcd73b1SHong Zhang Ii = i + rstart; 28619566063dSJacob Faibussowitsch PetscCall(MatSetValues(*outmat, 1, &Ii, nnz, indx, values, INSERT_VALUES)); 28629566063dSJacob Faibussowitsch PetscCall(MatRestoreRow_SeqSBAIJ(inmat, i, &nnz, &indx, &values)); 28634dcd73b1SHong Zhang } 28649566063dSJacob Faibussowitsch PetscCall(MatSetOption(inmat, MAT_GETROW_UPPERTRIANGULAR, PETSC_FALSE)); 28659566063dSJacob Faibussowitsch PetscCall(MatAssemblyBegin(*outmat, MAT_FINAL_ASSEMBLY)); 28669566063dSJacob Faibussowitsch PetscCall(MatAssemblyEnd(*outmat, MAT_FINAL_ASSEMBLY)); 28673ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 28684dcd73b1SHong Zhang } 2869