1a30f8f8cSSatish Balay 2c6db04a5SJed Brown #include <../src/mat/impls/baij/mpi/mpibaij.h> /*I "petscmat.h" I*/ 3c6db04a5SJed Brown #include <../src/mat/impls/sbaij/mpi/mpisbaij.h> 4c6db04a5SJed Brown #include <../src/mat/impls/sbaij/seq/sbaij.h> 5c6db04a5SJed Brown #include <petscblaslapack.h> 6a30f8f8cSSatish Balay 76214f412SHong Zhang #if defined(PETSC_HAVE_ELEMENTAL) 8cc2e6a90SBarry Smith PETSC_INTERN PetscErrorCode MatConvert_MPISBAIJ_Elemental(Mat, MatType, MatReuse, Mat *); 96214f412SHong Zhang #endif 10d24d4204SJose E. Roman #if defined(PETSC_HAVE_SCALAPACK) 11d24d4204SJose E. Roman PETSC_INTERN PetscErrorCode MatConvert_SBAIJ_ScaLAPACK(Mat, MatType, MatReuse, Mat *); 12d24d4204SJose E. Roman #endif 13b147fbf3SStefano Zampini 14b147fbf3SStefano Zampini /* This could be moved to matimpl.h */ 15d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatPreallocateWithMats_Private(Mat B, PetscInt nm, Mat X[], PetscBool symm[], PetscBool fill) 16d71ae5a4SJacob Faibussowitsch { 17b147fbf3SStefano Zampini Mat preallocator; 18b147fbf3SStefano Zampini PetscInt r, rstart, rend; 19b147fbf3SStefano Zampini PetscInt bs, i, m, n, M, N; 20b147fbf3SStefano Zampini PetscBool cong = PETSC_TRUE; 21b147fbf3SStefano Zampini 22b147fbf3SStefano Zampini PetscFunctionBegin; 23b147fbf3SStefano Zampini PetscValidHeaderSpecific(B, MAT_CLASSID, 1); 24b147fbf3SStefano Zampini PetscValidLogicalCollectiveInt(B, nm, 2); 25b147fbf3SStefano Zampini for (i = 0; i < nm; i++) { 26b147fbf3SStefano Zampini PetscValidHeaderSpecific(X[i], MAT_CLASSID, 3); 279566063dSJacob Faibussowitsch PetscCall(PetscLayoutCompare(B->rmap, X[i]->rmap, &cong)); 285f80ce2aSJacob Faibussowitsch PetscCheck(cong, PetscObjectComm((PetscObject)B), PETSC_ERR_SUP, "Not for different layouts"); 29b147fbf3SStefano Zampini } 30b147fbf3SStefano Zampini PetscValidLogicalCollectiveBool(B, fill, 5); 319566063dSJacob Faibussowitsch PetscCall(MatGetBlockSize(B, &bs)); 329566063dSJacob Faibussowitsch PetscCall(MatGetSize(B, &M, &N)); 339566063dSJacob Faibussowitsch PetscCall(MatGetLocalSize(B, &m, &n)); 349566063dSJacob Faibussowitsch PetscCall(MatCreate(PetscObjectComm((PetscObject)B), &preallocator)); 359566063dSJacob Faibussowitsch PetscCall(MatSetType(preallocator, MATPREALLOCATOR)); 369566063dSJacob Faibussowitsch PetscCall(MatSetBlockSize(preallocator, bs)); 379566063dSJacob Faibussowitsch PetscCall(MatSetSizes(preallocator, m, n, M, N)); 389566063dSJacob Faibussowitsch PetscCall(MatSetUp(preallocator)); 399566063dSJacob Faibussowitsch PetscCall(MatGetOwnershipRange(preallocator, &rstart, &rend)); 40b147fbf3SStefano Zampini for (r = rstart; r < rend; ++r) { 41b147fbf3SStefano Zampini PetscInt ncols; 42b147fbf3SStefano Zampini const PetscInt *row; 43b147fbf3SStefano Zampini const PetscScalar *vals; 44b147fbf3SStefano Zampini 45b147fbf3SStefano Zampini for (i = 0; i < nm; i++) { 469566063dSJacob Faibussowitsch PetscCall(MatGetRow(X[i], r, &ncols, &row, &vals)); 479566063dSJacob Faibussowitsch PetscCall(MatSetValues(preallocator, 1, &r, ncols, row, vals, INSERT_VALUES)); 4848a46eb9SPierre Jolivet if (symm && symm[i]) PetscCall(MatSetValues(preallocator, ncols, row, 1, &r, vals, INSERT_VALUES)); 499566063dSJacob Faibussowitsch PetscCall(MatRestoreRow(X[i], r, &ncols, &row, &vals)); 50b147fbf3SStefano Zampini } 51b147fbf3SStefano Zampini } 529566063dSJacob Faibussowitsch PetscCall(MatAssemblyBegin(preallocator, MAT_FINAL_ASSEMBLY)); 539566063dSJacob Faibussowitsch PetscCall(MatAssemblyEnd(preallocator, MAT_FINAL_ASSEMBLY)); 549566063dSJacob Faibussowitsch PetscCall(MatPreallocatorPreallocate(preallocator, fill, B)); 559566063dSJacob Faibussowitsch PetscCall(MatDestroy(&preallocator)); 56b147fbf3SStefano Zampini PetscFunctionReturn(0); 57b147fbf3SStefano Zampini } 58b147fbf3SStefano Zampini 59d71ae5a4SJacob Faibussowitsch PETSC_INTERN PetscErrorCode MatConvert_MPISBAIJ_Basic(Mat A, MatType newtype, MatReuse reuse, Mat *newmat) 60d71ae5a4SJacob Faibussowitsch { 61b147fbf3SStefano Zampini Mat B; 62b147fbf3SStefano Zampini PetscInt r; 63b147fbf3SStefano Zampini 64b147fbf3SStefano Zampini PetscFunctionBegin; 65b147fbf3SStefano Zampini if (reuse != MAT_REUSE_MATRIX) { 6628d58a37SPierre Jolivet PetscBool symm = PETSC_TRUE, isdense; 67b147fbf3SStefano Zampini PetscInt bs; 68b147fbf3SStefano Zampini 699566063dSJacob Faibussowitsch PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 709566063dSJacob Faibussowitsch PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N)); 719566063dSJacob Faibussowitsch PetscCall(MatSetType(B, newtype)); 729566063dSJacob Faibussowitsch PetscCall(MatGetBlockSize(A, &bs)); 739566063dSJacob Faibussowitsch PetscCall(MatSetBlockSize(B, bs)); 749566063dSJacob Faibussowitsch PetscCall(PetscLayoutSetUp(B->rmap)); 759566063dSJacob Faibussowitsch PetscCall(PetscLayoutSetUp(B->cmap)); 769566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompareAny((PetscObject)B, &isdense, MATSEQDENSE, MATMPIDENSE, MATSEQDENSECUDA, "")); 7728d58a37SPierre Jolivet if (!isdense) { 789566063dSJacob Faibussowitsch PetscCall(MatGetRowUpperTriangular(A)); 799566063dSJacob Faibussowitsch PetscCall(MatPreallocateWithMats_Private(B, 1, &A, &symm, PETSC_TRUE)); 809566063dSJacob Faibussowitsch PetscCall(MatRestoreRowUpperTriangular(A)); 8128d58a37SPierre Jolivet } else { 829566063dSJacob Faibussowitsch PetscCall(MatSetUp(B)); 8328d58a37SPierre Jolivet } 8428d58a37SPierre Jolivet } else { 8528d58a37SPierre Jolivet B = *newmat; 869566063dSJacob Faibussowitsch PetscCall(MatZeroEntries(B)); 8728d58a37SPierre Jolivet } 88b147fbf3SStefano Zampini 899566063dSJacob Faibussowitsch PetscCall(MatGetRowUpperTriangular(A)); 90b147fbf3SStefano Zampini for (r = A->rmap->rstart; r < A->rmap->rend; r++) { 91b147fbf3SStefano Zampini PetscInt ncols; 92b147fbf3SStefano Zampini const PetscInt *row; 93b147fbf3SStefano Zampini const PetscScalar *vals; 94b147fbf3SStefano Zampini 959566063dSJacob Faibussowitsch PetscCall(MatGetRow(A, r, &ncols, &row, &vals)); 969566063dSJacob Faibussowitsch PetscCall(MatSetValues(B, 1, &r, ncols, row, vals, INSERT_VALUES)); 97eb1ec7c1SStefano Zampini #if defined(PETSC_USE_COMPLEX) 98b94d7dedSBarry Smith if (A->hermitian == PETSC_BOOL3_TRUE) { 99eb1ec7c1SStefano Zampini PetscInt i; 10048a46eb9SPierre Jolivet for (i = 0; i < ncols; i++) PetscCall(MatSetValue(B, row[i], r, PetscConj(vals[i]), INSERT_VALUES)); 101eb1ec7c1SStefano Zampini } else { 1029566063dSJacob Faibussowitsch PetscCall(MatSetValues(B, ncols, row, 1, &r, vals, INSERT_VALUES)); 103eb1ec7c1SStefano Zampini } 104eb1ec7c1SStefano Zampini #else 1059566063dSJacob Faibussowitsch PetscCall(MatSetValues(B, ncols, row, 1, &r, vals, INSERT_VALUES)); 106eb1ec7c1SStefano Zampini #endif 1079566063dSJacob Faibussowitsch PetscCall(MatRestoreRow(A, r, &ncols, &row, &vals)); 108b147fbf3SStefano Zampini } 1099566063dSJacob Faibussowitsch PetscCall(MatRestoreRowUpperTriangular(A)); 1109566063dSJacob Faibussowitsch PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 1119566063dSJacob Faibussowitsch PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 112b147fbf3SStefano Zampini 113b147fbf3SStefano Zampini if (reuse == MAT_INPLACE_MATRIX) { 1149566063dSJacob Faibussowitsch PetscCall(MatHeaderReplace(A, &B)); 115b147fbf3SStefano Zampini } else { 116b147fbf3SStefano Zampini *newmat = B; 117b147fbf3SStefano Zampini } 118b147fbf3SStefano Zampini PetscFunctionReturn(0); 119b147fbf3SStefano Zampini } 120b147fbf3SStefano Zampini 121d71ae5a4SJacob Faibussowitsch PetscErrorCode MatStoreValues_MPISBAIJ(Mat mat) 122d71ae5a4SJacob Faibussowitsch { 123f3566a2aSHong Zhang Mat_MPISBAIJ *aij = (Mat_MPISBAIJ *)mat->data; 124a30f8f8cSSatish Balay 125a30f8f8cSSatish Balay PetscFunctionBegin; 1269566063dSJacob Faibussowitsch PetscCall(MatStoreValues(aij->A)); 1279566063dSJacob Faibussowitsch PetscCall(MatStoreValues(aij->B)); 128a30f8f8cSSatish Balay PetscFunctionReturn(0); 129a30f8f8cSSatish Balay } 130a30f8f8cSSatish Balay 131d71ae5a4SJacob Faibussowitsch PetscErrorCode MatRetrieveValues_MPISBAIJ(Mat mat) 132d71ae5a4SJacob Faibussowitsch { 133f3566a2aSHong Zhang Mat_MPISBAIJ *aij = (Mat_MPISBAIJ *)mat->data; 134a30f8f8cSSatish Balay 135a30f8f8cSSatish Balay PetscFunctionBegin; 1369566063dSJacob Faibussowitsch PetscCall(MatRetrieveValues(aij->A)); 1379566063dSJacob Faibussowitsch PetscCall(MatRetrieveValues(aij->B)); 138a30f8f8cSSatish Balay PetscFunctionReturn(0); 139a30f8f8cSSatish Balay } 140a30f8f8cSSatish Balay 141d40312a9SBarry Smith #define MatSetValues_SeqSBAIJ_A_Private(row, col, value, addv, orow, ocol) \ 142a30f8f8cSSatish Balay { \ 143a30f8f8cSSatish Balay brow = row / bs; \ 1449371c9d4SSatish Balay rp = aj + ai[brow]; \ 1459371c9d4SSatish Balay ap = aa + bs2 * ai[brow]; \ 1469371c9d4SSatish Balay rmax = aimax[brow]; \ 1479371c9d4SSatish Balay nrow = ailen[brow]; \ 148a30f8f8cSSatish Balay bcol = col / bs; \ 1499371c9d4SSatish Balay ridx = row % bs; \ 1509371c9d4SSatish Balay cidx = col % bs; \ 1519371c9d4SSatish Balay low = 0; \ 1529371c9d4SSatish Balay high = nrow; \ 153a30f8f8cSSatish Balay while (high - low > 3) { \ 154a30f8f8cSSatish Balay t = (low + high) / 2; \ 155a30f8f8cSSatish Balay if (rp[t] > bcol) high = t; \ 156a30f8f8cSSatish Balay else low = t; \ 157a30f8f8cSSatish Balay } \ 158a30f8f8cSSatish Balay for (_i = low; _i < high; _i++) { \ 159a30f8f8cSSatish Balay if (rp[_i] > bcol) break; \ 160a30f8f8cSSatish Balay if (rp[_i] == bcol) { \ 161a30f8f8cSSatish Balay bap = ap + bs2 * _i + bs * cidx + ridx; \ 162a30f8f8cSSatish Balay if (addv == ADD_VALUES) *bap += value; \ 163a30f8f8cSSatish Balay else *bap = value; \ 164a30f8f8cSSatish Balay goto a_noinsert; \ 165a30f8f8cSSatish Balay } \ 166a30f8f8cSSatish Balay } \ 167a30f8f8cSSatish Balay if (a->nonew == 1) goto a_noinsert; \ 1685f80ce2aSJacob Faibussowitsch PetscCheck(a->nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \ 169fef13f97SBarry Smith MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, brow, bcol, rmax, aa, ai, aj, rp, ap, aimax, a->nonew, MatScalar); \ 170a30f8f8cSSatish Balay N = nrow++ - 1; \ 171a30f8f8cSSatish Balay /* shift up all the later entries in this row */ \ 1729566063dSJacob Faibussowitsch PetscCall(PetscArraymove(rp + _i + 1, rp + _i, N - _i + 1)); \ 1739566063dSJacob Faibussowitsch PetscCall(PetscArraymove(ap + bs2 * (_i + 1), ap + bs2 * _i, bs2 * (N - _i + 1))); \ 1749566063dSJacob Faibussowitsch PetscCall(PetscArrayzero(ap + bs2 * _i, bs2)); \ 175a30f8f8cSSatish Balay rp[_i] = bcol; \ 176a30f8f8cSSatish Balay ap[bs2 * _i + bs * cidx + ridx] = value; \ 177e56f5c9eSBarry Smith A->nonzerostate++; \ 178a30f8f8cSSatish Balay a_noinsert:; \ 179a30f8f8cSSatish Balay ailen[brow] = nrow; \ 180a30f8f8cSSatish Balay } 181e5e170daSBarry Smith 182d40312a9SBarry Smith #define MatSetValues_SeqSBAIJ_B_Private(row, col, value, addv, orow, ocol) \ 183a30f8f8cSSatish Balay { \ 184a30f8f8cSSatish Balay brow = row / bs; \ 1859371c9d4SSatish Balay rp = bj + bi[brow]; \ 1869371c9d4SSatish Balay ap = ba + bs2 * bi[brow]; \ 1879371c9d4SSatish Balay rmax = bimax[brow]; \ 1889371c9d4SSatish Balay nrow = bilen[brow]; \ 189a30f8f8cSSatish Balay bcol = col / bs; \ 1909371c9d4SSatish Balay ridx = row % bs; \ 1919371c9d4SSatish Balay cidx = col % bs; \ 1929371c9d4SSatish Balay low = 0; \ 1939371c9d4SSatish Balay high = nrow; \ 194a30f8f8cSSatish Balay while (high - low > 3) { \ 195a30f8f8cSSatish Balay t = (low + high) / 2; \ 196a30f8f8cSSatish Balay if (rp[t] > bcol) high = t; \ 197a30f8f8cSSatish Balay else low = t; \ 198a30f8f8cSSatish Balay } \ 199a30f8f8cSSatish Balay for (_i = low; _i < high; _i++) { \ 200a30f8f8cSSatish Balay if (rp[_i] > bcol) break; \ 201a30f8f8cSSatish Balay if (rp[_i] == bcol) { \ 202a30f8f8cSSatish Balay bap = ap + bs2 * _i + bs * cidx + ridx; \ 203a30f8f8cSSatish Balay if (addv == ADD_VALUES) *bap += value; \ 204a30f8f8cSSatish Balay else *bap = value; \ 205a30f8f8cSSatish Balay goto b_noinsert; \ 206a30f8f8cSSatish Balay } \ 207a30f8f8cSSatish Balay } \ 208a30f8f8cSSatish Balay if (b->nonew == 1) goto b_noinsert; \ 2095f80ce2aSJacob Faibussowitsch PetscCheck(b->nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \ 210fef13f97SBarry Smith MatSeqXAIJReallocateAIJ(B, b->mbs, bs2, nrow, brow, bcol, rmax, ba, bi, bj, rp, ap, bimax, b->nonew, MatScalar); \ 211a30f8f8cSSatish Balay N = nrow++ - 1; \ 212a30f8f8cSSatish Balay /* shift up all the later entries in this row */ \ 2139566063dSJacob Faibussowitsch PetscCall(PetscArraymove(rp + _i + 1, rp + _i, N - _i + 1)); \ 2149566063dSJacob Faibussowitsch PetscCall(PetscArraymove(ap + bs2 * (_i + 1), ap + bs2 * _i, bs2 * (N - _i + 1))); \ 2159566063dSJacob Faibussowitsch PetscCall(PetscArrayzero(ap + bs2 * _i, bs2)); \ 216a30f8f8cSSatish Balay rp[_i] = bcol; \ 217a30f8f8cSSatish Balay ap[bs2 * _i + bs * cidx + ridx] = value; \ 218e56f5c9eSBarry Smith B->nonzerostate++; \ 219a30f8f8cSSatish Balay b_noinsert:; \ 220a30f8f8cSSatish Balay bilen[brow] = nrow; \ 221a30f8f8cSSatish Balay } 222a30f8f8cSSatish Balay 223a30f8f8cSSatish Balay /* Only add/insert a(i,j) with i<=j (blocks). 224476417e5SBarry Smith Any a(i,j) with i>j input by user is ingored or generates an error 225a30f8f8cSSatish Balay */ 226d71ae5a4SJacob Faibussowitsch PetscErrorCode MatSetValues_MPISBAIJ(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv) 227d71ae5a4SJacob Faibussowitsch { 228a30f8f8cSSatish Balay Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data; 229a30f8f8cSSatish Balay MatScalar value; 230ace3abfcSBarry Smith PetscBool roworiented = baij->roworiented; 2311302d50aSBarry Smith PetscInt i, j, row, col; 232d0f46423SBarry Smith PetscInt rstart_orig = mat->rmap->rstart; 233d0f46423SBarry Smith PetscInt rend_orig = mat->rmap->rend, cstart_orig = mat->cmap->rstart; 234d0f46423SBarry Smith PetscInt cend_orig = mat->cmap->rend, bs = mat->rmap->bs; 235a30f8f8cSSatish Balay 236a30f8f8cSSatish Balay /* Some Variables required in the macro */ 237a30f8f8cSSatish Balay Mat A = baij->A; 238a30f8f8cSSatish Balay Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)(A)->data; 2391302d50aSBarry Smith PetscInt *aimax = a->imax, *ai = a->i, *ailen = a->ilen, *aj = a->j; 240a30f8f8cSSatish Balay MatScalar *aa = a->a; 241a30f8f8cSSatish Balay 242a30f8f8cSSatish Balay Mat B = baij->B; 243a30f8f8cSSatish Balay Mat_SeqBAIJ *b = (Mat_SeqBAIJ *)(B)->data; 2441302d50aSBarry Smith PetscInt *bimax = b->imax, *bi = b->i, *bilen = b->ilen, *bj = b->j; 245a30f8f8cSSatish Balay MatScalar *ba = b->a; 246a30f8f8cSSatish Balay 2471302d50aSBarry Smith PetscInt *rp, ii, nrow, _i, rmax, N, brow, bcol; 2481302d50aSBarry Smith PetscInt low, high, t, ridx, cidx, bs2 = a->bs2; 249a30f8f8cSSatish Balay MatScalar *ap, *bap; 250a30f8f8cSSatish Balay 251a30f8f8cSSatish Balay /* for stash */ 2520298fd71SBarry Smith PetscInt n_loc, *in_loc = NULL; 2530298fd71SBarry Smith MatScalar *v_loc = NULL; 254a30f8f8cSSatish Balay 255a30f8f8cSSatish Balay PetscFunctionBegin; 256a30f8f8cSSatish Balay if (!baij->donotstash) { 25759ffdab8SBarry Smith if (n > baij->n_loc) { 2589566063dSJacob Faibussowitsch PetscCall(PetscFree(baij->in_loc)); 2599566063dSJacob Faibussowitsch PetscCall(PetscFree(baij->v_loc)); 2609566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(n, &baij->in_loc)); 2619566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(n, &baij->v_loc)); 26226fbe8dcSKarl Rupp 26359ffdab8SBarry Smith baij->n_loc = n; 26459ffdab8SBarry Smith } 26559ffdab8SBarry Smith in_loc = baij->in_loc; 26659ffdab8SBarry Smith v_loc = baij->v_loc; 267a30f8f8cSSatish Balay } 268a30f8f8cSSatish Balay 269a30f8f8cSSatish Balay for (i = 0; i < m; i++) { 270a30f8f8cSSatish Balay if (im[i] < 0) continue; 2715f80ce2aSJacob Faibussowitsch PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1); 272a30f8f8cSSatish Balay if (im[i] >= rstart_orig && im[i] < rend_orig) { /* this processor entry */ 273a30f8f8cSSatish Balay row = im[i] - rstart_orig; /* local row index */ 274a30f8f8cSSatish Balay for (j = 0; j < n; j++) { 27501b2bd88SHong Zhang if (im[i] / bs > in[j] / bs) { 27601b2bd88SHong Zhang if (a->ignore_ltriangular) { 27701b2bd88SHong Zhang continue; /* ignore lower triangular blocks */ 27826fbe8dcSKarl Rupp } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_USER, "Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)"); 27901b2bd88SHong Zhang } 280a30f8f8cSSatish Balay if (in[j] >= cstart_orig && in[j] < cend_orig) { /* diag entry (A) */ 281a30f8f8cSSatish Balay col = in[j] - cstart_orig; /* local col index */ 2829371c9d4SSatish Balay brow = row / bs; 2839371c9d4SSatish Balay bcol = col / bs; 284a30f8f8cSSatish Balay if (brow > bcol) continue; /* ignore lower triangular blocks of A */ 285db4deed7SKarl Rupp if (roworiented) value = v[i * n + j]; 286db4deed7SKarl Rupp else value = v[i + j * m]; 287d40312a9SBarry Smith MatSetValues_SeqSBAIJ_A_Private(row, col, value, addv, im[i], in[j]); 2889566063dSJacob Faibussowitsch /* PetscCall(MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv)); */ 289f7d195e4SLawrence Mitchell } else if (in[j] < 0) { 290f7d195e4SLawrence Mitchell continue; 291f7d195e4SLawrence Mitchell } else { 292f7d195e4SLawrence Mitchell PetscCheck(in[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], mat->cmap->N - 1); 293f7d195e4SLawrence Mitchell /* off-diag entry (B) */ 294a30f8f8cSSatish Balay if (mat->was_assembled) { 29548a46eb9SPierre Jolivet if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat)); 296a30f8f8cSSatish Balay #if defined(PETSC_USE_CTABLE) 297*eec179cfSJacob Faibussowitsch PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] / bs + 1, 0, &col)); 29871730473SSatish Balay col = col - 1; 299a30f8f8cSSatish Balay #else 30071730473SSatish Balay col = baij->colmap[in[j] / bs] - 1; 301a30f8f8cSSatish Balay #endif 302a30f8f8cSSatish Balay if (col < 0 && !((Mat_SeqSBAIJ *)(baij->A->data))->nonew) { 3039566063dSJacob Faibussowitsch PetscCall(MatDisAssemble_MPISBAIJ(mat)); 304a30f8f8cSSatish Balay col = in[j]; 305a30f8f8cSSatish Balay /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */ 306a30f8f8cSSatish Balay B = baij->B; 307a30f8f8cSSatish Balay b = (Mat_SeqBAIJ *)(B)->data; 3089371c9d4SSatish Balay bimax = b->imax; 3099371c9d4SSatish Balay bi = b->i; 3109371c9d4SSatish Balay bilen = b->ilen; 3119371c9d4SSatish Balay bj = b->j; 312a30f8f8cSSatish Balay ba = b->a; 31371730473SSatish Balay } else col += in[j] % bs; 314a30f8f8cSSatish Balay } else col = in[j]; 315db4deed7SKarl Rupp if (roworiented) value = v[i * n + j]; 316db4deed7SKarl Rupp else value = v[i + j * m]; 317d40312a9SBarry Smith MatSetValues_SeqSBAIJ_B_Private(row, col, value, addv, im[i], in[j]); 3189566063dSJacob Faibussowitsch /* PetscCall(MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv)); */ 319a30f8f8cSSatish Balay } 320a30f8f8cSSatish Balay } 321a30f8f8cSSatish Balay } else { /* off processor entry */ 3225f80ce2aSJacob Faibussowitsch PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set", im[i]); 323a30f8f8cSSatish Balay if (!baij->donotstash) { 3245080c13bSMatthew G Knepley mat->assembled = PETSC_FALSE; 325a30f8f8cSSatish Balay n_loc = 0; 326a30f8f8cSSatish Balay for (j = 0; j < n; j++) { 327f65c83cfSHong Zhang if (im[i] / bs > in[j] / bs) continue; /* ignore lower triangular blocks */ 328a30f8f8cSSatish Balay in_loc[n_loc] = in[j]; 329a30f8f8cSSatish Balay if (roworiented) { 330a30f8f8cSSatish Balay v_loc[n_loc] = v[i * n + j]; 331a30f8f8cSSatish Balay } else { 332a30f8f8cSSatish Balay v_loc[n_loc] = v[j * m + i]; 333a30f8f8cSSatish Balay } 334a30f8f8cSSatish Balay n_loc++; 335a30f8f8cSSatish Balay } 3369566063dSJacob Faibussowitsch PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n_loc, in_loc, v_loc, PETSC_FALSE)); 337a30f8f8cSSatish Balay } 338a30f8f8cSSatish Balay } 339a30f8f8cSSatish Balay } 340a30f8f8cSSatish Balay PetscFunctionReturn(0); 341a30f8f8cSSatish Balay } 342a30f8f8cSSatish Balay 343d71ae5a4SJacob Faibussowitsch static inline PetscErrorCode MatSetValuesBlocked_SeqSBAIJ_Inlined(Mat A, PetscInt row, PetscInt col, const PetscScalar v[], InsertMode is, PetscInt orow, PetscInt ocol) 344d71ae5a4SJacob Faibussowitsch { 34536bd2089SBarry Smith Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)A->data; 34636bd2089SBarry Smith PetscInt *rp, low, high, t, ii, jj, nrow, i, rmax, N; 34736bd2089SBarry Smith PetscInt *imax = a->imax, *ai = a->i, *ailen = a->ilen; 34836bd2089SBarry Smith PetscInt *aj = a->j, nonew = a->nonew, bs2 = a->bs2, bs = A->rmap->bs; 34936bd2089SBarry Smith PetscBool roworiented = a->roworiented; 35036bd2089SBarry Smith const PetscScalar *value = v; 35136bd2089SBarry Smith MatScalar *ap, *aa = a->a, *bap; 35236bd2089SBarry Smith 35336bd2089SBarry Smith PetscFunctionBegin; 35436bd2089SBarry Smith if (col < row) { 35536bd2089SBarry Smith if (a->ignore_ltriangular) PetscFunctionReturn(0); /* ignore lower triangular block */ 35636bd2089SBarry Smith else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_USER, "Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)"); 35736bd2089SBarry Smith } 35836bd2089SBarry Smith rp = aj + ai[row]; 35936bd2089SBarry Smith ap = aa + bs2 * ai[row]; 36036bd2089SBarry Smith rmax = imax[row]; 36136bd2089SBarry Smith nrow = ailen[row]; 36236bd2089SBarry Smith value = v; 36336bd2089SBarry Smith low = 0; 36436bd2089SBarry Smith high = nrow; 36536bd2089SBarry Smith 36636bd2089SBarry Smith while (high - low > 7) { 36736bd2089SBarry Smith t = (low + high) / 2; 36836bd2089SBarry Smith if (rp[t] > col) high = t; 36936bd2089SBarry Smith else low = t; 37036bd2089SBarry Smith } 37136bd2089SBarry Smith for (i = low; i < high; i++) { 37236bd2089SBarry Smith if (rp[i] > col) break; 37336bd2089SBarry Smith if (rp[i] == col) { 37436bd2089SBarry Smith bap = ap + bs2 * i; 37536bd2089SBarry Smith if (roworiented) { 37636bd2089SBarry Smith if (is == ADD_VALUES) { 37736bd2089SBarry Smith for (ii = 0; ii < bs; ii++) { 378ad540459SPierre Jolivet for (jj = ii; jj < bs2; jj += bs) bap[jj] += *value++; 37936bd2089SBarry Smith } 38036bd2089SBarry Smith } else { 38136bd2089SBarry Smith for (ii = 0; ii < bs; ii++) { 382ad540459SPierre Jolivet for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++; 38336bd2089SBarry Smith } 38436bd2089SBarry Smith } 38536bd2089SBarry Smith } else { 38636bd2089SBarry Smith if (is == ADD_VALUES) { 38736bd2089SBarry Smith for (ii = 0; ii < bs; ii++) { 388ad540459SPierre Jolivet for (jj = 0; jj < bs; jj++) *bap++ += *value++; 38936bd2089SBarry Smith } 39036bd2089SBarry Smith } else { 39136bd2089SBarry Smith for (ii = 0; ii < bs; ii++) { 392ad540459SPierre Jolivet for (jj = 0; jj < bs; jj++) *bap++ = *value++; 39336bd2089SBarry Smith } 39436bd2089SBarry Smith } 39536bd2089SBarry Smith } 39636bd2089SBarry Smith goto noinsert2; 39736bd2089SBarry Smith } 39836bd2089SBarry Smith } 39936bd2089SBarry Smith if (nonew == 1) goto noinsert2; 4005f80ce2aSJacob Faibussowitsch PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new block index nonzero block (%" PetscInt_FMT ", %" PetscInt_FMT ") in the matrix", orow, ocol); 40136bd2089SBarry Smith MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, row, col, rmax, aa, ai, aj, rp, ap, imax, nonew, MatScalar); 4029371c9d4SSatish Balay N = nrow++ - 1; 4039371c9d4SSatish Balay high++; 40436bd2089SBarry Smith /* shift up all the later entries in this row */ 4059566063dSJacob Faibussowitsch PetscCall(PetscArraymove(rp + i + 1, rp + i, N - i + 1)); 4069566063dSJacob Faibussowitsch PetscCall(PetscArraymove(ap + bs2 * (i + 1), ap + bs2 * i, bs2 * (N - i + 1))); 40736bd2089SBarry Smith rp[i] = col; 40836bd2089SBarry Smith bap = ap + bs2 * i; 40936bd2089SBarry Smith if (roworiented) { 41036bd2089SBarry Smith for (ii = 0; ii < bs; ii++) { 411ad540459SPierre Jolivet for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++; 41236bd2089SBarry Smith } 41336bd2089SBarry Smith } else { 41436bd2089SBarry Smith for (ii = 0; ii < bs; ii++) { 415ad540459SPierre Jolivet for (jj = 0; jj < bs; jj++) *bap++ = *value++; 41636bd2089SBarry Smith } 41736bd2089SBarry Smith } 41836bd2089SBarry Smith noinsert2:; 41936bd2089SBarry Smith ailen[row] = nrow; 42036bd2089SBarry Smith PetscFunctionReturn(0); 42136bd2089SBarry Smith } 42236bd2089SBarry Smith 42336bd2089SBarry Smith /* 42436bd2089SBarry Smith This routine is exactly duplicated in mpibaij.c 42536bd2089SBarry Smith */ 426d71ae5a4SJacob Faibussowitsch static inline PetscErrorCode MatSetValuesBlocked_SeqBAIJ_Inlined(Mat A, PetscInt row, PetscInt col, const PetscScalar v[], InsertMode is, PetscInt orow, PetscInt ocol) 427d71ae5a4SJacob Faibussowitsch { 42836bd2089SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 42936bd2089SBarry Smith PetscInt *rp, low, high, t, ii, jj, nrow, i, rmax, N; 43036bd2089SBarry Smith PetscInt *imax = a->imax, *ai = a->i, *ailen = a->ilen; 43136bd2089SBarry Smith PetscInt *aj = a->j, nonew = a->nonew, bs2 = a->bs2, bs = A->rmap->bs; 43236bd2089SBarry Smith PetscBool roworiented = a->roworiented; 43336bd2089SBarry Smith const PetscScalar *value = v; 43436bd2089SBarry Smith MatScalar *ap, *aa = a->a, *bap; 43536bd2089SBarry Smith 43636bd2089SBarry Smith PetscFunctionBegin; 43736bd2089SBarry Smith rp = aj + ai[row]; 43836bd2089SBarry Smith ap = aa + bs2 * ai[row]; 43936bd2089SBarry Smith rmax = imax[row]; 44036bd2089SBarry Smith nrow = ailen[row]; 44136bd2089SBarry Smith low = 0; 44236bd2089SBarry Smith high = nrow; 44336bd2089SBarry Smith value = v; 44436bd2089SBarry Smith while (high - low > 7) { 44536bd2089SBarry Smith t = (low + high) / 2; 44636bd2089SBarry Smith if (rp[t] > col) high = t; 44736bd2089SBarry Smith else low = t; 44836bd2089SBarry Smith } 44936bd2089SBarry Smith for (i = low; i < high; i++) { 45036bd2089SBarry Smith if (rp[i] > col) break; 45136bd2089SBarry Smith if (rp[i] == col) { 45236bd2089SBarry Smith bap = ap + bs2 * i; 45336bd2089SBarry Smith if (roworiented) { 45436bd2089SBarry Smith if (is == ADD_VALUES) { 45536bd2089SBarry Smith for (ii = 0; ii < bs; ii++) { 456ad540459SPierre Jolivet for (jj = ii; jj < bs2; jj += bs) bap[jj] += *value++; 45736bd2089SBarry Smith } 45836bd2089SBarry Smith } else { 45936bd2089SBarry Smith for (ii = 0; ii < bs; ii++) { 460ad540459SPierre Jolivet for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++; 46136bd2089SBarry Smith } 46236bd2089SBarry Smith } 46336bd2089SBarry Smith } else { 46436bd2089SBarry Smith if (is == ADD_VALUES) { 46536bd2089SBarry Smith for (ii = 0; ii < bs; ii++, value += bs) { 466ad540459SPierre Jolivet for (jj = 0; jj < bs; jj++) bap[jj] += value[jj]; 46736bd2089SBarry Smith bap += bs; 46836bd2089SBarry Smith } 46936bd2089SBarry Smith } else { 47036bd2089SBarry Smith for (ii = 0; ii < bs; ii++, value += bs) { 471ad540459SPierre Jolivet for (jj = 0; jj < bs; jj++) bap[jj] = value[jj]; 47236bd2089SBarry Smith bap += bs; 47336bd2089SBarry Smith } 47436bd2089SBarry Smith } 47536bd2089SBarry Smith } 47636bd2089SBarry Smith goto noinsert2; 47736bd2089SBarry Smith } 47836bd2089SBarry Smith } 47936bd2089SBarry Smith if (nonew == 1) goto noinsert2; 4805f80ce2aSJacob Faibussowitsch PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new global block indexed nonzero block (%" PetscInt_FMT ", %" PetscInt_FMT ") in the matrix", orow, ocol); 48136bd2089SBarry Smith MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, row, col, rmax, aa, ai, aj, rp, ap, imax, nonew, MatScalar); 4829371c9d4SSatish Balay N = nrow++ - 1; 4839371c9d4SSatish Balay high++; 48436bd2089SBarry Smith /* shift up all the later entries in this row */ 4859566063dSJacob Faibussowitsch PetscCall(PetscArraymove(rp + i + 1, rp + i, N - i + 1)); 4869566063dSJacob Faibussowitsch PetscCall(PetscArraymove(ap + bs2 * (i + 1), ap + bs2 * i, bs2 * (N - i + 1))); 48736bd2089SBarry Smith rp[i] = col; 48836bd2089SBarry Smith bap = ap + bs2 * i; 48936bd2089SBarry Smith if (roworiented) { 49036bd2089SBarry Smith for (ii = 0; ii < bs; ii++) { 491ad540459SPierre Jolivet for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++; 49236bd2089SBarry Smith } 49336bd2089SBarry Smith } else { 49436bd2089SBarry Smith for (ii = 0; ii < bs; ii++) { 495ad540459SPierre Jolivet for (jj = 0; jj < bs; jj++) *bap++ = *value++; 49636bd2089SBarry Smith } 49736bd2089SBarry Smith } 49836bd2089SBarry Smith noinsert2:; 49936bd2089SBarry Smith ailen[row] = nrow; 50036bd2089SBarry Smith PetscFunctionReturn(0); 50136bd2089SBarry Smith } 50236bd2089SBarry Smith 50336bd2089SBarry Smith /* 50436bd2089SBarry Smith This routine could be optimized by removing the need for the block copy below and passing stride information 50536bd2089SBarry Smith to the above inline routines; similarly in MatSetValuesBlocked_MPIBAIJ() 50636bd2089SBarry Smith */ 507d71ae5a4SJacob Faibussowitsch PetscErrorCode MatSetValuesBlocked_MPISBAIJ(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const MatScalar v[], InsertMode addv) 508d71ae5a4SJacob Faibussowitsch { 5090880e062SHong Zhang Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data; 510f15d580aSBarry Smith const MatScalar *value; 511f15d580aSBarry Smith MatScalar *barray = baij->barray; 512ace3abfcSBarry Smith PetscBool roworiented = baij->roworiented, ignore_ltriangular = ((Mat_SeqSBAIJ *)baij->A->data)->ignore_ltriangular; 513899cda47SBarry Smith PetscInt i, j, ii, jj, row, col, rstart = baij->rstartbs; 514476417e5SBarry Smith PetscInt rend = baij->rendbs, cstart = baij->cstartbs, stepval; 515476417e5SBarry Smith PetscInt cend = baij->cendbs, bs = mat->rmap->bs, bs2 = baij->bs2; 5160880e062SHong Zhang 517a30f8f8cSSatish Balay PetscFunctionBegin; 5180880e062SHong Zhang if (!barray) { 5199566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(bs2, &barray)); 5200880e062SHong Zhang baij->barray = barray; 5210880e062SHong Zhang } 5220880e062SHong Zhang 5230880e062SHong Zhang if (roworiented) { 5240880e062SHong Zhang stepval = (n - 1) * bs; 5250880e062SHong Zhang } else { 5260880e062SHong Zhang stepval = (m - 1) * bs; 5270880e062SHong Zhang } 5280880e062SHong Zhang for (i = 0; i < m; i++) { 5290880e062SHong Zhang if (im[i] < 0) continue; 5306bdcaf15SBarry Smith PetscCheck(im[i] < baij->Mbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Block indexed row too large %" PetscInt_FMT " max %" PetscInt_FMT, im[i], baij->Mbs - 1); 5310880e062SHong Zhang if (im[i] >= rstart && im[i] < rend) { 5320880e062SHong Zhang row = im[i] - rstart; 5330880e062SHong Zhang for (j = 0; j < n; j++) { 534f3f98c53SJed Brown if (im[i] > in[j]) { 535f3f98c53SJed Brown if (ignore_ltriangular) continue; /* ignore lower triangular blocks */ 536e32f2f54SBarry Smith else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_USER, "Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)"); 537f3f98c53SJed Brown } 5380880e062SHong Zhang /* If NumCol = 1 then a copy is not required */ 5390880e062SHong Zhang if ((roworiented) && (n == 1)) { 540f15d580aSBarry Smith barray = (MatScalar *)v + i * bs2; 5410880e062SHong Zhang } else if ((!roworiented) && (m == 1)) { 542f15d580aSBarry Smith barray = (MatScalar *)v + j * bs2; 5430880e062SHong Zhang } else { /* Here a copy is required */ 5440880e062SHong Zhang if (roworiented) { 5450880e062SHong Zhang value = v + i * (stepval + bs) * bs + j * bs; 5460880e062SHong Zhang } else { 5470880e062SHong Zhang value = v + j * (stepval + bs) * bs + i * bs; 5480880e062SHong Zhang } 5490880e062SHong Zhang for (ii = 0; ii < bs; ii++, value += stepval) { 550ad540459SPierre Jolivet for (jj = 0; jj < bs; jj++) *barray++ = *value++; 5510880e062SHong Zhang } 5520880e062SHong Zhang barray -= bs2; 5530880e062SHong Zhang } 5540880e062SHong Zhang 5550880e062SHong Zhang if (in[j] >= cstart && in[j] < cend) { 5560880e062SHong Zhang col = in[j] - cstart; 5579566063dSJacob Faibussowitsch PetscCall(MatSetValuesBlocked_SeqSBAIJ_Inlined(baij->A, row, col, barray, addv, im[i], in[j])); 558f7d195e4SLawrence Mitchell } else if (in[j] < 0) { 559f7d195e4SLawrence Mitchell continue; 560f7d195e4SLawrence Mitchell } else { 561f7d195e4SLawrence Mitchell PetscCheck(in[j] < baij->Nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Block indexed column too large %" PetscInt_FMT " max %" PetscInt_FMT, in[j], baij->Nbs - 1); 5620880e062SHong Zhang if (mat->was_assembled) { 56348a46eb9SPierre Jolivet if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat)); 5640880e062SHong Zhang 5652515c552SBarry Smith #if defined(PETSC_USE_DEBUG) 5660880e062SHong Zhang #if defined(PETSC_USE_CTABLE) 5679371c9d4SSatish Balay { 5689371c9d4SSatish Balay PetscInt data; 569*eec179cfSJacob Faibussowitsch PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] + 1, 0, &data)); 57008401ef6SPierre Jolivet PetscCheck((data - 1) % bs == 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Incorrect colmap"); 5710880e062SHong Zhang } 5720880e062SHong Zhang #else 57308401ef6SPierre Jolivet PetscCheck((baij->colmap[in[j]] - 1) % bs == 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Incorrect colmap"); 5740880e062SHong Zhang #endif 5750880e062SHong Zhang #endif 5760880e062SHong Zhang #if defined(PETSC_USE_CTABLE) 577*eec179cfSJacob Faibussowitsch PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] + 1, 0, &col)); 5780880e062SHong Zhang col = (col - 1) / bs; 5790880e062SHong Zhang #else 5800880e062SHong Zhang col = (baij->colmap[in[j]] - 1) / bs; 5810880e062SHong Zhang #endif 5820880e062SHong Zhang if (col < 0 && !((Mat_SeqBAIJ *)(baij->A->data))->nonew) { 5839566063dSJacob Faibussowitsch PetscCall(MatDisAssemble_MPISBAIJ(mat)); 5840880e062SHong Zhang col = in[j]; 5850880e062SHong Zhang } 58626fbe8dcSKarl Rupp } else col = in[j]; 5879566063dSJacob Faibussowitsch PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B, row, col, barray, addv, im[i], in[j])); 5880880e062SHong Zhang } 5890880e062SHong Zhang } 5900880e062SHong Zhang } else { 5915f80ce2aSJacob Faibussowitsch PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process block indexed row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set", im[i]); 5920880e062SHong Zhang if (!baij->donotstash) { 5930880e062SHong Zhang if (roworiented) { 5949566063dSJacob Faibussowitsch PetscCall(MatStashValuesRowBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i)); 5950880e062SHong Zhang } else { 5969566063dSJacob Faibussowitsch PetscCall(MatStashValuesColBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i)); 5970880e062SHong Zhang } 5980880e062SHong Zhang } 5990880e062SHong Zhang } 6000880e062SHong Zhang } 6010880e062SHong Zhang PetscFunctionReturn(0); 602a30f8f8cSSatish Balay } 603a30f8f8cSSatish Balay 604d71ae5a4SJacob Faibussowitsch PetscErrorCode MatGetValues_MPISBAIJ(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], PetscScalar v[]) 605d71ae5a4SJacob Faibussowitsch { 606f3566a2aSHong Zhang Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data; 607d0f46423SBarry Smith PetscInt bs = mat->rmap->bs, i, j, bsrstart = mat->rmap->rstart, bsrend = mat->rmap->rend; 608d0f46423SBarry Smith PetscInt bscstart = mat->cmap->rstart, bscend = mat->cmap->rend, row, col, data; 609a30f8f8cSSatish Balay 610a30f8f8cSSatish Balay PetscFunctionBegin; 611a30f8f8cSSatish Balay for (i = 0; i < m; i++) { 61254c59aa7SJacob Faibussowitsch if (idxm[i] < 0) continue; /* negative row */ 61354c59aa7SJacob Faibussowitsch PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, idxm[i], mat->rmap->N - 1); 614a30f8f8cSSatish Balay if (idxm[i] >= bsrstart && idxm[i] < bsrend) { 615a30f8f8cSSatish Balay row = idxm[i] - bsrstart; 616a30f8f8cSSatish Balay for (j = 0; j < n; j++) { 61754c59aa7SJacob Faibussowitsch if (idxn[j] < 0) continue; /* negative column */ 61854c59aa7SJacob Faibussowitsch PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, idxn[j], mat->cmap->N - 1); 619a30f8f8cSSatish Balay if (idxn[j] >= bscstart && idxn[j] < bscend) { 620a30f8f8cSSatish Balay col = idxn[j] - bscstart; 6219566063dSJacob Faibussowitsch PetscCall(MatGetValues_SeqSBAIJ(baij->A, 1, &row, 1, &col, v + i * n + j)); 622a30f8f8cSSatish Balay } else { 62348a46eb9SPierre Jolivet if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat)); 624a30f8f8cSSatish Balay #if defined(PETSC_USE_CTABLE) 625*eec179cfSJacob Faibussowitsch PetscCall(PetscHMapIGetWithDefault(baij->colmap, idxn[j] / bs + 1, 0, &data)); 626a30f8f8cSSatish Balay data--; 627a30f8f8cSSatish Balay #else 628a30f8f8cSSatish Balay data = baij->colmap[idxn[j] / bs] - 1; 629a30f8f8cSSatish Balay #endif 630a30f8f8cSSatish Balay if ((data < 0) || (baij->garray[data / bs] != idxn[j] / bs)) *(v + i * n + j) = 0.0; 631a30f8f8cSSatish Balay else { 632a30f8f8cSSatish Balay col = data + idxn[j] % bs; 6339566063dSJacob Faibussowitsch PetscCall(MatGetValues_SeqBAIJ(baij->B, 1, &row, 1, &col, v + i * n + j)); 634a30f8f8cSSatish Balay } 635a30f8f8cSSatish Balay } 636a30f8f8cSSatish Balay } 637f23aa3ddSBarry Smith } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local values currently supported"); 638a30f8f8cSSatish Balay } 639a30f8f8cSSatish Balay PetscFunctionReturn(0); 640a30f8f8cSSatish Balay } 641a30f8f8cSSatish Balay 642d71ae5a4SJacob Faibussowitsch PetscErrorCode MatNorm_MPISBAIJ(Mat mat, NormType type, PetscReal *norm) 643d71ae5a4SJacob Faibussowitsch { 644a30f8f8cSSatish Balay Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data; 645a30f8f8cSSatish Balay PetscReal sum[2], *lnorm2; 646a30f8f8cSSatish Balay 647a30f8f8cSSatish Balay PetscFunctionBegin; 648a30f8f8cSSatish Balay if (baij->size == 1) { 6499566063dSJacob Faibussowitsch PetscCall(MatNorm(baij->A, type, norm)); 650a30f8f8cSSatish Balay } else { 651a30f8f8cSSatish Balay if (type == NORM_FROBENIUS) { 6529566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(2, &lnorm2)); 6539566063dSJacob Faibussowitsch PetscCall(MatNorm(baij->A, type, lnorm2)); 6549371c9d4SSatish Balay *lnorm2 = (*lnorm2) * (*lnorm2); 6559371c9d4SSatish Balay lnorm2++; /* squar power of norm(A) */ 6569566063dSJacob Faibussowitsch PetscCall(MatNorm(baij->B, type, lnorm2)); 6579371c9d4SSatish Balay *lnorm2 = (*lnorm2) * (*lnorm2); 6589371c9d4SSatish Balay lnorm2--; /* squar power of norm(B) */ 6591c2dc1cbSBarry Smith PetscCall(MPIU_Allreduce(lnorm2, sum, 2, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat))); 6608f1a2a5eSBarry Smith *norm = PetscSqrtReal(sum[0] + 2 * sum[1]); 6619566063dSJacob Faibussowitsch PetscCall(PetscFree(lnorm2)); 6620b8dc8d2SHong Zhang } else if (type == NORM_INFINITY || type == NORM_1) { /* max row/column sum */ 6630b8dc8d2SHong Zhang Mat_SeqSBAIJ *amat = (Mat_SeqSBAIJ *)baij->A->data; 6640b8dc8d2SHong Zhang Mat_SeqBAIJ *bmat = (Mat_SeqBAIJ *)baij->B->data; 6650b8dc8d2SHong Zhang PetscReal *rsum, *rsum2, vabs; 666899cda47SBarry Smith PetscInt *jj, *garray = baij->garray, rstart = baij->rstartbs, nz; 667d0f46423SBarry Smith PetscInt brow, bcol, col, bs = baij->A->rmap->bs, row, grow, gcol, mbs = amat->mbs; 6680b8dc8d2SHong Zhang MatScalar *v; 6690b8dc8d2SHong Zhang 6709566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(mat->cmap->N, &rsum, mat->cmap->N, &rsum2)); 6719566063dSJacob Faibussowitsch PetscCall(PetscArrayzero(rsum, mat->cmap->N)); 6720b8dc8d2SHong Zhang /* Amat */ 6739371c9d4SSatish Balay v = amat->a; 6749371c9d4SSatish Balay jj = amat->j; 6750b8dc8d2SHong Zhang for (brow = 0; brow < mbs; brow++) { 6760b8dc8d2SHong Zhang grow = bs * (rstart + brow); 6770b8dc8d2SHong Zhang nz = amat->i[brow + 1] - amat->i[brow]; 6780b8dc8d2SHong Zhang for (bcol = 0; bcol < nz; bcol++) { 6799371c9d4SSatish Balay gcol = bs * (rstart + *jj); 6809371c9d4SSatish Balay jj++; 6810b8dc8d2SHong Zhang for (col = 0; col < bs; col++) { 6820b8dc8d2SHong Zhang for (row = 0; row < bs; row++) { 6839371c9d4SSatish Balay vabs = PetscAbsScalar(*v); 6849371c9d4SSatish Balay v++; 6850b8dc8d2SHong Zhang rsum[gcol + col] += vabs; 6860b8dc8d2SHong Zhang /* non-diagonal block */ 6870b8dc8d2SHong Zhang if (bcol > 0 && vabs > 0.0) rsum[grow + row] += vabs; 6880b8dc8d2SHong Zhang } 6890b8dc8d2SHong Zhang } 6900b8dc8d2SHong Zhang } 6919566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(nz * bs * bs)); 6920b8dc8d2SHong Zhang } 6930b8dc8d2SHong Zhang /* Bmat */ 6949371c9d4SSatish Balay v = bmat->a; 6959371c9d4SSatish Balay jj = bmat->j; 6960b8dc8d2SHong Zhang for (brow = 0; brow < mbs; brow++) { 6970b8dc8d2SHong Zhang grow = bs * (rstart + brow); 6980b8dc8d2SHong Zhang nz = bmat->i[brow + 1] - bmat->i[brow]; 6990b8dc8d2SHong Zhang for (bcol = 0; bcol < nz; bcol++) { 7009371c9d4SSatish Balay gcol = bs * garray[*jj]; 7019371c9d4SSatish Balay jj++; 7020b8dc8d2SHong Zhang for (col = 0; col < bs; col++) { 7030b8dc8d2SHong Zhang for (row = 0; row < bs; row++) { 7049371c9d4SSatish Balay vabs = PetscAbsScalar(*v); 7059371c9d4SSatish Balay v++; 7060b8dc8d2SHong Zhang rsum[gcol + col] += vabs; 7070b8dc8d2SHong Zhang rsum[grow + row] += vabs; 7080b8dc8d2SHong Zhang } 7090b8dc8d2SHong Zhang } 7100b8dc8d2SHong Zhang } 7119566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(nz * bs * bs)); 7120b8dc8d2SHong Zhang } 7131c2dc1cbSBarry Smith PetscCall(MPIU_Allreduce(rsum, rsum2, mat->cmap->N, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat))); 7140b8dc8d2SHong Zhang *norm = 0.0; 715d0f46423SBarry Smith for (col = 0; col < mat->cmap->N; col++) { 7160b8dc8d2SHong Zhang if (rsum2[col] > *norm) *norm = rsum2[col]; 7170b8dc8d2SHong Zhang } 7189566063dSJacob Faibussowitsch PetscCall(PetscFree2(rsum, rsum2)); 719f23aa3ddSBarry Smith } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for this norm yet"); 720a30f8f8cSSatish Balay } 721a30f8f8cSSatish Balay PetscFunctionReturn(0); 722a30f8f8cSSatish Balay } 723a30f8f8cSSatish Balay 724d71ae5a4SJacob Faibussowitsch PetscErrorCode MatAssemblyBegin_MPISBAIJ(Mat mat, MatAssemblyType mode) 725d71ae5a4SJacob Faibussowitsch { 726a30f8f8cSSatish Balay Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data; 7271302d50aSBarry Smith PetscInt nstash, reallocs; 728a30f8f8cSSatish Balay 729a30f8f8cSSatish Balay PetscFunctionBegin; 73026fbe8dcSKarl Rupp if (baij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0); 731a30f8f8cSSatish Balay 7329566063dSJacob Faibussowitsch PetscCall(MatStashScatterBegin_Private(mat, &mat->stash, mat->rmap->range)); 7339566063dSJacob Faibussowitsch PetscCall(MatStashScatterBegin_Private(mat, &mat->bstash, baij->rangebs)); 7349566063dSJacob Faibussowitsch PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs)); 7359566063dSJacob Faibussowitsch PetscCall(PetscInfo(mat, "Stash has %" PetscInt_FMT " entries,uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs)); 7369566063dSJacob Faibussowitsch PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs)); 7379566063dSJacob Faibussowitsch PetscCall(PetscInfo(mat, "Block-Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs)); 738a30f8f8cSSatish Balay PetscFunctionReturn(0); 739a30f8f8cSSatish Balay } 740a30f8f8cSSatish Balay 741d71ae5a4SJacob Faibussowitsch PetscErrorCode MatAssemblyEnd_MPISBAIJ(Mat mat, MatAssemblyType mode) 742d71ae5a4SJacob Faibussowitsch { 743a30f8f8cSSatish Balay Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data; 744a30f8f8cSSatish Balay Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)baij->A->data; 74513f74950SBarry Smith PetscInt i, j, rstart, ncols, flg, bs2 = baij->bs2; 746e44c0bd4SBarry Smith PetscInt *row, *col; 747ace3abfcSBarry Smith PetscBool other_disassembled; 74813f74950SBarry Smith PetscMPIInt n; 749ace3abfcSBarry Smith PetscBool r1, r2, r3; 750a30f8f8cSSatish Balay MatScalar *val; 751a30f8f8cSSatish Balay 75291c97fd4SSatish Balay /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */ 753a30f8f8cSSatish Balay PetscFunctionBegin; 7544cb17eb5SBarry Smith if (!baij->donotstash && !mat->nooffprocentries) { 755a30f8f8cSSatish Balay while (1) { 7569566063dSJacob Faibussowitsch PetscCall(MatStashScatterGetMesg_Private(&mat->stash, &n, &row, &col, &val, &flg)); 757a30f8f8cSSatish Balay if (!flg) break; 758a30f8f8cSSatish Balay 759a30f8f8cSSatish Balay for (i = 0; i < n;) { 760a30f8f8cSSatish Balay /* Now identify the consecutive vals belonging to the same row */ 76126fbe8dcSKarl Rupp for (j = i, rstart = row[j]; j < n; j++) { 76226fbe8dcSKarl Rupp if (row[j] != rstart) break; 76326fbe8dcSKarl Rupp } 764a30f8f8cSSatish Balay if (j < n) ncols = j - i; 765a30f8f8cSSatish Balay else ncols = n - i; 766a30f8f8cSSatish Balay /* Now assemble all these values with a single function call */ 7679566063dSJacob Faibussowitsch PetscCall(MatSetValues_MPISBAIJ(mat, 1, row + i, ncols, col + i, val + i, mat->insertmode)); 768a30f8f8cSSatish Balay i = j; 769a30f8f8cSSatish Balay } 770a30f8f8cSSatish Balay } 7719566063dSJacob Faibussowitsch PetscCall(MatStashScatterEnd_Private(&mat->stash)); 772a30f8f8cSSatish Balay /* Now process the block-stash. Since the values are stashed column-oriented, 773a30f8f8cSSatish Balay set the roworiented flag to column oriented, and after MatSetValues() 774a30f8f8cSSatish Balay restore the original flags */ 775a30f8f8cSSatish Balay r1 = baij->roworiented; 776a30f8f8cSSatish Balay r2 = a->roworiented; 77791c97fd4SSatish Balay r3 = ((Mat_SeqBAIJ *)baij->B->data)->roworiented; 77826fbe8dcSKarl Rupp 779a30f8f8cSSatish Balay baij->roworiented = PETSC_FALSE; 780a30f8f8cSSatish Balay a->roworiented = PETSC_FALSE; 78126fbe8dcSKarl Rupp 78291c97fd4SSatish Balay ((Mat_SeqBAIJ *)baij->B->data)->roworiented = PETSC_FALSE; /* b->roworinted */ 783a30f8f8cSSatish Balay while (1) { 7849566063dSJacob Faibussowitsch PetscCall(MatStashScatterGetMesg_Private(&mat->bstash, &n, &row, &col, &val, &flg)); 785a30f8f8cSSatish Balay if (!flg) break; 786a30f8f8cSSatish Balay 787a30f8f8cSSatish Balay for (i = 0; i < n;) { 788a30f8f8cSSatish Balay /* Now identify the consecutive vals belonging to the same row */ 78926fbe8dcSKarl Rupp for (j = i, rstart = row[j]; j < n; j++) { 79026fbe8dcSKarl Rupp if (row[j] != rstart) break; 79126fbe8dcSKarl Rupp } 792a30f8f8cSSatish Balay if (j < n) ncols = j - i; 793a30f8f8cSSatish Balay else ncols = n - i; 7949566063dSJacob Faibussowitsch PetscCall(MatSetValuesBlocked_MPISBAIJ(mat, 1, row + i, ncols, col + i, val + i * bs2, mat->insertmode)); 795a30f8f8cSSatish Balay i = j; 796a30f8f8cSSatish Balay } 797a30f8f8cSSatish Balay } 7989566063dSJacob Faibussowitsch PetscCall(MatStashScatterEnd_Private(&mat->bstash)); 79926fbe8dcSKarl Rupp 800a30f8f8cSSatish Balay baij->roworiented = r1; 801a30f8f8cSSatish Balay a->roworiented = r2; 80226fbe8dcSKarl Rupp 80391c97fd4SSatish Balay ((Mat_SeqBAIJ *)baij->B->data)->roworiented = r3; /* b->roworinted */ 804a30f8f8cSSatish Balay } 805a30f8f8cSSatish Balay 8069566063dSJacob Faibussowitsch PetscCall(MatAssemblyBegin(baij->A, mode)); 8079566063dSJacob Faibussowitsch PetscCall(MatAssemblyEnd(baij->A, mode)); 808a30f8f8cSSatish Balay 809a30f8f8cSSatish Balay /* determine if any processor has disassembled, if so we must 8106aad120cSJose E. Roman also disassemble ourselves, in order that we may reassemble. */ 811a30f8f8cSSatish Balay /* 812a30f8f8cSSatish Balay if nonzero structure of submatrix B cannot change then we know that 813a30f8f8cSSatish Balay no processor disassembled thus we can skip this stuff 814a30f8f8cSSatish Balay */ 815a30f8f8cSSatish Balay if (!((Mat_SeqBAIJ *)baij->B->data)->nonew) { 8165f9db2b2SJunchao Zhang PetscCall(MPIU_Allreduce(&mat->was_assembled, &other_disassembled, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat))); 81748a46eb9SPierre Jolivet if (mat->was_assembled && !other_disassembled) PetscCall(MatDisAssemble_MPISBAIJ(mat)); 818a30f8f8cSSatish Balay } 819a30f8f8cSSatish Balay 8209371c9d4SSatish Balay if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { PetscCall(MatSetUpMultiply_MPISBAIJ(mat)); /* setup Mvctx and sMvctx */ } 8219566063dSJacob Faibussowitsch PetscCall(MatAssemblyBegin(baij->B, mode)); 8229566063dSJacob Faibussowitsch PetscCall(MatAssemblyEnd(baij->B, mode)); 823a30f8f8cSSatish Balay 8249566063dSJacob Faibussowitsch PetscCall(PetscFree2(baij->rowvalues, baij->rowindices)); 82526fbe8dcSKarl Rupp 826f4259b30SLisandro Dalcin baij->rowvalues = NULL; 8274f9cfa9eSBarry Smith 8284f9cfa9eSBarry Smith /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */ 8294f9cfa9eSBarry Smith if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqBAIJ *)(baij->A->data))->nonew) { 830e56f5c9eSBarry Smith PetscObjectState state = baij->A->nonzerostate + baij->B->nonzerostate; 8311c2dc1cbSBarry Smith PetscCall(MPIU_Allreduce(&state, &mat->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)mat))); 832e56f5c9eSBarry Smith } 833a30f8f8cSSatish Balay PetscFunctionReturn(0); 834a30f8f8cSSatish Balay } 835a30f8f8cSSatish Balay 836dd6ea824SBarry Smith extern PetscErrorCode MatSetValues_MPIBAIJ(Mat, PetscInt, const PetscInt[], PetscInt, const PetscInt[], const PetscScalar[], InsertMode); 8379804daf3SBarry Smith #include <petscdraw.h> 838d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat, PetscViewer viewer) 839d71ae5a4SJacob Faibussowitsch { 840a30f8f8cSSatish Balay Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data; 841d0f46423SBarry Smith PetscInt bs = mat->rmap->bs; 8427da1fb6eSBarry Smith PetscMPIInt rank = baij->rank; 843ace3abfcSBarry Smith PetscBool iascii, isdraw; 844b0a32e0cSBarry Smith PetscViewer sviewer; 845f3ef73ceSBarry Smith PetscViewerFormat format; 846a30f8f8cSSatish Balay 847a30f8f8cSSatish Balay PetscFunctionBegin; 8489566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii)); 8499566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw)); 85032077d6dSBarry Smith if (iascii) { 8519566063dSJacob Faibussowitsch PetscCall(PetscViewerGetFormat(viewer, &format)); 852456192e2SBarry Smith if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 853a30f8f8cSSatish Balay MatInfo info; 8549566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank)); 8559566063dSJacob Faibussowitsch PetscCall(MatGetInfo(mat, MAT_LOCAL, &info)); 8569566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPushSynchronized(viewer)); 8579371c9d4SSatish Balay PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " bs %" PetscInt_FMT " mem %g\n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated, 8589371c9d4SSatish Balay mat->rmap->bs, (double)info.memory)); 8599566063dSJacob Faibussowitsch PetscCall(MatGetInfo(baij->A, MAT_LOCAL, &info)); 8609566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] on-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used)); 8619566063dSJacob Faibussowitsch PetscCall(MatGetInfo(baij->B, MAT_LOCAL, &info)); 8629566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] off-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used)); 8639566063dSJacob Faibussowitsch PetscCall(PetscViewerFlush(viewer)); 8649566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPopSynchronized(viewer)); 8659566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, "Information on VecScatter used in matrix-vector product: \n")); 8669566063dSJacob Faibussowitsch PetscCall(VecScatterView(baij->Mvctx, viewer)); 867a30f8f8cSSatish Balay PetscFunctionReturn(0); 868fb9695e5SSatish Balay } else if (format == PETSC_VIEWER_ASCII_INFO) { 8699566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " block size is %" PetscInt_FMT "\n", bs)); 870a30f8f8cSSatish Balay PetscFunctionReturn(0); 871c1490034SHong Zhang } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 872c1490034SHong Zhang PetscFunctionReturn(0); 873a30f8f8cSSatish Balay } 874a30f8f8cSSatish Balay } 875a30f8f8cSSatish Balay 876a30f8f8cSSatish Balay if (isdraw) { 877b0a32e0cSBarry Smith PetscDraw draw; 878ace3abfcSBarry Smith PetscBool isnull; 8799566063dSJacob Faibussowitsch PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw)); 8809566063dSJacob Faibussowitsch PetscCall(PetscDrawIsNull(draw, &isnull)); 88145f3bb6eSLisandro Dalcin if (isnull) PetscFunctionReturn(0); 882a30f8f8cSSatish Balay } 883a30f8f8cSSatish Balay 8847da1fb6eSBarry Smith { 885a30f8f8cSSatish Balay /* assemble the entire matrix onto first processor. */ 886a30f8f8cSSatish Balay Mat A; 88765d70643SHong Zhang Mat_SeqSBAIJ *Aloc; 88865d70643SHong Zhang Mat_SeqBAIJ *Bloc; 889d0f46423SBarry Smith PetscInt M = mat->rmap->N, N = mat->cmap->N, *ai, *aj, col, i, j, k, *rvals, mbs = baij->mbs; 890a30f8f8cSSatish Balay MatScalar *a; 8913e219373SBarry Smith const char *matname; 892a30f8f8cSSatish Balay 893f204ca49SKris Buschelman /* Should this be the same type as mat? */ 8949566063dSJacob Faibussowitsch PetscCall(MatCreate(PetscObjectComm((PetscObject)mat), &A)); 895dd400576SPatrick Sanan if (rank == 0) { 8969566063dSJacob Faibussowitsch PetscCall(MatSetSizes(A, M, N, M, N)); 897a30f8f8cSSatish Balay } else { 8989566063dSJacob Faibussowitsch PetscCall(MatSetSizes(A, 0, 0, M, N)); 899a30f8f8cSSatish Balay } 9009566063dSJacob Faibussowitsch PetscCall(MatSetType(A, MATMPISBAIJ)); 9019566063dSJacob Faibussowitsch PetscCall(MatMPISBAIJSetPreallocation(A, mat->rmap->bs, 0, NULL, 0, NULL)); 9029566063dSJacob Faibussowitsch PetscCall(MatSetOption(A, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_FALSE)); 903a30f8f8cSSatish Balay 904a30f8f8cSSatish Balay /* copy over the A part */ 90565d70643SHong Zhang Aloc = (Mat_SeqSBAIJ *)baij->A->data; 9069371c9d4SSatish Balay ai = Aloc->i; 9079371c9d4SSatish Balay aj = Aloc->j; 9089371c9d4SSatish Balay a = Aloc->a; 9099566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(bs, &rvals)); 910a30f8f8cSSatish Balay 911a30f8f8cSSatish Balay for (i = 0; i < mbs; i++) { 912e9f7bc9eSHong Zhang rvals[0] = bs * (baij->rstartbs + i); 91326fbe8dcSKarl Rupp for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1; 914a30f8f8cSSatish Balay for (j = ai[i]; j < ai[i + 1]; j++) { 915e9f7bc9eSHong Zhang col = (baij->cstartbs + aj[j]) * bs; 916a30f8f8cSSatish Balay for (k = 0; k < bs; k++) { 9179566063dSJacob Faibussowitsch PetscCall(MatSetValues_MPISBAIJ(A, bs, rvals, 1, &col, a, INSERT_VALUES)); 91826fbe8dcSKarl Rupp col++; 91926fbe8dcSKarl Rupp a += bs; 920a30f8f8cSSatish Balay } 921a30f8f8cSSatish Balay } 922a30f8f8cSSatish Balay } 923a30f8f8cSSatish Balay /* copy over the B part */ 92465d70643SHong Zhang Bloc = (Mat_SeqBAIJ *)baij->B->data; 9259371c9d4SSatish Balay ai = Bloc->i; 9269371c9d4SSatish Balay aj = Bloc->j; 9279371c9d4SSatish Balay a = Bloc->a; 928a30f8f8cSSatish Balay for (i = 0; i < mbs; i++) { 929e9f7bc9eSHong Zhang rvals[0] = bs * (baij->rstartbs + i); 93026fbe8dcSKarl Rupp for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1; 931a30f8f8cSSatish Balay for (j = ai[i]; j < ai[i + 1]; j++) { 932a30f8f8cSSatish Balay col = baij->garray[aj[j]] * bs; 933a30f8f8cSSatish Balay for (k = 0; k < bs; k++) { 9349566063dSJacob Faibussowitsch PetscCall(MatSetValues_MPIBAIJ(A, bs, rvals, 1, &col, a, INSERT_VALUES)); 93526fbe8dcSKarl Rupp col++; 93626fbe8dcSKarl Rupp a += bs; 937a30f8f8cSSatish Balay } 938a30f8f8cSSatish Balay } 939a30f8f8cSSatish Balay } 9409566063dSJacob Faibussowitsch PetscCall(PetscFree(rvals)); 9419566063dSJacob Faibussowitsch PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY)); 9429566063dSJacob Faibussowitsch PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY)); 943a30f8f8cSSatish Balay /* 944a30f8f8cSSatish Balay Everyone has to call to draw the matrix since the graphics waits are 945b0a32e0cSBarry Smith synchronized across all processors that share the PetscDraw object 946a30f8f8cSSatish Balay */ 9479566063dSJacob Faibussowitsch PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer)); 9489566063dSJacob Faibussowitsch PetscCall(PetscObjectGetName((PetscObject)mat, &matname)); 949dd400576SPatrick Sanan if (rank == 0) { 9509566063dSJacob Faibussowitsch PetscCall(PetscObjectSetName((PetscObject)((Mat_MPISBAIJ *)(A->data))->A, matname)); 9519566063dSJacob Faibussowitsch PetscCall(MatView_SeqSBAIJ(((Mat_MPISBAIJ *)(A->data))->A, sviewer)); 952a30f8f8cSSatish Balay } 9539566063dSJacob Faibussowitsch PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer)); 9549566063dSJacob Faibussowitsch PetscCall(PetscViewerFlush(viewer)); 9559566063dSJacob Faibussowitsch PetscCall(MatDestroy(&A)); 956a30f8f8cSSatish Balay } 957a30f8f8cSSatish Balay PetscFunctionReturn(0); 958a30f8f8cSSatish Balay } 959a30f8f8cSSatish Balay 960618cc2edSLisandro Dalcin /* Used for both MPIBAIJ and MPISBAIJ matrices */ 961618cc2edSLisandro Dalcin #define MatView_MPISBAIJ_Binary MatView_MPIBAIJ_Binary 962d1654148SHong Zhang 963d71ae5a4SJacob Faibussowitsch PetscErrorCode MatView_MPISBAIJ(Mat mat, PetscViewer viewer) 964d71ae5a4SJacob Faibussowitsch { 965ace3abfcSBarry Smith PetscBool iascii, isdraw, issocket, isbinary; 966a30f8f8cSSatish Balay 967a30f8f8cSSatish Balay PetscFunctionBegin; 9689566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii)); 9699566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw)); 9709566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSOCKET, &issocket)); 9719566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary)); 972d1654148SHong Zhang if (iascii || isdraw || issocket) { 9739566063dSJacob Faibussowitsch PetscCall(MatView_MPISBAIJ_ASCIIorDraworSocket(mat, viewer)); 9741baa6e33SBarry Smith } else if (isbinary) PetscCall(MatView_MPISBAIJ_Binary(mat, viewer)); 975a30f8f8cSSatish Balay PetscFunctionReturn(0); 976a30f8f8cSSatish Balay } 977a30f8f8cSSatish Balay 978d71ae5a4SJacob Faibussowitsch PetscErrorCode MatDestroy_MPISBAIJ(Mat mat) 979d71ae5a4SJacob Faibussowitsch { 980a30f8f8cSSatish Balay Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data; 981a30f8f8cSSatish Balay 982a30f8f8cSSatish Balay PetscFunctionBegin; 983a30f8f8cSSatish Balay #if defined(PETSC_USE_LOG) 984c0aa6a63SJacob Faibussowitsch PetscLogObjectState((PetscObject)mat, "Rows=%" PetscInt_FMT ",Cols=%" PetscInt_FMT, mat->rmap->N, mat->cmap->N); 985a30f8f8cSSatish Balay #endif 9869566063dSJacob Faibussowitsch PetscCall(MatStashDestroy_Private(&mat->stash)); 9879566063dSJacob Faibussowitsch PetscCall(MatStashDestroy_Private(&mat->bstash)); 9889566063dSJacob Faibussowitsch PetscCall(MatDestroy(&baij->A)); 9899566063dSJacob Faibussowitsch PetscCall(MatDestroy(&baij->B)); 990a30f8f8cSSatish Balay #if defined(PETSC_USE_CTABLE) 991*eec179cfSJacob Faibussowitsch PetscCall(PetscHMapIDestroy(&baij->colmap)); 992a30f8f8cSSatish Balay #else 9939566063dSJacob Faibussowitsch PetscCall(PetscFree(baij->colmap)); 994a30f8f8cSSatish Balay #endif 9959566063dSJacob Faibussowitsch PetscCall(PetscFree(baij->garray)); 9969566063dSJacob Faibussowitsch PetscCall(VecDestroy(&baij->lvec)); 9979566063dSJacob Faibussowitsch PetscCall(VecScatterDestroy(&baij->Mvctx)); 9989566063dSJacob Faibussowitsch PetscCall(VecDestroy(&baij->slvec0)); 9999566063dSJacob Faibussowitsch PetscCall(VecDestroy(&baij->slvec0b)); 10009566063dSJacob Faibussowitsch PetscCall(VecDestroy(&baij->slvec1)); 10019566063dSJacob Faibussowitsch PetscCall(VecDestroy(&baij->slvec1a)); 10029566063dSJacob Faibussowitsch PetscCall(VecDestroy(&baij->slvec1b)); 10039566063dSJacob Faibussowitsch PetscCall(VecScatterDestroy(&baij->sMvctx)); 10049566063dSJacob Faibussowitsch PetscCall(PetscFree2(baij->rowvalues, baij->rowindices)); 10059566063dSJacob Faibussowitsch PetscCall(PetscFree(baij->barray)); 10069566063dSJacob Faibussowitsch PetscCall(PetscFree(baij->hd)); 10079566063dSJacob Faibussowitsch PetscCall(VecDestroy(&baij->diag)); 10089566063dSJacob Faibussowitsch PetscCall(VecDestroy(&baij->bb1)); 10099566063dSJacob Faibussowitsch PetscCall(VecDestroy(&baij->xx1)); 1010ce63c4c1SBarry Smith #if defined(PETSC_USE_REAL_MAT_SINGLE) 10119566063dSJacob Faibussowitsch PetscCall(PetscFree(baij->setvaluescopy)); 1012a30f8f8cSSatish Balay #endif 10139566063dSJacob Faibussowitsch PetscCall(PetscFree(baij->in_loc)); 10149566063dSJacob Faibussowitsch PetscCall(PetscFree(baij->v_loc)); 10159566063dSJacob Faibussowitsch PetscCall(PetscFree(baij->rangebs)); 10169566063dSJacob Faibussowitsch PetscCall(PetscFree(mat->data)); 1017901853e0SKris Buschelman 10189566063dSJacob Faibussowitsch PetscCall(PetscObjectChangeTypeName((PetscObject)mat, NULL)); 10199566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatStoreValues_C", NULL)); 10209566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatRetrieveValues_C", NULL)); 10219566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPISBAIJSetPreallocation_C", NULL)); 10229566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPISBAIJSetPreallocationCSR_C", NULL)); 10236214f412SHong Zhang #if defined(PETSC_HAVE_ELEMENTAL) 10249566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpisbaij_elemental_C", NULL)); 10256214f412SHong Zhang #endif 1026d24d4204SJose E. Roman #if defined(PETSC_HAVE_SCALAPACK) 10279566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpisbaij_scalapack_C", NULL)); 1028d24d4204SJose E. Roman #endif 10299566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpisbaij_mpiaij_C", NULL)); 10309566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpisbaij_mpibaij_C", NULL)); 1031a30f8f8cSSatish Balay PetscFunctionReturn(0); 1032a30f8f8cSSatish Balay } 1033a30f8f8cSSatish Balay 1034d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMult_MPISBAIJ_Hermitian(Mat A, Vec xx, Vec yy) 1035d71ae5a4SJacob Faibussowitsch { 1036547795f9SHong Zhang Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data; 1037eb1ec7c1SStefano Zampini PetscInt mbs = a->mbs, bs = A->rmap->bs; 10386de40e93SBarry Smith PetscScalar *from; 10396de40e93SBarry Smith const PetscScalar *x; 1040547795f9SHong Zhang 1041547795f9SHong Zhang PetscFunctionBegin; 1042547795f9SHong Zhang /* diagonal part */ 10439566063dSJacob Faibussowitsch PetscCall((*a->A->ops->mult)(a->A, xx, a->slvec1a)); 10449566063dSJacob Faibussowitsch PetscCall(VecSet(a->slvec1b, 0.0)); 1045547795f9SHong Zhang 1046547795f9SHong Zhang /* subdiagonal part */ 10475f80ce2aSJacob Faibussowitsch PetscCheck(a->B->ops->multhermitiantranspose, PetscObjectComm((PetscObject)a->B), PETSC_ERR_SUP, "Not for type %s", ((PetscObject)a->B)->type_name); 10489566063dSJacob Faibussowitsch PetscCall((*a->B->ops->multhermitiantranspose)(a->B, xx, a->slvec0b)); 1049547795f9SHong Zhang 1050547795f9SHong Zhang /* copy x into the vec slvec0 */ 10519566063dSJacob Faibussowitsch PetscCall(VecGetArray(a->slvec0, &from)); 10529566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx, &x)); 1053547795f9SHong Zhang 10549566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(from, x, bs * mbs)); 10559566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(a->slvec0, &from)); 10569566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx, &x)); 1057547795f9SHong Zhang 10589566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD)); 10599566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD)); 1060547795f9SHong Zhang /* supperdiagonal part */ 10619566063dSJacob Faibussowitsch PetscCall((*a->B->ops->multadd)(a->B, a->slvec1b, a->slvec1a, yy)); 1062547795f9SHong Zhang PetscFunctionReturn(0); 1063547795f9SHong Zhang } 1064547795f9SHong Zhang 1065d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMult_MPISBAIJ(Mat A, Vec xx, Vec yy) 1066d71ae5a4SJacob Faibussowitsch { 1067a9d4b620SHong Zhang Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data; 1068eb1ec7c1SStefano Zampini PetscInt mbs = a->mbs, bs = A->rmap->bs; 1069d9ca1df4SBarry Smith PetscScalar *from; 1070d9ca1df4SBarry Smith const PetscScalar *x; 1071a9d4b620SHong Zhang 1072a9d4b620SHong Zhang PetscFunctionBegin; 1073a9d4b620SHong Zhang /* diagonal part */ 10749566063dSJacob Faibussowitsch PetscCall((*a->A->ops->mult)(a->A, xx, a->slvec1a)); 10759566063dSJacob Faibussowitsch PetscCall(VecSet(a->slvec1b, 0.0)); 1076a9d4b620SHong Zhang 1077a9d4b620SHong Zhang /* subdiagonal part */ 10789566063dSJacob Faibussowitsch PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->slvec0b)); 1079fc165ae2SBarry Smith 1080a9d4b620SHong Zhang /* copy x into the vec slvec0 */ 10819566063dSJacob Faibussowitsch PetscCall(VecGetArray(a->slvec0, &from)); 10829566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx, &x)); 1083a9d4b620SHong Zhang 10849566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(from, x, bs * mbs)); 10859566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(a->slvec0, &from)); 10869566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx, &x)); 1087fc165ae2SBarry Smith 10889566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD)); 10899566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD)); 1090a9d4b620SHong Zhang /* supperdiagonal part */ 10919566063dSJacob Faibussowitsch PetscCall((*a->B->ops->multadd)(a->B, a->slvec1b, a->slvec1a, yy)); 1092a9d4b620SHong Zhang PetscFunctionReturn(0); 1093a9d4b620SHong Zhang } 1094a9d4b620SHong Zhang 1095d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMultAdd_MPISBAIJ_Hermitian(Mat A, Vec xx, Vec yy, Vec zz) 1096d71ae5a4SJacob Faibussowitsch { 1097eb1ec7c1SStefano Zampini Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data; 1098eb1ec7c1SStefano Zampini PetscInt mbs = a->mbs, bs = A->rmap->bs; 1099eb1ec7c1SStefano Zampini PetscScalar *from, zero = 0.0; 1100eb1ec7c1SStefano Zampini const PetscScalar *x; 1101eb1ec7c1SStefano Zampini 1102eb1ec7c1SStefano Zampini PetscFunctionBegin; 1103eb1ec7c1SStefano Zampini /* diagonal part */ 11049566063dSJacob Faibussowitsch PetscCall((*a->A->ops->multadd)(a->A, xx, yy, a->slvec1a)); 11059566063dSJacob Faibussowitsch PetscCall(VecSet(a->slvec1b, zero)); 1106eb1ec7c1SStefano Zampini 1107eb1ec7c1SStefano Zampini /* subdiagonal part */ 11085f80ce2aSJacob Faibussowitsch PetscCheck(a->B->ops->multhermitiantranspose, PetscObjectComm((PetscObject)a->B), PETSC_ERR_SUP, "Not for type %s", ((PetscObject)a->B)->type_name); 11099566063dSJacob Faibussowitsch PetscCall((*a->B->ops->multhermitiantranspose)(a->B, xx, a->slvec0b)); 1110eb1ec7c1SStefano Zampini 1111eb1ec7c1SStefano Zampini /* copy x into the vec slvec0 */ 11129566063dSJacob Faibussowitsch PetscCall(VecGetArray(a->slvec0, &from)); 11139566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx, &x)); 11149566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(from, x, bs * mbs)); 11159566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(a->slvec0, &from)); 1116eb1ec7c1SStefano Zampini 11179566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD)); 11189566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx, &x)); 11199566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD)); 1120eb1ec7c1SStefano Zampini 1121eb1ec7c1SStefano Zampini /* supperdiagonal part */ 11229566063dSJacob Faibussowitsch PetscCall((*a->B->ops->multadd)(a->B, a->slvec1b, a->slvec1a, zz)); 1123eb1ec7c1SStefano Zampini PetscFunctionReturn(0); 1124eb1ec7c1SStefano Zampini } 1125eb1ec7c1SStefano Zampini 1126d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMultAdd_MPISBAIJ(Mat A, Vec xx, Vec yy, Vec zz) 1127d71ae5a4SJacob Faibussowitsch { 1128de8b6608SHong Zhang Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data; 1129d0f46423SBarry Smith PetscInt mbs = a->mbs, bs = A->rmap->bs; 1130d9ca1df4SBarry Smith PetscScalar *from, zero = 0.0; 1131d9ca1df4SBarry Smith const PetscScalar *x; 1132a9d4b620SHong Zhang 1133a9d4b620SHong Zhang PetscFunctionBegin; 1134a9d4b620SHong Zhang /* diagonal part */ 11359566063dSJacob Faibussowitsch PetscCall((*a->A->ops->multadd)(a->A, xx, yy, a->slvec1a)); 11369566063dSJacob Faibussowitsch PetscCall(VecSet(a->slvec1b, zero)); 1137a9d4b620SHong Zhang 1138a9d4b620SHong Zhang /* subdiagonal part */ 11399566063dSJacob Faibussowitsch PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->slvec0b)); 1140a9d4b620SHong Zhang 1141a9d4b620SHong Zhang /* copy x into the vec slvec0 */ 11429566063dSJacob Faibussowitsch PetscCall(VecGetArray(a->slvec0, &from)); 11439566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx, &x)); 11449566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(from, x, bs * mbs)); 11459566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(a->slvec0, &from)); 1146a9d4b620SHong Zhang 11479566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD)); 11489566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx, &x)); 11499566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD)); 1150a9d4b620SHong Zhang 1151a9d4b620SHong Zhang /* supperdiagonal part */ 11529566063dSJacob Faibussowitsch PetscCall((*a->B->ops->multadd)(a->B, a->slvec1b, a->slvec1a, zz)); 1153a9d4b620SHong Zhang PetscFunctionReturn(0); 1154a9d4b620SHong Zhang } 1155a9d4b620SHong Zhang 1156a30f8f8cSSatish Balay /* 1157a30f8f8cSSatish Balay This only works correctly for square matrices where the subblock A->A is the 1158a30f8f8cSSatish Balay diagonal block 1159a30f8f8cSSatish Balay */ 1160d71ae5a4SJacob Faibussowitsch PetscErrorCode MatGetDiagonal_MPISBAIJ(Mat A, Vec v) 1161d71ae5a4SJacob Faibussowitsch { 1162a30f8f8cSSatish Balay Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data; 1163a30f8f8cSSatish Balay 1164a30f8f8cSSatish Balay PetscFunctionBegin; 116508401ef6SPierre Jolivet /* PetscCheck(a->rmap->N == a->cmap->N,PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); */ 11669566063dSJacob Faibussowitsch PetscCall(MatGetDiagonal(a->A, v)); 1167a30f8f8cSSatish Balay PetscFunctionReturn(0); 1168a30f8f8cSSatish Balay } 1169a30f8f8cSSatish Balay 1170d71ae5a4SJacob Faibussowitsch PetscErrorCode MatScale_MPISBAIJ(Mat A, PetscScalar aa) 1171d71ae5a4SJacob Faibussowitsch { 1172a30f8f8cSSatish Balay Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data; 1173a30f8f8cSSatish Balay 1174a30f8f8cSSatish Balay PetscFunctionBegin; 11759566063dSJacob Faibussowitsch PetscCall(MatScale(a->A, aa)); 11769566063dSJacob Faibussowitsch PetscCall(MatScale(a->B, aa)); 1177a30f8f8cSSatish Balay PetscFunctionReturn(0); 1178a30f8f8cSSatish Balay } 1179a30f8f8cSSatish Balay 1180d71ae5a4SJacob Faibussowitsch PetscErrorCode MatGetRow_MPISBAIJ(Mat matin, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v) 1181d71ae5a4SJacob Faibussowitsch { 1182d0d4cfc2SHong Zhang Mat_MPISBAIJ *mat = (Mat_MPISBAIJ *)matin->data; 1183d0d4cfc2SHong Zhang PetscScalar *vworkA, *vworkB, **pvA, **pvB, *v_p; 1184d0f46423SBarry Smith PetscInt bs = matin->rmap->bs, bs2 = mat->bs2, i, *cworkA, *cworkB, **pcA, **pcB; 1185d0f46423SBarry Smith PetscInt nztot, nzA, nzB, lrow, brstart = matin->rmap->rstart, brend = matin->rmap->rend; 1186899cda47SBarry Smith PetscInt *cmap, *idx_p, cstart = mat->rstartbs; 1187d0d4cfc2SHong Zhang 1188a30f8f8cSSatish Balay PetscFunctionBegin; 11895f80ce2aSJacob Faibussowitsch PetscCheck(!mat->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Already active"); 1190d0d4cfc2SHong Zhang mat->getrowactive = PETSC_TRUE; 1191d0d4cfc2SHong Zhang 1192d0d4cfc2SHong Zhang if (!mat->rowvalues && (idx || v)) { 1193d0d4cfc2SHong Zhang /* 1194d0d4cfc2SHong Zhang allocate enough space to hold information from the longest row. 1195d0d4cfc2SHong Zhang */ 1196d0d4cfc2SHong Zhang Mat_SeqSBAIJ *Aa = (Mat_SeqSBAIJ *)mat->A->data; 1197d0d4cfc2SHong Zhang Mat_SeqBAIJ *Ba = (Mat_SeqBAIJ *)mat->B->data; 1198d0d4cfc2SHong Zhang PetscInt max = 1, mbs = mat->mbs, tmp; 1199d0d4cfc2SHong Zhang for (i = 0; i < mbs; i++) { 1200d0d4cfc2SHong Zhang tmp = Aa->i[i + 1] - Aa->i[i] + Ba->i[i + 1] - Ba->i[i]; /* row length */ 120126fbe8dcSKarl Rupp if (max < tmp) max = tmp; 1202d0d4cfc2SHong Zhang } 12039566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(max * bs2, &mat->rowvalues, max * bs2, &mat->rowindices)); 1204d0d4cfc2SHong Zhang } 1205d0d4cfc2SHong Zhang 12065f80ce2aSJacob Faibussowitsch PetscCheck(row >= brstart && row < brend, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local rows"); 1207d0d4cfc2SHong Zhang lrow = row - brstart; /* local row index */ 1208d0d4cfc2SHong Zhang 12099371c9d4SSatish Balay pvA = &vworkA; 12109371c9d4SSatish Balay pcA = &cworkA; 12119371c9d4SSatish Balay pvB = &vworkB; 12129371c9d4SSatish Balay pcB = &cworkB; 12139371c9d4SSatish Balay if (!v) { 12149371c9d4SSatish Balay pvA = NULL; 12159371c9d4SSatish Balay pvB = NULL; 12169371c9d4SSatish Balay } 12179371c9d4SSatish Balay if (!idx) { 12189371c9d4SSatish Balay pcA = NULL; 12199371c9d4SSatish Balay if (!v) pcB = NULL; 12209371c9d4SSatish Balay } 12219566063dSJacob Faibussowitsch PetscCall((*mat->A->ops->getrow)(mat->A, lrow, &nzA, pcA, pvA)); 12229566063dSJacob Faibussowitsch PetscCall((*mat->B->ops->getrow)(mat->B, lrow, &nzB, pcB, pvB)); 1223d0d4cfc2SHong Zhang nztot = nzA + nzB; 1224d0d4cfc2SHong Zhang 1225d0d4cfc2SHong Zhang cmap = mat->garray; 1226d0d4cfc2SHong Zhang if (v || idx) { 1227d0d4cfc2SHong Zhang if (nztot) { 1228d0d4cfc2SHong Zhang /* Sort by increasing column numbers, assuming A and B already sorted */ 1229d0d4cfc2SHong Zhang PetscInt imark = -1; 1230d0d4cfc2SHong Zhang if (v) { 1231d0d4cfc2SHong Zhang *v = v_p = mat->rowvalues; 1232d0d4cfc2SHong Zhang for (i = 0; i < nzB; i++) { 1233d0d4cfc2SHong Zhang if (cmap[cworkB[i] / bs] < cstart) v_p[i] = vworkB[i]; 1234d0d4cfc2SHong Zhang else break; 1235d0d4cfc2SHong Zhang } 1236d0d4cfc2SHong Zhang imark = i; 1237d0d4cfc2SHong Zhang for (i = 0; i < nzA; i++) v_p[imark + i] = vworkA[i]; 1238d0d4cfc2SHong Zhang for (i = imark; i < nzB; i++) v_p[nzA + i] = vworkB[i]; 1239d0d4cfc2SHong Zhang } 1240d0d4cfc2SHong Zhang if (idx) { 1241d0d4cfc2SHong Zhang *idx = idx_p = mat->rowindices; 1242d0d4cfc2SHong Zhang if (imark > -1) { 1243ad540459SPierre Jolivet for (i = 0; i < imark; i++) idx_p[i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs; 1244d0d4cfc2SHong Zhang } else { 1245d0d4cfc2SHong Zhang for (i = 0; i < nzB; i++) { 124626fbe8dcSKarl Rupp if (cmap[cworkB[i] / bs] < cstart) idx_p[i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs; 1247d0d4cfc2SHong Zhang else break; 1248d0d4cfc2SHong Zhang } 1249d0d4cfc2SHong Zhang imark = i; 1250d0d4cfc2SHong Zhang } 1251d0d4cfc2SHong Zhang for (i = 0; i < nzA; i++) idx_p[imark + i] = cstart * bs + cworkA[i]; 1252d0d4cfc2SHong Zhang for (i = imark; i < nzB; i++) idx_p[nzA + i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs; 1253d0d4cfc2SHong Zhang } 1254d0d4cfc2SHong Zhang } else { 1255f4259b30SLisandro Dalcin if (idx) *idx = NULL; 1256f4259b30SLisandro Dalcin if (v) *v = NULL; 1257d0d4cfc2SHong Zhang } 1258d0d4cfc2SHong Zhang } 1259d0d4cfc2SHong Zhang *nz = nztot; 12609566063dSJacob Faibussowitsch PetscCall((*mat->A->ops->restorerow)(mat->A, lrow, &nzA, pcA, pvA)); 12619566063dSJacob Faibussowitsch PetscCall((*mat->B->ops->restorerow)(mat->B, lrow, &nzB, pcB, pvB)); 1262a30f8f8cSSatish Balay PetscFunctionReturn(0); 1263a30f8f8cSSatish Balay } 1264a30f8f8cSSatish Balay 1265d71ae5a4SJacob Faibussowitsch PetscErrorCode MatRestoreRow_MPISBAIJ(Mat mat, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v) 1266d71ae5a4SJacob Faibussowitsch { 1267a30f8f8cSSatish Balay Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data; 1268a30f8f8cSSatish Balay 1269a30f8f8cSSatish Balay PetscFunctionBegin; 12705f80ce2aSJacob Faibussowitsch PetscCheck(baij->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "MatGetRow() must be called first"); 1271a30f8f8cSSatish Balay baij->getrowactive = PETSC_FALSE; 1272a30f8f8cSSatish Balay PetscFunctionReturn(0); 1273a30f8f8cSSatish Balay } 1274a30f8f8cSSatish Balay 1275d71ae5a4SJacob Faibussowitsch PetscErrorCode MatGetRowUpperTriangular_MPISBAIJ(Mat A) 1276d71ae5a4SJacob Faibussowitsch { 1277d0d4cfc2SHong Zhang Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data; 1278d0d4cfc2SHong Zhang Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ *)a->A->data; 1279d0d4cfc2SHong Zhang 1280d0d4cfc2SHong Zhang PetscFunctionBegin; 1281d0d4cfc2SHong Zhang aA->getrow_utriangular = PETSC_TRUE; 1282d0d4cfc2SHong Zhang PetscFunctionReturn(0); 1283d0d4cfc2SHong Zhang } 1284d71ae5a4SJacob Faibussowitsch PetscErrorCode MatRestoreRowUpperTriangular_MPISBAIJ(Mat A) 1285d71ae5a4SJacob Faibussowitsch { 1286d0d4cfc2SHong Zhang Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data; 1287d0d4cfc2SHong Zhang Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ *)a->A->data; 1288d0d4cfc2SHong Zhang 1289d0d4cfc2SHong Zhang PetscFunctionBegin; 1290d0d4cfc2SHong Zhang aA->getrow_utriangular = PETSC_FALSE; 1291d0d4cfc2SHong Zhang PetscFunctionReturn(0); 1292d0d4cfc2SHong Zhang } 1293d0d4cfc2SHong Zhang 1294d71ae5a4SJacob Faibussowitsch PetscErrorCode MatConjugate_MPISBAIJ(Mat mat) 1295d71ae5a4SJacob Faibussowitsch { 12965f80ce2aSJacob Faibussowitsch PetscFunctionBegin; 12975f80ce2aSJacob Faibussowitsch if (PetscDefined(USE_COMPLEX)) { 12982726fb6dSPierre Jolivet Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)mat->data; 12992726fb6dSPierre Jolivet 13009566063dSJacob Faibussowitsch PetscCall(MatConjugate(a->A)); 13019566063dSJacob Faibussowitsch PetscCall(MatConjugate(a->B)); 13025f80ce2aSJacob Faibussowitsch } 13032726fb6dSPierre Jolivet PetscFunctionReturn(0); 13042726fb6dSPierre Jolivet } 13052726fb6dSPierre Jolivet 1306d71ae5a4SJacob Faibussowitsch PetscErrorCode MatRealPart_MPISBAIJ(Mat A) 1307d71ae5a4SJacob Faibussowitsch { 130899cafbc1SBarry Smith Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data; 130999cafbc1SBarry Smith 131099cafbc1SBarry Smith PetscFunctionBegin; 13119566063dSJacob Faibussowitsch PetscCall(MatRealPart(a->A)); 13129566063dSJacob Faibussowitsch PetscCall(MatRealPart(a->B)); 131399cafbc1SBarry Smith PetscFunctionReturn(0); 131499cafbc1SBarry Smith } 131599cafbc1SBarry Smith 1316d71ae5a4SJacob Faibussowitsch PetscErrorCode MatImaginaryPart_MPISBAIJ(Mat A) 1317d71ae5a4SJacob Faibussowitsch { 131899cafbc1SBarry Smith Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data; 131999cafbc1SBarry Smith 132099cafbc1SBarry Smith PetscFunctionBegin; 13219566063dSJacob Faibussowitsch PetscCall(MatImaginaryPart(a->A)); 13229566063dSJacob Faibussowitsch PetscCall(MatImaginaryPart(a->B)); 132399cafbc1SBarry Smith PetscFunctionReturn(0); 132499cafbc1SBarry Smith } 132599cafbc1SBarry Smith 13267dae84e0SHong Zhang /* Check if isrow is a subset of iscol_local, called by MatCreateSubMatrix_MPISBAIJ() 132736032a97SHong Zhang Input: isrow - distributed(parallel), 132836032a97SHong Zhang iscol_local - locally owned (seq) 132936032a97SHong Zhang */ 1330d71ae5a4SJacob Faibussowitsch PetscErrorCode ISEqual_private(IS isrow, IS iscol_local, PetscBool *flg) 1331d71ae5a4SJacob Faibussowitsch { 13328f46ffcaSHong Zhang PetscInt sz1, sz2, *a1, *a2, i, j, k, nmatch; 13338f46ffcaSHong Zhang const PetscInt *ptr1, *ptr2; 133436032a97SHong Zhang 133536032a97SHong Zhang PetscFunctionBegin; 13369566063dSJacob Faibussowitsch PetscCall(ISGetLocalSize(isrow, &sz1)); 13379566063dSJacob Faibussowitsch PetscCall(ISGetLocalSize(iscol_local, &sz2)); 13381098a8e8SHong Zhang if (sz1 > sz2) { 13391098a8e8SHong Zhang *flg = PETSC_FALSE; 13401098a8e8SHong Zhang PetscFunctionReturn(0); 13411098a8e8SHong Zhang } 13428f46ffcaSHong Zhang 13439566063dSJacob Faibussowitsch PetscCall(ISGetIndices(isrow, &ptr1)); 13449566063dSJacob Faibussowitsch PetscCall(ISGetIndices(iscol_local, &ptr2)); 13458f46ffcaSHong Zhang 13469566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(sz1, &a1)); 13479566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(sz2, &a2)); 13489566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(a1, ptr1, sz1)); 13499566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(a2, ptr2, sz2)); 13509566063dSJacob Faibussowitsch PetscCall(PetscSortInt(sz1, a1)); 13519566063dSJacob Faibussowitsch PetscCall(PetscSortInt(sz2, a2)); 13528f46ffcaSHong Zhang 13538f46ffcaSHong Zhang nmatch = 0; 13548f46ffcaSHong Zhang k = 0; 13558f46ffcaSHong Zhang for (i = 0; i < sz1; i++) { 13568f46ffcaSHong Zhang for (j = k; j < sz2; j++) { 13578f46ffcaSHong Zhang if (a1[i] == a2[j]) { 13589371c9d4SSatish Balay k = j; 13599371c9d4SSatish Balay nmatch++; 13608f46ffcaSHong Zhang break; 13618f46ffcaSHong Zhang } 13628f46ffcaSHong Zhang } 13638f46ffcaSHong Zhang } 13649566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(isrow, &ptr1)); 13659566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(iscol_local, &ptr2)); 13669566063dSJacob Faibussowitsch PetscCall(PetscFree(a1)); 13679566063dSJacob Faibussowitsch PetscCall(PetscFree(a2)); 13681098a8e8SHong Zhang if (nmatch < sz1) { 13691098a8e8SHong Zhang *flg = PETSC_FALSE; 13701098a8e8SHong Zhang } else { 13711098a8e8SHong Zhang *flg = PETSC_TRUE; 13721098a8e8SHong Zhang } 137336032a97SHong Zhang PetscFunctionReturn(0); 13748f46ffcaSHong Zhang } 137536032a97SHong Zhang 1376d71ae5a4SJacob Faibussowitsch PetscErrorCode MatCreateSubMatrix_MPISBAIJ(Mat mat, IS isrow, IS iscol, MatReuse call, Mat *newmat) 1377d71ae5a4SJacob Faibussowitsch { 137836032a97SHong Zhang IS iscol_local; 137936032a97SHong Zhang PetscInt csize; 138036032a97SHong Zhang PetscBool isequal; 138136032a97SHong Zhang 138236032a97SHong Zhang PetscFunctionBegin; 13839566063dSJacob Faibussowitsch PetscCall(ISGetLocalSize(iscol, &csize)); 138436032a97SHong Zhang if (call == MAT_REUSE_MATRIX) { 13859566063dSJacob Faibussowitsch PetscCall(PetscObjectQuery((PetscObject)*newmat, "ISAllGather", (PetscObject *)&iscol_local)); 13865f80ce2aSJacob Faibussowitsch PetscCheck(iscol_local, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse"); 138736032a97SHong Zhang } else { 1388068661f9SToby Isaac PetscBool issorted; 1389068661f9SToby Isaac 13909566063dSJacob Faibussowitsch PetscCall(ISAllGather(iscol, &iscol_local)); 13919566063dSJacob Faibussowitsch PetscCall(ISEqual_private(isrow, iscol_local, &isequal)); 13929566063dSJacob Faibussowitsch PetscCall(ISSorted(iscol_local, &issorted)); 13935f80ce2aSJacob Faibussowitsch PetscCheck(isequal && issorted, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "For symmetric format, iscol must equal isrow and be sorted"); 13948f46ffcaSHong Zhang } 13958f46ffcaSHong Zhang 13967dae84e0SHong Zhang /* now call MatCreateSubMatrix_MPIBAIJ() */ 13979566063dSJacob Faibussowitsch PetscCall(MatCreateSubMatrix_MPIBAIJ_Private(mat, isrow, iscol_local, csize, call, newmat)); 13988f46ffcaSHong Zhang if (call == MAT_INITIAL_MATRIX) { 13999566063dSJacob Faibussowitsch PetscCall(PetscObjectCompose((PetscObject)*newmat, "ISAllGather", (PetscObject)iscol_local)); 14009566063dSJacob Faibussowitsch PetscCall(ISDestroy(&iscol_local)); 14018f46ffcaSHong Zhang } 14028f46ffcaSHong Zhang PetscFunctionReturn(0); 14038f46ffcaSHong Zhang } 14048f46ffcaSHong Zhang 1405d71ae5a4SJacob Faibussowitsch PetscErrorCode MatZeroEntries_MPISBAIJ(Mat A) 1406d71ae5a4SJacob Faibussowitsch { 1407a30f8f8cSSatish Balay Mat_MPISBAIJ *l = (Mat_MPISBAIJ *)A->data; 1408a30f8f8cSSatish Balay 1409a30f8f8cSSatish Balay PetscFunctionBegin; 14109566063dSJacob Faibussowitsch PetscCall(MatZeroEntries(l->A)); 14119566063dSJacob Faibussowitsch PetscCall(MatZeroEntries(l->B)); 1412a30f8f8cSSatish Balay PetscFunctionReturn(0); 1413a30f8f8cSSatish Balay } 1414a30f8f8cSSatish Balay 1415d71ae5a4SJacob Faibussowitsch PetscErrorCode MatGetInfo_MPISBAIJ(Mat matin, MatInfoType flag, MatInfo *info) 1416d71ae5a4SJacob Faibussowitsch { 1417a30f8f8cSSatish Balay Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)matin->data; 1418a30f8f8cSSatish Balay Mat A = a->A, B = a->B; 14193966268fSBarry Smith PetscLogDouble isend[5], irecv[5]; 1420a30f8f8cSSatish Balay 1421a30f8f8cSSatish Balay PetscFunctionBegin; 1422d0f46423SBarry Smith info->block_size = (PetscReal)matin->rmap->bs; 142326fbe8dcSKarl Rupp 14249566063dSJacob Faibussowitsch PetscCall(MatGetInfo(A, MAT_LOCAL, info)); 142526fbe8dcSKarl Rupp 14269371c9d4SSatish Balay isend[0] = info->nz_used; 14279371c9d4SSatish Balay isend[1] = info->nz_allocated; 14289371c9d4SSatish Balay isend[2] = info->nz_unneeded; 14299371c9d4SSatish Balay isend[3] = info->memory; 14309371c9d4SSatish Balay isend[4] = info->mallocs; 143126fbe8dcSKarl Rupp 14329566063dSJacob Faibussowitsch PetscCall(MatGetInfo(B, MAT_LOCAL, info)); 143326fbe8dcSKarl Rupp 14349371c9d4SSatish Balay isend[0] += info->nz_used; 14359371c9d4SSatish Balay isend[1] += info->nz_allocated; 14369371c9d4SSatish Balay isend[2] += info->nz_unneeded; 14379371c9d4SSatish Balay isend[3] += info->memory; 14389371c9d4SSatish Balay isend[4] += info->mallocs; 1439a30f8f8cSSatish Balay if (flag == MAT_LOCAL) { 1440a30f8f8cSSatish Balay info->nz_used = isend[0]; 1441a30f8f8cSSatish Balay info->nz_allocated = isend[1]; 1442a30f8f8cSSatish Balay info->nz_unneeded = isend[2]; 1443a30f8f8cSSatish Balay info->memory = isend[3]; 1444a30f8f8cSSatish Balay info->mallocs = isend[4]; 1445a30f8f8cSSatish Balay } else if (flag == MAT_GLOBAL_MAX) { 14461c2dc1cbSBarry Smith PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)matin))); 144726fbe8dcSKarl Rupp 1448a30f8f8cSSatish Balay info->nz_used = irecv[0]; 1449a30f8f8cSSatish Balay info->nz_allocated = irecv[1]; 1450a30f8f8cSSatish Balay info->nz_unneeded = irecv[2]; 1451a30f8f8cSSatish Balay info->memory = irecv[3]; 1452a30f8f8cSSatish Balay info->mallocs = irecv[4]; 1453a30f8f8cSSatish Balay } else if (flag == MAT_GLOBAL_SUM) { 14541c2dc1cbSBarry Smith PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)matin))); 145526fbe8dcSKarl Rupp 1456a30f8f8cSSatish Balay info->nz_used = irecv[0]; 1457a30f8f8cSSatish Balay info->nz_allocated = irecv[1]; 1458a30f8f8cSSatish Balay info->nz_unneeded = irecv[2]; 1459a30f8f8cSSatish Balay info->memory = irecv[3]; 1460a30f8f8cSSatish Balay info->mallocs = irecv[4]; 146198921bdaSJacob Faibussowitsch } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Unknown MatInfoType argument %d", (int)flag); 1462a30f8f8cSSatish Balay info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1463a30f8f8cSSatish Balay info->fill_ratio_needed = 0; 1464a30f8f8cSSatish Balay info->factor_mallocs = 0; 1465a30f8f8cSSatish Balay PetscFunctionReturn(0); 1466a30f8f8cSSatish Balay } 1467a30f8f8cSSatish Balay 1468d71ae5a4SJacob Faibussowitsch PetscErrorCode MatSetOption_MPISBAIJ(Mat A, MatOption op, PetscBool flg) 1469d71ae5a4SJacob Faibussowitsch { 1470a30f8f8cSSatish Balay Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data; 1471d0d4cfc2SHong Zhang Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ *)a->A->data; 1472a30f8f8cSSatish Balay 1473a30f8f8cSSatish Balay PetscFunctionBegin; 1474e98b92d7SKris Buschelman switch (op) { 1475512a5fc5SBarry Smith case MAT_NEW_NONZERO_LOCATIONS: 1476e98b92d7SKris Buschelman case MAT_NEW_NONZERO_ALLOCATION_ERR: 147728b2fa4aSMatthew Knepley case MAT_UNUSED_NONZERO_LOCATION_ERR: 1478a9817697SBarry Smith case MAT_KEEP_NONZERO_PATTERN: 1479c10200c1SHong Zhang case MAT_SUBMAT_SINGLEIS: 1480e98b92d7SKris Buschelman case MAT_NEW_NONZERO_LOCATION_ERR: 148143674050SBarry Smith MatCheckPreallocated(A, 1); 14829566063dSJacob Faibussowitsch PetscCall(MatSetOption(a->A, op, flg)); 14839566063dSJacob Faibussowitsch PetscCall(MatSetOption(a->B, op, flg)); 1484e98b92d7SKris Buschelman break; 1485e98b92d7SKris Buschelman case MAT_ROW_ORIENTED: 148643674050SBarry Smith MatCheckPreallocated(A, 1); 14874e0d8c25SBarry Smith a->roworiented = flg; 148826fbe8dcSKarl Rupp 14899566063dSJacob Faibussowitsch PetscCall(MatSetOption(a->A, op, flg)); 14909566063dSJacob Faibussowitsch PetscCall(MatSetOption(a->B, op, flg)); 1491e98b92d7SKris Buschelman break; 14928c78258cSHong Zhang case MAT_FORCE_DIAGONAL_ENTRIES: 1493d71ae5a4SJacob Faibussowitsch case MAT_SORTED_FULL: 1494d71ae5a4SJacob Faibussowitsch PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op])); 1495d71ae5a4SJacob Faibussowitsch break; 1496d71ae5a4SJacob Faibussowitsch case MAT_IGNORE_OFF_PROC_ENTRIES: 1497d71ae5a4SJacob Faibussowitsch a->donotstash = flg; 1498d71ae5a4SJacob Faibussowitsch break; 1499d71ae5a4SJacob Faibussowitsch case MAT_USE_HASH_TABLE: 1500d71ae5a4SJacob Faibussowitsch a->ht_flag = flg; 1501d71ae5a4SJacob Faibussowitsch break; 1502d71ae5a4SJacob Faibussowitsch case MAT_HERMITIAN: 1503d71ae5a4SJacob Faibussowitsch MatCheckPreallocated(A, 1); 1504d71ae5a4SJacob Faibussowitsch PetscCall(MatSetOption(a->A, op, flg)); 15050f2140c7SStefano Zampini #if defined(PETSC_USE_COMPLEX) 1506eb1ec7c1SStefano Zampini if (flg) { /* need different mat-vec ops */ 1507547795f9SHong Zhang A->ops->mult = MatMult_MPISBAIJ_Hermitian; 1508eb1ec7c1SStefano Zampini A->ops->multadd = MatMultAdd_MPISBAIJ_Hermitian; 1509eb1ec7c1SStefano Zampini A->ops->multtranspose = NULL; 1510eb1ec7c1SStefano Zampini A->ops->multtransposeadd = NULL; 1511b94d7dedSBarry Smith A->symmetric = PETSC_BOOL3_FALSE; 1512eb1ec7c1SStefano Zampini } 15130f2140c7SStefano Zampini #endif 1514eeffb40dSHong Zhang break; 1515ffa07934SHong Zhang case MAT_SPD: 1516d71ae5a4SJacob Faibussowitsch case MAT_SYMMETRIC: 1517d71ae5a4SJacob Faibussowitsch MatCheckPreallocated(A, 1); 1518d71ae5a4SJacob Faibussowitsch PetscCall(MatSetOption(a->A, op, flg)); 1519eb1ec7c1SStefano Zampini #if defined(PETSC_USE_COMPLEX) 1520eb1ec7c1SStefano Zampini if (flg) { /* restore to use default mat-vec ops */ 1521eb1ec7c1SStefano Zampini A->ops->mult = MatMult_MPISBAIJ; 1522eb1ec7c1SStefano Zampini A->ops->multadd = MatMultAdd_MPISBAIJ; 1523eb1ec7c1SStefano Zampini A->ops->multtranspose = MatMult_MPISBAIJ; 1524eb1ec7c1SStefano Zampini A->ops->multtransposeadd = MatMultAdd_MPISBAIJ; 1525eb1ec7c1SStefano Zampini } 1526eb1ec7c1SStefano Zampini #endif 1527eeffb40dSHong Zhang break; 152877e54ba9SKris Buschelman case MAT_STRUCTURALLY_SYMMETRIC: 152943674050SBarry Smith MatCheckPreallocated(A, 1); 15309566063dSJacob Faibussowitsch PetscCall(MatSetOption(a->A, op, flg)); 1531eeffb40dSHong Zhang break; 15329a4540c5SBarry Smith case MAT_SYMMETRY_ETERNAL: 1533b94d7dedSBarry Smith case MAT_STRUCTURAL_SYMMETRY_ETERNAL: 15345f80ce2aSJacob Faibussowitsch PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_SUP, "Matrix must be symmetric"); 15359566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op])); 153677e54ba9SKris Buschelman break; 1537d71ae5a4SJacob Faibussowitsch case MAT_SPD_ETERNAL: 1538d71ae5a4SJacob Faibussowitsch break; 1539d71ae5a4SJacob Faibussowitsch case MAT_IGNORE_LOWER_TRIANGULAR: 1540d71ae5a4SJacob Faibussowitsch aA->ignore_ltriangular = flg; 1541d71ae5a4SJacob Faibussowitsch break; 1542d71ae5a4SJacob Faibussowitsch case MAT_ERROR_LOWER_TRIANGULAR: 1543d71ae5a4SJacob Faibussowitsch aA->ignore_ltriangular = flg; 1544d71ae5a4SJacob Faibussowitsch break; 1545d71ae5a4SJacob Faibussowitsch case MAT_GETROW_UPPERTRIANGULAR: 1546d71ae5a4SJacob Faibussowitsch aA->getrow_utriangular = flg; 1547d71ae5a4SJacob Faibussowitsch break; 1548d71ae5a4SJacob Faibussowitsch default: 1549d71ae5a4SJacob Faibussowitsch SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "unknown option %d", op); 1550a30f8f8cSSatish Balay } 1551a30f8f8cSSatish Balay PetscFunctionReturn(0); 1552a30f8f8cSSatish Balay } 1553a30f8f8cSSatish Balay 1554d71ae5a4SJacob Faibussowitsch PetscErrorCode MatTranspose_MPISBAIJ(Mat A, MatReuse reuse, Mat *B) 1555d71ae5a4SJacob Faibussowitsch { 1556a30f8f8cSSatish Balay PetscFunctionBegin; 15577fb60732SBarry Smith if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *B)); 1558cf37664fSBarry Smith if (reuse == MAT_INITIAL_MATRIX) { 15599566063dSJacob Faibussowitsch PetscCall(MatDuplicate(A, MAT_COPY_VALUES, B)); 1560cf37664fSBarry Smith } else if (reuse == MAT_REUSE_MATRIX) { 15619566063dSJacob Faibussowitsch PetscCall(MatCopy(A, *B, SAME_NONZERO_PATTERN)); 1562fc4dec0aSBarry Smith } 15638115998fSBarry Smith PetscFunctionReturn(0); 1564a30f8f8cSSatish Balay } 1565a30f8f8cSSatish Balay 1566d71ae5a4SJacob Faibussowitsch PetscErrorCode MatDiagonalScale_MPISBAIJ(Mat mat, Vec ll, Vec rr) 1567d71ae5a4SJacob Faibussowitsch { 1568a30f8f8cSSatish Balay Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data; 1569a30f8f8cSSatish Balay Mat a = baij->A, b = baij->B; 15705e90f9d9SHong Zhang PetscInt nv, m, n; 1571ace3abfcSBarry Smith PetscBool flg; 1572a30f8f8cSSatish Balay 1573a30f8f8cSSatish Balay PetscFunctionBegin; 1574a30f8f8cSSatish Balay if (ll != rr) { 15759566063dSJacob Faibussowitsch PetscCall(VecEqual(ll, rr, &flg)); 15765f80ce2aSJacob Faibussowitsch PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "For symmetric format, left and right scaling vectors must be same"); 1577a30f8f8cSSatish Balay } 1578b3bf805bSHong Zhang if (!ll) PetscFunctionReturn(0); 1579b3bf805bSHong Zhang 15809566063dSJacob Faibussowitsch PetscCall(MatGetLocalSize(mat, &m, &n)); 15815f80ce2aSJacob Faibussowitsch PetscCheck(m == n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "For symmetric format, local size %" PetscInt_FMT " %" PetscInt_FMT " must be same", m, n); 1582b3bf805bSHong Zhang 15839566063dSJacob Faibussowitsch PetscCall(VecGetLocalSize(rr, &nv)); 15845f80ce2aSJacob Faibussowitsch PetscCheck(nv == n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Left and right vector non-conforming local size"); 15855e90f9d9SHong Zhang 15869566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(baij->Mvctx, rr, baij->lvec, INSERT_VALUES, SCATTER_FORWARD)); 15875e90f9d9SHong Zhang 15885e90f9d9SHong Zhang /* left diagonalscale the off-diagonal part */ 1589dbbe0bcdSBarry Smith PetscUseTypeMethod(b, diagonalscale, ll, NULL); 15905e90f9d9SHong Zhang 15915e90f9d9SHong Zhang /* scale the diagonal part */ 1592dbbe0bcdSBarry Smith PetscUseTypeMethod(a, diagonalscale, ll, rr); 1593a30f8f8cSSatish Balay 15945e90f9d9SHong Zhang /* right diagonalscale the off-diagonal part */ 15959566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(baij->Mvctx, rr, baij->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1596dbbe0bcdSBarry Smith PetscUseTypeMethod(b, diagonalscale, NULL, baij->lvec); 1597a30f8f8cSSatish Balay PetscFunctionReturn(0); 1598a30f8f8cSSatish Balay } 1599a30f8f8cSSatish Balay 1600d71ae5a4SJacob Faibussowitsch PetscErrorCode MatSetUnfactored_MPISBAIJ(Mat A) 1601d71ae5a4SJacob Faibussowitsch { 1602f3566a2aSHong Zhang Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data; 1603a30f8f8cSSatish Balay 1604a30f8f8cSSatish Balay PetscFunctionBegin; 16059566063dSJacob Faibussowitsch PetscCall(MatSetUnfactored(a->A)); 1606a30f8f8cSSatish Balay PetscFunctionReturn(0); 1607a30f8f8cSSatish Balay } 1608a30f8f8cSSatish Balay 16096849ba73SBarry Smith static PetscErrorCode MatDuplicate_MPISBAIJ(Mat, MatDuplicateOption, Mat *); 1610a30f8f8cSSatish Balay 1611d71ae5a4SJacob Faibussowitsch PetscErrorCode MatEqual_MPISBAIJ(Mat A, Mat B, PetscBool *flag) 1612d71ae5a4SJacob Faibussowitsch { 1613a30f8f8cSSatish Balay Mat_MPISBAIJ *matB = (Mat_MPISBAIJ *)B->data, *matA = (Mat_MPISBAIJ *)A->data; 1614a30f8f8cSSatish Balay Mat a, b, c, d; 1615ace3abfcSBarry Smith PetscBool flg; 1616a30f8f8cSSatish Balay 1617a30f8f8cSSatish Balay PetscFunctionBegin; 16189371c9d4SSatish Balay a = matA->A; 16199371c9d4SSatish Balay b = matA->B; 16209371c9d4SSatish Balay c = matB->A; 16219371c9d4SSatish Balay d = matB->B; 1622a30f8f8cSSatish Balay 16239566063dSJacob Faibussowitsch PetscCall(MatEqual(a, c, &flg)); 162448a46eb9SPierre Jolivet if (flg) PetscCall(MatEqual(b, d, &flg)); 16251c2dc1cbSBarry Smith PetscCall(MPIU_Allreduce(&flg, flag, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A))); 1626a30f8f8cSSatish Balay PetscFunctionReturn(0); 1627a30f8f8cSSatish Balay } 1628a30f8f8cSSatish Balay 1629d71ae5a4SJacob Faibussowitsch PetscErrorCode MatCopy_MPISBAIJ(Mat A, Mat B, MatStructure str) 1630d71ae5a4SJacob Faibussowitsch { 16314c7a3774SStefano Zampini PetscBool isbaij; 16323c896bc6SHong Zhang 16333c896bc6SHong Zhang PetscFunctionBegin; 16349566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompareAny((PetscObject)B, &isbaij, MATSEQSBAIJ, MATMPISBAIJ, "")); 16355f80ce2aSJacob Faibussowitsch PetscCheck(isbaij, PetscObjectComm((PetscObject)B), PETSC_ERR_SUP, "Not for matrix type %s", ((PetscObject)B)->type_name); 16363c896bc6SHong Zhang /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 16373c896bc6SHong Zhang if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 16389566063dSJacob Faibussowitsch PetscCall(MatGetRowUpperTriangular(A)); 16399566063dSJacob Faibussowitsch PetscCall(MatCopy_Basic(A, B, str)); 16409566063dSJacob Faibussowitsch PetscCall(MatRestoreRowUpperTriangular(A)); 16413c896bc6SHong Zhang } else { 16424c7a3774SStefano Zampini Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data; 16434c7a3774SStefano Zampini Mat_MPISBAIJ *b = (Mat_MPISBAIJ *)B->data; 16444c7a3774SStefano Zampini 16459566063dSJacob Faibussowitsch PetscCall(MatCopy(a->A, b->A, str)); 16469566063dSJacob Faibussowitsch PetscCall(MatCopy(a->B, b->B, str)); 16473c896bc6SHong Zhang } 16489566063dSJacob Faibussowitsch PetscCall(PetscObjectStateIncrease((PetscObject)B)); 16493c896bc6SHong Zhang PetscFunctionReturn(0); 16503c896bc6SHong Zhang } 16513c896bc6SHong Zhang 1652d71ae5a4SJacob Faibussowitsch PetscErrorCode MatSetUp_MPISBAIJ(Mat A) 1653d71ae5a4SJacob Faibussowitsch { 1654273d9f13SBarry Smith PetscFunctionBegin; 16559566063dSJacob Faibussowitsch PetscCall(MatMPISBAIJSetPreallocation(A, A->rmap->bs, PETSC_DEFAULT, NULL, PETSC_DEFAULT, NULL)); 1656273d9f13SBarry Smith PetscFunctionReturn(0); 1657273d9f13SBarry Smith } 1658a5e6ed63SBarry Smith 1659d71ae5a4SJacob Faibussowitsch PetscErrorCode MatAXPY_MPISBAIJ(Mat Y, PetscScalar a, Mat X, MatStructure str) 1660d71ae5a4SJacob Faibussowitsch { 16614fe895cdSHong Zhang Mat_MPISBAIJ *xx = (Mat_MPISBAIJ *)X->data, *yy = (Mat_MPISBAIJ *)Y->data; 16624fe895cdSHong Zhang PetscBLASInt bnz, one = 1; 16634fe895cdSHong Zhang Mat_SeqSBAIJ *xa, *ya; 16644fe895cdSHong Zhang Mat_SeqBAIJ *xb, *yb; 16654fe895cdSHong Zhang 16664fe895cdSHong Zhang PetscFunctionBegin; 16674fe895cdSHong Zhang if (str == SAME_NONZERO_PATTERN) { 16684fe895cdSHong Zhang PetscScalar alpha = a; 16694fe895cdSHong Zhang xa = (Mat_SeqSBAIJ *)xx->A->data; 16704fe895cdSHong Zhang ya = (Mat_SeqSBAIJ *)yy->A->data; 16719566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast(xa->nz, &bnz)); 1672792fecdfSBarry Smith PetscCallBLAS("BLASaxpy", BLASaxpy_(&bnz, &alpha, xa->a, &one, ya->a, &one)); 16734fe895cdSHong Zhang xb = (Mat_SeqBAIJ *)xx->B->data; 16744fe895cdSHong Zhang yb = (Mat_SeqBAIJ *)yy->B->data; 16759566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast(xb->nz, &bnz)); 1676792fecdfSBarry Smith PetscCallBLAS("BLASaxpy", BLASaxpy_(&bnz, &alpha, xb->a, &one, yb->a, &one)); 16779566063dSJacob Faibussowitsch PetscCall(PetscObjectStateIncrease((PetscObject)Y)); 1678ab784542SHong Zhang } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 16799566063dSJacob Faibussowitsch PetscCall(MatSetOption(X, MAT_GETROW_UPPERTRIANGULAR, PETSC_TRUE)); 16809566063dSJacob Faibussowitsch PetscCall(MatAXPY_Basic(Y, a, X, str)); 16819566063dSJacob Faibussowitsch PetscCall(MatSetOption(X, MAT_GETROW_UPPERTRIANGULAR, PETSC_FALSE)); 16824fe895cdSHong Zhang } else { 16834de5dceeSHong Zhang Mat B; 16844de5dceeSHong Zhang PetscInt *nnz_d, *nnz_o, bs = Y->rmap->bs; 16855f80ce2aSJacob Faibussowitsch PetscCheck(bs == X->rmap->bs, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrices must have same block size"); 16869566063dSJacob Faibussowitsch PetscCall(MatGetRowUpperTriangular(X)); 16879566063dSJacob Faibussowitsch PetscCall(MatGetRowUpperTriangular(Y)); 16889566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(yy->A->rmap->N, &nnz_d)); 16899566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(yy->B->rmap->N, &nnz_o)); 16909566063dSJacob Faibussowitsch PetscCall(MatCreate(PetscObjectComm((PetscObject)Y), &B)); 16919566063dSJacob Faibussowitsch PetscCall(PetscObjectSetName((PetscObject)B, ((PetscObject)Y)->name)); 16929566063dSJacob Faibussowitsch PetscCall(MatSetSizes(B, Y->rmap->n, Y->cmap->n, Y->rmap->N, Y->cmap->N)); 16939566063dSJacob Faibussowitsch PetscCall(MatSetBlockSizesFromMats(B, Y, Y)); 16949566063dSJacob Faibussowitsch PetscCall(MatSetType(B, MATMPISBAIJ)); 16959566063dSJacob Faibussowitsch PetscCall(MatAXPYGetPreallocation_SeqSBAIJ(yy->A, xx->A, nnz_d)); 16969566063dSJacob Faibussowitsch PetscCall(MatAXPYGetPreallocation_MPIBAIJ(yy->B, yy->garray, xx->B, xx->garray, nnz_o)); 16979566063dSJacob Faibussowitsch PetscCall(MatMPISBAIJSetPreallocation(B, bs, 0, nnz_d, 0, nnz_o)); 16989566063dSJacob Faibussowitsch PetscCall(MatAXPY_BasicWithPreallocation(B, Y, a, X, str)); 16999566063dSJacob Faibussowitsch PetscCall(MatHeaderMerge(Y, &B)); 17009566063dSJacob Faibussowitsch PetscCall(PetscFree(nnz_d)); 17019566063dSJacob Faibussowitsch PetscCall(PetscFree(nnz_o)); 17029566063dSJacob Faibussowitsch PetscCall(MatRestoreRowUpperTriangular(X)); 17039566063dSJacob Faibussowitsch PetscCall(MatRestoreRowUpperTriangular(Y)); 17044fe895cdSHong Zhang } 17054fe895cdSHong Zhang PetscFunctionReturn(0); 17064fe895cdSHong Zhang } 17074fe895cdSHong Zhang 1708d71ae5a4SJacob Faibussowitsch PetscErrorCode MatCreateSubMatrices_MPISBAIJ(Mat A, PetscInt n, const IS irow[], const IS icol[], MatReuse scall, Mat *B[]) 1709d71ae5a4SJacob Faibussowitsch { 17101302d50aSBarry Smith PetscInt i; 1711afebec48SHong Zhang PetscBool flg; 1712a5e6ed63SBarry Smith 17136849ba73SBarry Smith PetscFunctionBegin; 17149566063dSJacob Faibussowitsch PetscCall(MatCreateSubMatrices_MPIBAIJ(A, n, irow, icol, scall, B)); /* B[] are sbaij matrices */ 1715a5e6ed63SBarry Smith for (i = 0; i < n; i++) { 17169566063dSJacob Faibussowitsch PetscCall(ISEqual(irow[i], icol[i], &flg)); 171748a46eb9SPierre Jolivet if (!flg) PetscCall(MatSeqSBAIJZeroOps_Private(*B[i])); 17184dcd73b1SHong Zhang } 1719a5e6ed63SBarry Smith PetscFunctionReturn(0); 1720a5e6ed63SBarry Smith } 1721a5e6ed63SBarry Smith 1722d71ae5a4SJacob Faibussowitsch PetscErrorCode MatShift_MPISBAIJ(Mat Y, PetscScalar a) 1723d71ae5a4SJacob Faibussowitsch { 17247d68702bSBarry Smith Mat_MPISBAIJ *maij = (Mat_MPISBAIJ *)Y->data; 17256f33a894SBarry Smith Mat_SeqSBAIJ *aij = (Mat_SeqSBAIJ *)maij->A->data; 17267d68702bSBarry Smith 17277d68702bSBarry Smith PetscFunctionBegin; 17286f33a894SBarry Smith if (!Y->preallocated) { 17299566063dSJacob Faibussowitsch PetscCall(MatMPISBAIJSetPreallocation(Y, Y->rmap->bs, 1, NULL, 0, NULL)); 17306f33a894SBarry Smith } else if (!aij->nz) { 1731b83222d8SBarry Smith PetscInt nonew = aij->nonew; 17329566063dSJacob Faibussowitsch PetscCall(MatSeqSBAIJSetPreallocation(maij->A, Y->rmap->bs, 1, NULL)); 1733b83222d8SBarry Smith aij->nonew = nonew; 17347d68702bSBarry Smith } 17359566063dSJacob Faibussowitsch PetscCall(MatShift_Basic(Y, a)); 17367d68702bSBarry Smith PetscFunctionReturn(0); 17377d68702bSBarry Smith } 17387d68702bSBarry Smith 1739d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMissingDiagonal_MPISBAIJ(Mat A, PetscBool *missing, PetscInt *d) 1740d71ae5a4SJacob Faibussowitsch { 17413b49f96aSBarry Smith Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data; 17423b49f96aSBarry Smith 17433b49f96aSBarry Smith PetscFunctionBegin; 17445f80ce2aSJacob Faibussowitsch PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only works for square matrices"); 17459566063dSJacob Faibussowitsch PetscCall(MatMissingDiagonal(a->A, missing, d)); 17463b49f96aSBarry Smith if (d) { 17473b49f96aSBarry Smith PetscInt rstart; 17489566063dSJacob Faibussowitsch PetscCall(MatGetOwnershipRange(A, &rstart, NULL)); 17493b49f96aSBarry Smith *d += rstart / A->rmap->bs; 17503b49f96aSBarry Smith } 17513b49f96aSBarry Smith PetscFunctionReturn(0); 17523b49f96aSBarry Smith } 17533b49f96aSBarry Smith 1754d71ae5a4SJacob Faibussowitsch PetscErrorCode MatGetDiagonalBlock_MPISBAIJ(Mat A, Mat *a) 1755d71ae5a4SJacob Faibussowitsch { 1756a5b7ff6bSBarry Smith PetscFunctionBegin; 1757a5b7ff6bSBarry Smith *a = ((Mat_MPISBAIJ *)A->data)->A; 1758a5b7ff6bSBarry Smith PetscFunctionReturn(0); 1759a5b7ff6bSBarry Smith } 17603b49f96aSBarry Smith 1761a30f8f8cSSatish Balay /* -------------------------------------------------------------------*/ 17623964eb88SJed Brown static struct _MatOps MatOps_Values = {MatSetValues_MPISBAIJ, 1763a30f8f8cSSatish Balay MatGetRow_MPISBAIJ, 1764a30f8f8cSSatish Balay MatRestoreRow_MPISBAIJ, 1765a9d4b620SHong Zhang MatMult_MPISBAIJ, 176697304618SKris Buschelman /* 4*/ MatMultAdd_MPISBAIJ, 1767431c96f7SBarry Smith MatMult_MPISBAIJ, /* transpose versions are same as non-transpose */ 1768431c96f7SBarry Smith MatMultAdd_MPISBAIJ, 1769f4259b30SLisandro Dalcin NULL, 1770f4259b30SLisandro Dalcin NULL, 1771f4259b30SLisandro Dalcin NULL, 1772f4259b30SLisandro Dalcin /* 10*/ NULL, 1773f4259b30SLisandro Dalcin NULL, 1774f4259b30SLisandro Dalcin NULL, 177541f059aeSBarry Smith MatSOR_MPISBAIJ, 1776a30f8f8cSSatish Balay MatTranspose_MPISBAIJ, 177797304618SKris Buschelman /* 15*/ MatGetInfo_MPISBAIJ, 1778a30f8f8cSSatish Balay MatEqual_MPISBAIJ, 1779a30f8f8cSSatish Balay MatGetDiagonal_MPISBAIJ, 1780a30f8f8cSSatish Balay MatDiagonalScale_MPISBAIJ, 1781a30f8f8cSSatish Balay MatNorm_MPISBAIJ, 178297304618SKris Buschelman /* 20*/ MatAssemblyBegin_MPISBAIJ, 1783a30f8f8cSSatish Balay MatAssemblyEnd_MPISBAIJ, 1784a30f8f8cSSatish Balay MatSetOption_MPISBAIJ, 1785a30f8f8cSSatish Balay MatZeroEntries_MPISBAIJ, 1786f4259b30SLisandro Dalcin /* 24*/ NULL, 1787f4259b30SLisandro Dalcin NULL, 1788f4259b30SLisandro Dalcin NULL, 1789f4259b30SLisandro Dalcin NULL, 1790f4259b30SLisandro Dalcin NULL, 17914994cf47SJed Brown /* 29*/ MatSetUp_MPISBAIJ, 1792f4259b30SLisandro Dalcin NULL, 1793f4259b30SLisandro Dalcin NULL, 1794a5b7ff6bSBarry Smith MatGetDiagonalBlock_MPISBAIJ, 1795f4259b30SLisandro Dalcin NULL, 1796d519adbfSMatthew Knepley /* 34*/ MatDuplicate_MPISBAIJ, 1797f4259b30SLisandro Dalcin NULL, 1798f4259b30SLisandro Dalcin NULL, 1799f4259b30SLisandro Dalcin NULL, 1800f4259b30SLisandro Dalcin NULL, 1801d519adbfSMatthew Knepley /* 39*/ MatAXPY_MPISBAIJ, 18027dae84e0SHong Zhang MatCreateSubMatrices_MPISBAIJ, 1803d94109b8SHong Zhang MatIncreaseOverlap_MPISBAIJ, 1804a30f8f8cSSatish Balay MatGetValues_MPISBAIJ, 18053c896bc6SHong Zhang MatCopy_MPISBAIJ, 1806f4259b30SLisandro Dalcin /* 44*/ NULL, 1807a30f8f8cSSatish Balay MatScale_MPISBAIJ, 18087d68702bSBarry Smith MatShift_MPISBAIJ, 1809f4259b30SLisandro Dalcin NULL, 1810f4259b30SLisandro Dalcin NULL, 1811f4259b30SLisandro Dalcin /* 49*/ NULL, 1812f4259b30SLisandro Dalcin NULL, 1813f4259b30SLisandro Dalcin NULL, 1814f4259b30SLisandro Dalcin NULL, 1815f4259b30SLisandro Dalcin NULL, 1816f4259b30SLisandro Dalcin /* 54*/ NULL, 1817f4259b30SLisandro Dalcin NULL, 1818a30f8f8cSSatish Balay MatSetUnfactored_MPISBAIJ, 1819f4259b30SLisandro Dalcin NULL, 1820a30f8f8cSSatish Balay MatSetValuesBlocked_MPISBAIJ, 18217dae84e0SHong Zhang /* 59*/ MatCreateSubMatrix_MPISBAIJ, 1822f4259b30SLisandro Dalcin NULL, 1823f4259b30SLisandro Dalcin NULL, 1824f4259b30SLisandro Dalcin NULL, 1825f4259b30SLisandro Dalcin NULL, 1826f4259b30SLisandro Dalcin /* 64*/ NULL, 1827f4259b30SLisandro Dalcin NULL, 1828f4259b30SLisandro Dalcin NULL, 1829f4259b30SLisandro Dalcin NULL, 1830f4259b30SLisandro Dalcin NULL, 1831d519adbfSMatthew Knepley /* 69*/ MatGetRowMaxAbs_MPISBAIJ, 1832f4259b30SLisandro Dalcin NULL, 183328d58a37SPierre Jolivet MatConvert_MPISBAIJ_Basic, 1834f4259b30SLisandro Dalcin NULL, 1835f4259b30SLisandro Dalcin NULL, 1836f4259b30SLisandro Dalcin /* 74*/ NULL, 1837f4259b30SLisandro Dalcin NULL, 1838f4259b30SLisandro Dalcin NULL, 1839f4259b30SLisandro Dalcin NULL, 1840f4259b30SLisandro Dalcin NULL, 1841f4259b30SLisandro Dalcin /* 79*/ NULL, 1842f4259b30SLisandro Dalcin NULL, 1843f4259b30SLisandro Dalcin NULL, 1844f4259b30SLisandro Dalcin NULL, 18455bba2384SShri Abhyankar MatLoad_MPISBAIJ, 1846f4259b30SLisandro Dalcin /* 84*/ NULL, 1847f4259b30SLisandro Dalcin NULL, 1848f4259b30SLisandro Dalcin NULL, 1849f4259b30SLisandro Dalcin NULL, 1850f4259b30SLisandro Dalcin NULL, 1851f4259b30SLisandro Dalcin /* 89*/ NULL, 1852f4259b30SLisandro Dalcin NULL, 1853f4259b30SLisandro Dalcin NULL, 1854f4259b30SLisandro Dalcin NULL, 1855f4259b30SLisandro Dalcin NULL, 1856f4259b30SLisandro Dalcin /* 94*/ NULL, 1857f4259b30SLisandro Dalcin NULL, 1858f4259b30SLisandro Dalcin NULL, 1859f4259b30SLisandro Dalcin NULL, 1860f4259b30SLisandro Dalcin NULL, 1861f4259b30SLisandro Dalcin /* 99*/ NULL, 1862f4259b30SLisandro Dalcin NULL, 1863f4259b30SLisandro Dalcin NULL, 18642726fb6dSPierre Jolivet MatConjugate_MPISBAIJ, 1865f4259b30SLisandro Dalcin NULL, 1866f4259b30SLisandro Dalcin /*104*/ NULL, 186799cafbc1SBarry Smith MatRealPart_MPISBAIJ, 1868d0d4cfc2SHong Zhang MatImaginaryPart_MPISBAIJ, 1869d0d4cfc2SHong Zhang MatGetRowUpperTriangular_MPISBAIJ, 187095936485SShri Abhyankar MatRestoreRowUpperTriangular_MPISBAIJ, 1871f4259b30SLisandro Dalcin /*109*/ NULL, 1872f4259b30SLisandro Dalcin NULL, 1873f4259b30SLisandro Dalcin NULL, 1874f4259b30SLisandro Dalcin NULL, 18753b49f96aSBarry Smith MatMissingDiagonal_MPISBAIJ, 1876f4259b30SLisandro Dalcin /*114*/ NULL, 1877f4259b30SLisandro Dalcin NULL, 1878f4259b30SLisandro Dalcin NULL, 1879f4259b30SLisandro Dalcin NULL, 1880f4259b30SLisandro Dalcin NULL, 1881f4259b30SLisandro Dalcin /*119*/ NULL, 1882f4259b30SLisandro Dalcin NULL, 1883f4259b30SLisandro Dalcin NULL, 1884f4259b30SLisandro Dalcin NULL, 1885f4259b30SLisandro Dalcin NULL, 1886f4259b30SLisandro Dalcin /*124*/ NULL, 1887f4259b30SLisandro Dalcin NULL, 1888f4259b30SLisandro Dalcin NULL, 1889f4259b30SLisandro Dalcin NULL, 1890f4259b30SLisandro Dalcin NULL, 1891f4259b30SLisandro Dalcin /*129*/ NULL, 1892f4259b30SLisandro Dalcin NULL, 1893f4259b30SLisandro Dalcin NULL, 1894f4259b30SLisandro Dalcin NULL, 1895f4259b30SLisandro Dalcin NULL, 1896f4259b30SLisandro Dalcin /*134*/ NULL, 1897f4259b30SLisandro Dalcin NULL, 1898f4259b30SLisandro Dalcin NULL, 1899f4259b30SLisandro Dalcin NULL, 1900f4259b30SLisandro Dalcin NULL, 190146533700Sstefano_zampini /*139*/ MatSetBlockSizes_Default, 1902f4259b30SLisandro Dalcin NULL, 1903f4259b30SLisandro Dalcin NULL, 1904f4259b30SLisandro Dalcin NULL, 1905f4259b30SLisandro Dalcin NULL, 1906d70f29a3SPierre Jolivet /*144*/ MatCreateMPIMatConcatenateSeqMat_MPISBAIJ, 1907d70f29a3SPierre Jolivet NULL, 1908d70f29a3SPierre Jolivet NULL, 190999a7f59eSMark Adams NULL, 191099a7f59eSMark Adams NULL, 19117fb60732SBarry Smith NULL, 1912dec0b466SHong Zhang /*150*/ NULL, 1913dec0b466SHong Zhang NULL}; 1914a30f8f8cSSatish Balay 1915d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMPISBAIJSetPreallocation_MPISBAIJ(Mat B, PetscInt bs, PetscInt d_nz, const PetscInt *d_nnz, PetscInt o_nz, const PetscInt *o_nnz) 1916d71ae5a4SJacob Faibussowitsch { 1917476417e5SBarry Smith Mat_MPISBAIJ *b = (Mat_MPISBAIJ *)B->data; 1918535b19f3SBarry Smith PetscInt i, mbs, Mbs; 19195d2a9ed1SStefano Zampini PetscMPIInt size; 1920a23d5eceSKris Buschelman 1921a23d5eceSKris Buschelman PetscFunctionBegin; 19229566063dSJacob Faibussowitsch PetscCall(MatSetBlockSize(B, PetscAbs(bs))); 19239566063dSJacob Faibussowitsch PetscCall(PetscLayoutSetUp(B->rmap)); 19249566063dSJacob Faibussowitsch PetscCall(PetscLayoutSetUp(B->cmap)); 19259566063dSJacob Faibussowitsch PetscCall(PetscLayoutGetBlockSize(B->rmap, &bs)); 19265f80ce2aSJacob Faibussowitsch PetscCheck(B->rmap->N <= B->cmap->N, PetscObjectComm((PetscObject)B), PETSC_ERR_SUP, "MPISBAIJ matrix cannot have more rows %" PetscInt_FMT " than columns %" PetscInt_FMT, B->rmap->N, B->cmap->N); 19275f80ce2aSJacob Faibussowitsch PetscCheck(B->rmap->n <= B->cmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "MPISBAIJ matrix cannot have more local rows %" PetscInt_FMT " than columns %" PetscInt_FMT, B->rmap->n, B->cmap->n); 1928899cda47SBarry Smith 1929d0f46423SBarry Smith mbs = B->rmap->n / bs; 1930d0f46423SBarry Smith Mbs = B->rmap->N / bs; 19315f80ce2aSJacob Faibussowitsch PetscCheck(mbs * bs == B->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "No of local rows %" PetscInt_FMT " must be divisible by blocksize %" PetscInt_FMT, B->rmap->N, bs); 1932a23d5eceSKris Buschelman 1933d0f46423SBarry Smith B->rmap->bs = bs; 1934a23d5eceSKris Buschelman b->bs2 = bs * bs; 1935a23d5eceSKris Buschelman b->mbs = mbs; 1936a23d5eceSKris Buschelman b->Mbs = Mbs; 1937de64b629SHong Zhang b->nbs = B->cmap->n / bs; 1938de64b629SHong Zhang b->Nbs = B->cmap->N / bs; 1939a23d5eceSKris Buschelman 1940ad540459SPierre Jolivet for (i = 0; i <= b->size; i++) b->rangebs[i] = B->rmap->range[i] / bs; 1941d0f46423SBarry Smith b->rstartbs = B->rmap->rstart / bs; 1942d0f46423SBarry Smith b->rendbs = B->rmap->rend / bs; 1943a23d5eceSKris Buschelman 1944d0f46423SBarry Smith b->cstartbs = B->cmap->rstart / bs; 1945d0f46423SBarry Smith b->cendbs = B->cmap->rend / bs; 1946a23d5eceSKris Buschelman 1947cb7b82ddSBarry Smith #if defined(PETSC_USE_CTABLE) 1948*eec179cfSJacob Faibussowitsch PetscCall(PetscHMapIDestroy(&b->colmap)); 1949cb7b82ddSBarry Smith #else 19509566063dSJacob Faibussowitsch PetscCall(PetscFree(b->colmap)); 1951cb7b82ddSBarry Smith #endif 19529566063dSJacob Faibussowitsch PetscCall(PetscFree(b->garray)); 19539566063dSJacob Faibussowitsch PetscCall(VecDestroy(&b->lvec)); 19549566063dSJacob Faibussowitsch PetscCall(VecScatterDestroy(&b->Mvctx)); 19559566063dSJacob Faibussowitsch PetscCall(VecDestroy(&b->slvec0)); 19569566063dSJacob Faibussowitsch PetscCall(VecDestroy(&b->slvec0b)); 19579566063dSJacob Faibussowitsch PetscCall(VecDestroy(&b->slvec1)); 19589566063dSJacob Faibussowitsch PetscCall(VecDestroy(&b->slvec1a)); 19599566063dSJacob Faibussowitsch PetscCall(VecDestroy(&b->slvec1b)); 19609566063dSJacob Faibussowitsch PetscCall(VecScatterDestroy(&b->sMvctx)); 1961cb7b82ddSBarry Smith 1962cb7b82ddSBarry Smith /* Because the B will have been resized we simply destroy it and create a new one each time */ 19639566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size)); 19649566063dSJacob Faibussowitsch PetscCall(MatDestroy(&b->B)); 19659566063dSJacob Faibussowitsch PetscCall(MatCreate(PETSC_COMM_SELF, &b->B)); 19669566063dSJacob Faibussowitsch PetscCall(MatSetSizes(b->B, B->rmap->n, size > 1 ? B->cmap->N : 0, B->rmap->n, size > 1 ? B->cmap->N : 0)); 19679566063dSJacob Faibussowitsch PetscCall(MatSetType(b->B, MATSEQBAIJ)); 1968cb7b82ddSBarry Smith 1969526dfc15SBarry Smith if (!B->preallocated) { 19709566063dSJacob Faibussowitsch PetscCall(MatCreate(PETSC_COMM_SELF, &b->A)); 19719566063dSJacob Faibussowitsch PetscCall(MatSetSizes(b->A, B->rmap->n, B->cmap->n, B->rmap->n, B->cmap->n)); 19729566063dSJacob Faibussowitsch PetscCall(MatSetType(b->A, MATSEQSBAIJ)); 19739566063dSJacob Faibussowitsch PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B), bs, &B->bstash)); 1974526dfc15SBarry Smith } 1975a23d5eceSKris Buschelman 19769566063dSJacob Faibussowitsch PetscCall(MatSeqSBAIJSetPreallocation(b->A, bs, d_nz, d_nnz)); 19779566063dSJacob Faibussowitsch PetscCall(MatSeqBAIJSetPreallocation(b->B, bs, o_nz, o_nnz)); 197826fbe8dcSKarl Rupp 1979526dfc15SBarry Smith B->preallocated = PETSC_TRUE; 1980cb7b82ddSBarry Smith B->was_assembled = PETSC_FALSE; 1981cb7b82ddSBarry Smith B->assembled = PETSC_FALSE; 1982a23d5eceSKris Buschelman PetscFunctionReturn(0); 1983a23d5eceSKris Buschelman } 1984a23d5eceSKris Buschelman 1985d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMPISBAIJSetPreallocationCSR_MPISBAIJ(Mat B, PetscInt bs, const PetscInt ii[], const PetscInt jj[], const PetscScalar V[]) 1986d71ae5a4SJacob Faibussowitsch { 198702106b30SBarry Smith PetscInt m, rstart, cend; 1988f4259b30SLisandro Dalcin PetscInt i, j, d, nz, bd, nz_max = 0, *d_nnz = NULL, *o_nnz = NULL; 1989f4259b30SLisandro Dalcin const PetscInt *JJ = NULL; 1990f4259b30SLisandro Dalcin PetscScalar *values = NULL; 1991bb80cfbbSStefano Zampini PetscBool roworiented = ((Mat_MPISBAIJ *)B->data)->roworiented; 19923bd0feecSPierre Jolivet PetscBool nooffprocentries; 1993dfb205c3SBarry Smith 1994dfb205c3SBarry Smith PetscFunctionBegin; 19955f80ce2aSJacob Faibussowitsch PetscCheck(bs >= 1, PetscObjectComm((PetscObject)B), PETSC_ERR_ARG_OUTOFRANGE, "Invalid block size specified, must be positive but it is %" PetscInt_FMT, bs); 19969566063dSJacob Faibussowitsch PetscCall(PetscLayoutSetBlockSize(B->rmap, bs)); 19979566063dSJacob Faibussowitsch PetscCall(PetscLayoutSetBlockSize(B->cmap, bs)); 19989566063dSJacob Faibussowitsch PetscCall(PetscLayoutSetUp(B->rmap)); 19999566063dSJacob Faibussowitsch PetscCall(PetscLayoutSetUp(B->cmap)); 20009566063dSJacob Faibussowitsch PetscCall(PetscLayoutGetBlockSize(B->rmap, &bs)); 2001dfb205c3SBarry Smith m = B->rmap->n / bs; 2002dfb205c3SBarry Smith rstart = B->rmap->rstart / bs; 2003dfb205c3SBarry Smith cend = B->cmap->rend / bs; 2004dfb205c3SBarry Smith 20055f80ce2aSJacob Faibussowitsch PetscCheck(!ii[0], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %" PetscInt_FMT, ii[0]); 20069566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(m, &d_nnz, m, &o_nnz)); 2007dfb205c3SBarry Smith for (i = 0; i < m; i++) { 2008dfb205c3SBarry Smith nz = ii[i + 1] - ii[i]; 20095f80ce2aSJacob Faibussowitsch PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Local row %" PetscInt_FMT " has a negative number of columns %" PetscInt_FMT, i, nz); 20100cd7f59aSBarry Smith /* count the ones on the diagonal and above, split into diagonal and off diagonal portions. */ 2011dfb205c3SBarry Smith JJ = jj + ii[i]; 20120cd7f59aSBarry Smith bd = 0; 2013dfb205c3SBarry Smith for (j = 0; j < nz; j++) { 20140cd7f59aSBarry Smith if (*JJ >= i + rstart) break; 2015dfb205c3SBarry Smith JJ++; 20160cd7f59aSBarry Smith bd++; 2017dfb205c3SBarry Smith } 2018dfb205c3SBarry Smith d = 0; 2019dfb205c3SBarry Smith for (; j < nz; j++) { 2020dfb205c3SBarry Smith if (*JJ++ >= cend) break; 2021dfb205c3SBarry Smith d++; 2022dfb205c3SBarry Smith } 2023dfb205c3SBarry Smith d_nnz[i] = d; 20240cd7f59aSBarry Smith o_nnz[i] = nz - d - bd; 20250cd7f59aSBarry Smith nz = nz - bd; 20260cd7f59aSBarry Smith nz_max = PetscMax(nz_max, nz); 2027dfb205c3SBarry Smith } 20289566063dSJacob Faibussowitsch PetscCall(MatMPISBAIJSetPreallocation(B, bs, 0, d_nnz, 0, o_nnz)); 20299566063dSJacob Faibussowitsch PetscCall(MatSetOption(B, MAT_IGNORE_LOWER_TRIANGULAR, PETSC_TRUE)); 20309566063dSJacob Faibussowitsch PetscCall(PetscFree2(d_nnz, o_nnz)); 2031dfb205c3SBarry Smith 2032dfb205c3SBarry Smith values = (PetscScalar *)V; 203348a46eb9SPierre Jolivet if (!values) PetscCall(PetscCalloc1(bs * bs * nz_max, &values)); 2034dfb205c3SBarry Smith for (i = 0; i < m; i++) { 2035dfb205c3SBarry Smith PetscInt row = i + rstart; 2036dfb205c3SBarry Smith PetscInt ncols = ii[i + 1] - ii[i]; 2037dfb205c3SBarry Smith const PetscInt *icols = jj + ii[i]; 2038bb80cfbbSStefano Zampini if (bs == 1 || !roworiented) { /* block ordering matches the non-nested layout of MatSetValues so we can insert entire rows */ 2039dfb205c3SBarry Smith const PetscScalar *svals = values + (V ? (bs * bs * ii[i]) : 0); 20409566063dSJacob Faibussowitsch PetscCall(MatSetValuesBlocked_MPISBAIJ(B, 1, &row, ncols, icols, svals, INSERT_VALUES)); 2041bb80cfbbSStefano Zampini } else { /* block ordering does not match so we can only insert one block at a time. */ 2042bb80cfbbSStefano Zampini PetscInt j; 20430cd7f59aSBarry Smith for (j = 0; j < ncols; j++) { 20440cd7f59aSBarry Smith const PetscScalar *svals = values + (V ? (bs * bs * (ii[i] + j)) : 0); 20459566063dSJacob Faibussowitsch PetscCall(MatSetValuesBlocked_MPISBAIJ(B, 1, &row, 1, &icols[j], svals, INSERT_VALUES)); 20460cd7f59aSBarry Smith } 20470cd7f59aSBarry Smith } 2048dfb205c3SBarry Smith } 2049dfb205c3SBarry Smith 20509566063dSJacob Faibussowitsch if (!V) PetscCall(PetscFree(values)); 20513bd0feecSPierre Jolivet nooffprocentries = B->nooffprocentries; 20523bd0feecSPierre Jolivet B->nooffprocentries = PETSC_TRUE; 20539566063dSJacob Faibussowitsch PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 20549566063dSJacob Faibussowitsch PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 20553bd0feecSPierre Jolivet B->nooffprocentries = nooffprocentries; 20563bd0feecSPierre Jolivet 20579566063dSJacob Faibussowitsch PetscCall(MatSetOption(B, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE)); 2058dfb205c3SBarry Smith PetscFunctionReturn(0); 2059dfb205c3SBarry Smith } 2060dfb205c3SBarry Smith 20610bad9183SKris Buschelman /*MC 2062fafad747SKris Buschelman MATMPISBAIJ - MATMPISBAIJ = "mpisbaij" - A matrix type to be used for distributed symmetric sparse block matrices, 2063828413b8SBarry Smith based on block compressed sparse row format. Only the upper triangular portion of the "diagonal" portion of 2064828413b8SBarry Smith the matrix is stored. 2065828413b8SBarry Smith 2066828413b8SBarry Smith For complex numbers by default this matrix is symmetric, NOT Hermitian symmetric. To make it Hermitian symmetric you 206711a5261eSBarry Smith can call `MatSetOption`(`Mat`, `MAT_HERMITIAN`); 20680bad9183SKris Buschelman 20690bad9183SKris Buschelman Options Database Keys: 207011a5261eSBarry Smith . -mat_type mpisbaij - sets the matrix type to "mpisbaij" during a call to `MatSetFromOptions()` 20710bad9183SKris Buschelman 207211a5261eSBarry Smith Note: 2073476417e5SBarry Smith The number of rows in the matrix must be less than or equal to the number of columns. Similarly the number of rows in the 2074476417e5SBarry Smith diagonal portion of the matrix of each process has to less than or equal the number of columns. 2075476417e5SBarry Smith 20760bad9183SKris Buschelman Level: beginner 20770bad9183SKris Buschelman 207811a5261eSBarry Smith .seealso: `MATSBAIJ`, `MATBAIJ`, `MatCreateBAIJ()`, `MATSEQSBAIJ`, `MatType` 20790bad9183SKris Buschelman M*/ 20800bad9183SKris Buschelman 2081d71ae5a4SJacob Faibussowitsch PETSC_EXTERN PetscErrorCode MatCreate_MPISBAIJ(Mat B) 2082d71ae5a4SJacob Faibussowitsch { 2083b5df2d14SHong Zhang Mat_MPISBAIJ *b; 208494ae4db5SBarry Smith PetscBool flg = PETSC_FALSE; 2085b5df2d14SHong Zhang 2086b5df2d14SHong Zhang PetscFunctionBegin; 20874dfa11a4SJacob Faibussowitsch PetscCall(PetscNew(&b)); 2088b0a32e0cSBarry Smith B->data = (void *)b; 20899566063dSJacob Faibussowitsch PetscCall(PetscMemcpy(B->ops, &MatOps_Values, sizeof(struct _MatOps))); 2090b5df2d14SHong Zhang 2091b5df2d14SHong Zhang B->ops->destroy = MatDestroy_MPISBAIJ; 2092b5df2d14SHong Zhang B->ops->view = MatView_MPISBAIJ; 2093b5df2d14SHong Zhang B->assembled = PETSC_FALSE; 2094b5df2d14SHong Zhang B->insertmode = NOT_SET_VALUES; 209526fbe8dcSKarl Rupp 20969566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)B), &b->rank)); 20979566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &b->size)); 2098b5df2d14SHong Zhang 2099b5df2d14SHong Zhang /* build local table of row and column ownerships */ 21009566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(b->size + 2, &b->rangebs)); 2101b5df2d14SHong Zhang 2102b5df2d14SHong Zhang /* build cache for off array entries formed */ 21039566063dSJacob Faibussowitsch PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B), 1, &B->stash)); 210426fbe8dcSKarl Rupp 2105b5df2d14SHong Zhang b->donotstash = PETSC_FALSE; 21060298fd71SBarry Smith b->colmap = NULL; 21070298fd71SBarry Smith b->garray = NULL; 2108b5df2d14SHong Zhang b->roworiented = PETSC_TRUE; 2109b5df2d14SHong Zhang 2110b5df2d14SHong Zhang /* stuff used in block assembly */ 2111f4259b30SLisandro Dalcin b->barray = NULL; 2112b5df2d14SHong Zhang 2113b5df2d14SHong Zhang /* stuff used for matrix vector multiply */ 2114f4259b30SLisandro Dalcin b->lvec = NULL; 2115f4259b30SLisandro Dalcin b->Mvctx = NULL; 2116f4259b30SLisandro Dalcin b->slvec0 = NULL; 2117f4259b30SLisandro Dalcin b->slvec0b = NULL; 2118f4259b30SLisandro Dalcin b->slvec1 = NULL; 2119f4259b30SLisandro Dalcin b->slvec1a = NULL; 2120f4259b30SLisandro Dalcin b->slvec1b = NULL; 2121f4259b30SLisandro Dalcin b->sMvctx = NULL; 2122b5df2d14SHong Zhang 2123b5df2d14SHong Zhang /* stuff for MatGetRow() */ 2124f4259b30SLisandro Dalcin b->rowindices = NULL; 2125f4259b30SLisandro Dalcin b->rowvalues = NULL; 2126b5df2d14SHong Zhang b->getrowactive = PETSC_FALSE; 2127b5df2d14SHong Zhang 2128b5df2d14SHong Zhang /* hash table stuff */ 2129f4259b30SLisandro Dalcin b->ht = NULL; 2130f4259b30SLisandro Dalcin b->hd = NULL; 2131b5df2d14SHong Zhang b->ht_size = 0; 2132b5df2d14SHong Zhang b->ht_flag = PETSC_FALSE; 2133b5df2d14SHong Zhang b->ht_fact = 0; 2134b5df2d14SHong Zhang b->ht_total_ct = 0; 2135b5df2d14SHong Zhang b->ht_insert_ct = 0; 2136b5df2d14SHong Zhang 21377dae84e0SHong Zhang /* stuff for MatCreateSubMatrices_MPIBAIJ_local() */ 21387a868f3eSHong Zhang b->ijonly = PETSC_FALSE; 21397a868f3eSHong Zhang 2140f4259b30SLisandro Dalcin b->in_loc = NULL; 2141f4259b30SLisandro Dalcin b->v_loc = NULL; 214259ffdab8SBarry Smith b->n_loc = 0; 214394ae4db5SBarry Smith 21449566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatStoreValues_C", MatStoreValues_MPISBAIJ)); 21459566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatRetrieveValues_C", MatRetrieveValues_MPISBAIJ)); 21469566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPISBAIJSetPreallocation_C", MatMPISBAIJSetPreallocation_MPISBAIJ)); 21479566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPISBAIJSetPreallocationCSR_C", MatMPISBAIJSetPreallocationCSR_MPISBAIJ)); 21486214f412SHong Zhang #if defined(PETSC_HAVE_ELEMENTAL) 21499566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpisbaij_elemental_C", MatConvert_MPISBAIJ_Elemental)); 21506214f412SHong Zhang #endif 2151d24d4204SJose E. Roman #if defined(PETSC_HAVE_SCALAPACK) 21529566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpisbaij_scalapack_C", MatConvert_SBAIJ_ScaLAPACK)); 2153d24d4204SJose E. Roman #endif 21549566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpisbaij_mpiaij_C", MatConvert_MPISBAIJ_Basic)); 21559566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpisbaij_mpibaij_C", MatConvert_MPISBAIJ_Basic)); 2156aa5a9175SDahai Guo 2157b94d7dedSBarry Smith B->symmetric = PETSC_BOOL3_TRUE; 2158b94d7dedSBarry Smith B->structurally_symmetric = PETSC_BOOL3_TRUE; 2159b94d7dedSBarry Smith B->symmetry_eternal = PETSC_TRUE; 2160b94d7dedSBarry Smith B->structural_symmetry_eternal = PETSC_TRUE; 2161eb1ec7c1SStefano Zampini #if defined(PETSC_USE_COMPLEX) 2162b94d7dedSBarry Smith B->hermitian = PETSC_BOOL3_FALSE; 2163eb1ec7c1SStefano Zampini #else 2164b94d7dedSBarry Smith B->hermitian = PETSC_BOOL3_TRUE; 2165eb1ec7c1SStefano Zampini #endif 216613647f61SHong Zhang 21679566063dSJacob Faibussowitsch PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATMPISBAIJ)); 2168d0609cedSBarry Smith PetscOptionsBegin(PetscObjectComm((PetscObject)B), NULL, "Options for loading MPISBAIJ matrix 1", "Mat"); 21699566063dSJacob Faibussowitsch PetscCall(PetscOptionsBool("-mat_use_hash_table", "Use hash table to save memory in constructing matrix", "MatSetOption", flg, &flg, NULL)); 217094ae4db5SBarry Smith if (flg) { 217194ae4db5SBarry Smith PetscReal fact = 1.39; 21729566063dSJacob Faibussowitsch PetscCall(MatSetOption(B, MAT_USE_HASH_TABLE, PETSC_TRUE)); 21739566063dSJacob Faibussowitsch PetscCall(PetscOptionsReal("-mat_use_hash_table", "Use hash table factor", "MatMPIBAIJSetHashTableFactor", fact, &fact, NULL)); 217494ae4db5SBarry Smith if (fact <= 1.0) fact = 1.39; 21759566063dSJacob Faibussowitsch PetscCall(MatMPIBAIJSetHashTableFactor(B, fact)); 21769566063dSJacob Faibussowitsch PetscCall(PetscInfo(B, "Hash table Factor used %5.2g\n", (double)fact)); 217794ae4db5SBarry Smith } 2178d0609cedSBarry Smith PetscOptionsEnd(); 2179b5df2d14SHong Zhang PetscFunctionReturn(0); 2180b5df2d14SHong Zhang } 2181b5df2d14SHong Zhang 2182209238afSKris Buschelman /*MC 2183002d173eSKris Buschelman MATSBAIJ - MATSBAIJ = "sbaij" - A matrix type to be used for symmetric block sparse matrices. 2184209238afSKris Buschelman 218511a5261eSBarry Smith This matrix type is identical to `MATSEQSBAIJ` when constructed with a single process communicator, 218611a5261eSBarry Smith and `MATMPISBAIJ` otherwise. 2187209238afSKris Buschelman 218811a5261eSBarry Smith Options Database Key: 2189209238afSKris Buschelman . -mat_type sbaij - sets the matrix type to "sbaij" during a call to MatSetFromOptions() 2190209238afSKris Buschelman 2191209238afSKris Buschelman Level: beginner 2192209238afSKris Buschelman 219311a5261eSBarry Smith .seealso: `MATSEQSBAIJ`, `MATMPISBAIJ`, `MatCreateSBAIJ`, `MATSEQSBAIJ`, `MATMPISBAIJ` 2194209238afSKris Buschelman M*/ 2195209238afSKris Buschelman 2196b5df2d14SHong Zhang /*@C 2197b5df2d14SHong Zhang MatMPISBAIJSetPreallocation - For good matrix assembly performance 2198b5df2d14SHong Zhang the user should preallocate the matrix storage by setting the parameters 2199b5df2d14SHong Zhang d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 2200b5df2d14SHong Zhang performance can be increased by more than a factor of 50. 2201b5df2d14SHong Zhang 220211a5261eSBarry Smith Collective on B 2203b5df2d14SHong Zhang 2204b5df2d14SHong Zhang Input Parameters: 22051c4f3114SJed Brown + B - the matrix 2206bb7ae925SBarry Smith . bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row 2207bb7ae925SBarry Smith blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs() 2208b5df2d14SHong Zhang . d_nz - number of block nonzeros per block row in diagonal portion of local 2209b5df2d14SHong Zhang submatrix (same for all local rows) 2210b5df2d14SHong Zhang . d_nnz - array containing the number of block nonzeros in the various block rows 22116d10fdaeSSatish Balay in the upper triangular and diagonal part of the in diagonal portion of the local 22120298fd71SBarry Smith (possibly different for each block row) or NULL. If you plan to factor the matrix you must leave room 221395742e49SBarry Smith for the diagonal entry and set a value even if it is zero. 2214b5df2d14SHong Zhang . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 2215b5df2d14SHong Zhang submatrix (same for all local rows). 2216b5df2d14SHong Zhang - o_nnz - array containing the number of nonzeros in the various block rows of the 2217c2fc9fa9SBarry Smith off-diagonal portion of the local submatrix that is right of the diagonal 22180298fd71SBarry Smith (possibly different for each block row) or NULL. 2219b5df2d14SHong Zhang 2220b5df2d14SHong Zhang Options Database Keys: 2221a2b725a8SWilliam Gropp + -mat_no_unroll - uses code that does not unroll the loops in the 2222b5df2d14SHong Zhang block calculations (much slower) 2223a2b725a8SWilliam Gropp - -mat_block_size - size of the blocks to use 2224b5df2d14SHong Zhang 2225b5df2d14SHong Zhang Notes: 2226b5df2d14SHong Zhang 222711a5261eSBarry Smith If `PETSC_DECIDE` or `PETSC_DETERMINE` is used for a particular argument on one processor 2228b5df2d14SHong Zhang than it must be used on all processors that share the object for that argument. 2229b5df2d14SHong Zhang 223049a6f317SBarry Smith If the *_nnz parameter is given then the *_nz parameter is ignored 223149a6f317SBarry Smith 2232b5df2d14SHong Zhang Storage Information: 2233b5df2d14SHong Zhang For a square global matrix we define each processor's diagonal portion 2234b5df2d14SHong Zhang to be its local rows and the corresponding columns (a square submatrix); 2235b5df2d14SHong Zhang each processor's off-diagonal portion encompasses the remainder of the 2236b5df2d14SHong Zhang local matrix (a rectangular submatrix). 2237b5df2d14SHong Zhang 2238b5df2d14SHong Zhang The user can specify preallocated storage for the diagonal part of 2239b5df2d14SHong Zhang the local submatrix with either d_nz or d_nnz (not both). Set 224011a5261eSBarry Smith d_nz = `PETSC_DEFAULT` and d_nnz = NULL for PETSc to control dynamic 2241b5df2d14SHong Zhang memory allocation. Likewise, specify preallocated storage for the 2242b5df2d14SHong Zhang off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 2243b5df2d14SHong Zhang 224411a5261eSBarry Smith You can call `MatGetInfo()` to get information on how effective the preallocation was; 2245aa95bbe8SBarry Smith for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 2246aa95bbe8SBarry Smith You can also run with the option -info and look for messages with the string 2247aa95bbe8SBarry Smith malloc in them to see if additional memory allocation was needed. 2248aa95bbe8SBarry Smith 2249b5df2d14SHong Zhang Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 2250b5df2d14SHong Zhang the figure below we depict these three local rows and all columns (0-11). 2251b5df2d14SHong Zhang 2252b5df2d14SHong Zhang .vb 2253b5df2d14SHong Zhang 0 1 2 3 4 5 6 7 8 9 10 11 2254a4b1a0f6SJed Brown -------------------------- 2255c2fc9fa9SBarry Smith row 3 |. . . d d d o o o o o o 2256c2fc9fa9SBarry Smith row 4 |. . . d d d o o o o o o 2257c2fc9fa9SBarry Smith row 5 |. . . d d d o o o o o o 2258a4b1a0f6SJed Brown -------------------------- 2259b5df2d14SHong Zhang .ve 2260b5df2d14SHong Zhang 2261b5df2d14SHong Zhang Thus, any entries in the d locations are stored in the d (diagonal) 2262b5df2d14SHong Zhang submatrix, and any entries in the o locations are stored in the 22636d10fdaeSSatish Balay o (off-diagonal) submatrix. Note that the d matrix is stored in 226411a5261eSBarry Smith `MATSEQSBAIJ` format and the o submatrix in `MATSEQBAIJ` format. 2265b5df2d14SHong Zhang 22666d10fdaeSSatish Balay Now d_nz should indicate the number of block nonzeros per row in the upper triangular 22676d10fdaeSSatish Balay plus the diagonal part of the d matrix, 2268c2fc9fa9SBarry Smith and o_nz should indicate the number of block nonzeros per row in the o matrix 2269c2fc9fa9SBarry Smith 2270b5df2d14SHong Zhang In general, for PDE problems in which most nonzeros are near the diagonal, 2271b5df2d14SHong Zhang one expects d_nz >> o_nz. For large problems you MUST preallocate memory 2272b5df2d14SHong Zhang or you will get TERRIBLE performance; see the users' manual chapter on 2273b5df2d14SHong Zhang matrices. 2274b5df2d14SHong Zhang 2275b5df2d14SHong Zhang Level: intermediate 2276b5df2d14SHong Zhang 227711a5261eSBarry Smith .seealso: `MATMPISBAIJ`, `MATSBAIJ`, `MatCreate()`, `MatCreateSeqSBAIJ()`, `MatSetValues()`, `MatCreateBAIJ()`, `PetscSplitOwnership()` 2278b5df2d14SHong Zhang @*/ 2279d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMPISBAIJSetPreallocation(Mat B, PetscInt bs, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[]) 2280d71ae5a4SJacob Faibussowitsch { 2281b5df2d14SHong Zhang PetscFunctionBegin; 22826ba663aaSJed Brown PetscValidHeaderSpecific(B, MAT_CLASSID, 1); 22836ba663aaSJed Brown PetscValidType(B, 1); 22846ba663aaSJed Brown PetscValidLogicalCollectiveInt(B, bs, 2); 2285cac4c232SBarry Smith PetscTryMethod(B, "MatMPISBAIJSetPreallocation_C", (Mat, PetscInt, PetscInt, const PetscInt[], PetscInt, const PetscInt[]), (B, bs, d_nz, d_nnz, o_nz, o_nnz)); 2286b5df2d14SHong Zhang PetscFunctionReturn(0); 2287b5df2d14SHong Zhang } 2288b5df2d14SHong Zhang 2289a30f8f8cSSatish Balay /*@C 229011a5261eSBarry Smith MatCreateSBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format, `MATSBAIJ`, 2291a30f8f8cSSatish Balay (block compressed row). For good matrix assembly performance 2292a30f8f8cSSatish Balay the user should preallocate the matrix storage by setting the parameters 2293a30f8f8cSSatish Balay d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 2294a30f8f8cSSatish Balay performance can be increased by more than a factor of 50. 2295a30f8f8cSSatish Balay 2296d083f849SBarry Smith Collective 2297a30f8f8cSSatish Balay 2298a30f8f8cSSatish Balay Input Parameters: 2299a30f8f8cSSatish Balay + comm - MPI communicator 230011a5261eSBarry Smith . bs - size of block, the blocks are ALWAYS square. One can use `MatSetBlockSizes()` to set a different row and column blocksize but the row 2301bb7ae925SBarry Smith blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs() 230211a5261eSBarry Smith . m - number of local rows (or `PETSC_DECIDE` to have calculated if M is given) 2303a30f8f8cSSatish Balay This value should be the same as the local size used in creating the 2304a30f8f8cSSatish Balay y vector for the matrix-vector product y = Ax. 230511a5261eSBarry Smith . n - number of local columns (or `PETSC_DECIDE` to have calculated if N is given) 2306a30f8f8cSSatish Balay This value should be the same as the local size used in creating the 2307a30f8f8cSSatish Balay x vector for the matrix-vector product y = Ax. 230811a5261eSBarry Smith . M - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given) 230911a5261eSBarry Smith . N - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given) 2310a30f8f8cSSatish Balay . d_nz - number of block nonzeros per block row in diagonal portion of local 2311a30f8f8cSSatish Balay submatrix (same for all local rows) 2312a30f8f8cSSatish Balay . d_nnz - array containing the number of block nonzeros in the various block rows 23136d10fdaeSSatish Balay in the upper triangular portion of the in diagonal portion of the local 23140298fd71SBarry Smith (possibly different for each block block row) or NULL. 231595742e49SBarry Smith If you plan to factor the matrix you must leave room for the diagonal entry and 231695742e49SBarry Smith set its value even if it is zero. 2317a30f8f8cSSatish Balay . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 2318a30f8f8cSSatish Balay submatrix (same for all local rows). 2319a30f8f8cSSatish Balay - o_nnz - array containing the number of nonzeros in the various block rows of the 2320a30f8f8cSSatish Balay off-diagonal portion of the local submatrix (possibly different for 23210298fd71SBarry Smith each block row) or NULL. 2322a30f8f8cSSatish Balay 2323a30f8f8cSSatish Balay Output Parameter: 2324a30f8f8cSSatish Balay . A - the matrix 2325a30f8f8cSSatish Balay 2326a30f8f8cSSatish Balay Options Database Keys: 2327a2b725a8SWilliam Gropp + -mat_no_unroll - uses code that does not unroll the loops in the 2328a30f8f8cSSatish Balay block calculations (much slower) 2329a30f8f8cSSatish Balay . -mat_block_size - size of the blocks to use 2330a2b725a8SWilliam Gropp - -mat_mpi - use the parallel matrix data structures even on one processor 2331a30f8f8cSSatish Balay (defaults to using SeqBAIJ format on one processor) 2332a30f8f8cSSatish Balay 233311a5261eSBarry Smith It is recommended that one use the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`, 2334f6f02116SRichard Tran Mills MatXXXXSetPreallocation() paradigm instead of this routine directly. 233511a5261eSBarry Smith [MatXXXXSetPreallocation() is, for example, `MatSeqAIJSetPreallocation()`] 2336175b88e8SBarry Smith 2337a30f8f8cSSatish Balay Notes: 2338d1be2dadSMatthew Knepley The number of rows and columns must be divisible by blocksize. 23396d6d819aSHong Zhang This matrix type does not support complex Hermitian operation. 2340d1be2dadSMatthew Knepley 2341a30f8f8cSSatish Balay The user MUST specify either the local or global matrix dimensions 2342a30f8f8cSSatish Balay (possibly both). 2343a30f8f8cSSatish Balay 234411a5261eSBarry Smith If` PETSC_DECIDE` or `PETSC_DETERMINE` is used for a particular argument on one processor 2345a30f8f8cSSatish Balay than it must be used on all processors that share the object for that argument. 2346a30f8f8cSSatish Balay 234749a6f317SBarry Smith If the *_nnz parameter is given then the *_nz parameter is ignored 234849a6f317SBarry Smith 2349a30f8f8cSSatish Balay Storage Information: 2350a30f8f8cSSatish Balay For a square global matrix we define each processor's diagonal portion 2351a30f8f8cSSatish Balay to be its local rows and the corresponding columns (a square submatrix); 2352a30f8f8cSSatish Balay each processor's off-diagonal portion encompasses the remainder of the 2353a30f8f8cSSatish Balay local matrix (a rectangular submatrix). 2354a30f8f8cSSatish Balay 2355a30f8f8cSSatish Balay The user can specify preallocated storage for the diagonal part of 2356a30f8f8cSSatish Balay the local submatrix with either d_nz or d_nnz (not both). Set 23570298fd71SBarry Smith d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic 2358a30f8f8cSSatish Balay memory allocation. Likewise, specify preallocated storage for the 2359a30f8f8cSSatish Balay off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 2360a30f8f8cSSatish Balay 2361a30f8f8cSSatish Balay Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 2362a30f8f8cSSatish Balay the figure below we depict these three local rows and all columns (0-11). 2363a30f8f8cSSatish Balay 2364a30f8f8cSSatish Balay .vb 2365a30f8f8cSSatish Balay 0 1 2 3 4 5 6 7 8 9 10 11 2366a4b1a0f6SJed Brown -------------------------- 2367c2fc9fa9SBarry Smith row 3 |. . . d d d o o o o o o 2368c2fc9fa9SBarry Smith row 4 |. . . d d d o o o o o o 2369c2fc9fa9SBarry Smith row 5 |. . . d d d o o o o o o 2370a4b1a0f6SJed Brown -------------------------- 2371a30f8f8cSSatish Balay .ve 2372a30f8f8cSSatish Balay 2373a30f8f8cSSatish Balay Thus, any entries in the d locations are stored in the d (diagonal) 2374a30f8f8cSSatish Balay submatrix, and any entries in the o locations are stored in the 23756d10fdaeSSatish Balay o (off-diagonal) submatrix. Note that the d matrix is stored in 237611a5261eSBarry Smith MatSeqSBAIJ format and the o submatrix in `MATSEQBAIJ` format. 2377a30f8f8cSSatish Balay 23786d10fdaeSSatish Balay Now d_nz should indicate the number of block nonzeros per row in the upper triangular 23796d10fdaeSSatish Balay plus the diagonal part of the d matrix, 2380a30f8f8cSSatish Balay and o_nz should indicate the number of block nonzeros per row in the o matrix. 2381a30f8f8cSSatish Balay In general, for PDE problems in which most nonzeros are near the diagonal, 2382a30f8f8cSSatish Balay one expects d_nz >> o_nz. For large problems you MUST preallocate memory 2383a30f8f8cSSatish Balay or you will get TERRIBLE performance; see the users' manual chapter on 2384a30f8f8cSSatish Balay matrices. 2385a30f8f8cSSatish Balay 2386a30f8f8cSSatish Balay Level: intermediate 2387a30f8f8cSSatish Balay 238811a5261eSBarry Smith .seealso: `MATSBAIJ`, `MatCreate()`, `MatCreateSeqSBAIJ()`, `MatSetValues()`, `MatCreateBAIJ()` 2389a30f8f8cSSatish Balay @*/ 2390a30f8f8cSSatish Balay 2391d71ae5a4SJacob Faibussowitsch PetscErrorCode MatCreateSBAIJ(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[], Mat *A) 2392d71ae5a4SJacob Faibussowitsch { 23931302d50aSBarry Smith PetscMPIInt size; 2394a30f8f8cSSatish Balay 2395a30f8f8cSSatish Balay PetscFunctionBegin; 23969566063dSJacob Faibussowitsch PetscCall(MatCreate(comm, A)); 23979566063dSJacob Faibussowitsch PetscCall(MatSetSizes(*A, m, n, M, N)); 23989566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_size(comm, &size)); 2399273d9f13SBarry Smith if (size > 1) { 24009566063dSJacob Faibussowitsch PetscCall(MatSetType(*A, MATMPISBAIJ)); 24019566063dSJacob Faibussowitsch PetscCall(MatMPISBAIJSetPreallocation(*A, bs, d_nz, d_nnz, o_nz, o_nnz)); 2402273d9f13SBarry Smith } else { 24039566063dSJacob Faibussowitsch PetscCall(MatSetType(*A, MATSEQSBAIJ)); 24049566063dSJacob Faibussowitsch PetscCall(MatSeqSBAIJSetPreallocation(*A, bs, d_nz, d_nnz)); 2405273d9f13SBarry Smith } 2406a30f8f8cSSatish Balay PetscFunctionReturn(0); 2407a30f8f8cSSatish Balay } 2408a30f8f8cSSatish Balay 2409d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatDuplicate_MPISBAIJ(Mat matin, MatDuplicateOption cpvalues, Mat *newmat) 2410d71ae5a4SJacob Faibussowitsch { 2411a30f8f8cSSatish Balay Mat mat; 2412a30f8f8cSSatish Balay Mat_MPISBAIJ *a, *oldmat = (Mat_MPISBAIJ *)matin->data; 2413d0f46423SBarry Smith PetscInt len = 0, nt, bs = matin->rmap->bs, mbs = oldmat->mbs; 2414387bc808SHong Zhang PetscScalar *array; 2415a30f8f8cSSatish Balay 2416a30f8f8cSSatish Balay PetscFunctionBegin; 2417f4259b30SLisandro Dalcin *newmat = NULL; 241826fbe8dcSKarl Rupp 24199566063dSJacob Faibussowitsch PetscCall(MatCreate(PetscObjectComm((PetscObject)matin), &mat)); 24209566063dSJacob Faibussowitsch PetscCall(MatSetSizes(mat, matin->rmap->n, matin->cmap->n, matin->rmap->N, matin->cmap->N)); 24219566063dSJacob Faibussowitsch PetscCall(MatSetType(mat, ((PetscObject)matin)->type_name)); 24229566063dSJacob Faibussowitsch PetscCall(PetscLayoutReference(matin->rmap, &mat->rmap)); 24239566063dSJacob Faibussowitsch PetscCall(PetscLayoutReference(matin->cmap, &mat->cmap)); 2424e1b6402fSHong Zhang 2425d5f3da31SBarry Smith mat->factortype = matin->factortype; 2426273d9f13SBarry Smith mat->preallocated = PETSC_TRUE; 242782327fa8SHong Zhang mat->assembled = PETSC_TRUE; 24287fff6886SHong Zhang mat->insertmode = NOT_SET_VALUES; 24297fff6886SHong Zhang 2430b5df2d14SHong Zhang a = (Mat_MPISBAIJ *)mat->data; 2431a30f8f8cSSatish Balay a->bs2 = oldmat->bs2; 2432a30f8f8cSSatish Balay a->mbs = oldmat->mbs; 2433a30f8f8cSSatish Balay a->nbs = oldmat->nbs; 2434a30f8f8cSSatish Balay a->Mbs = oldmat->Mbs; 2435a30f8f8cSSatish Balay a->Nbs = oldmat->Nbs; 2436a30f8f8cSSatish Balay 2437a30f8f8cSSatish Balay a->size = oldmat->size; 2438a30f8f8cSSatish Balay a->rank = oldmat->rank; 2439a30f8f8cSSatish Balay a->donotstash = oldmat->donotstash; 2440a30f8f8cSSatish Balay a->roworiented = oldmat->roworiented; 2441f4259b30SLisandro Dalcin a->rowindices = NULL; 2442f4259b30SLisandro Dalcin a->rowvalues = NULL; 2443a30f8f8cSSatish Balay a->getrowactive = PETSC_FALSE; 2444f4259b30SLisandro Dalcin a->barray = NULL; 2445899cda47SBarry Smith a->rstartbs = oldmat->rstartbs; 2446899cda47SBarry Smith a->rendbs = oldmat->rendbs; 2447899cda47SBarry Smith a->cstartbs = oldmat->cstartbs; 2448899cda47SBarry Smith a->cendbs = oldmat->cendbs; 2449a30f8f8cSSatish Balay 2450a30f8f8cSSatish Balay /* hash table stuff */ 2451f4259b30SLisandro Dalcin a->ht = NULL; 2452f4259b30SLisandro Dalcin a->hd = NULL; 2453a30f8f8cSSatish Balay a->ht_size = 0; 2454a30f8f8cSSatish Balay a->ht_flag = oldmat->ht_flag; 2455a30f8f8cSSatish Balay a->ht_fact = oldmat->ht_fact; 2456a30f8f8cSSatish Balay a->ht_total_ct = 0; 2457a30f8f8cSSatish Balay a->ht_insert_ct = 0; 2458a30f8f8cSSatish Balay 24599566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(a->rangebs, oldmat->rangebs, a->size + 2)); 2460a30f8f8cSSatish Balay if (oldmat->colmap) { 2461a30f8f8cSSatish Balay #if defined(PETSC_USE_CTABLE) 2462*eec179cfSJacob Faibussowitsch PetscCall(PetscHMapIDuplicate(oldmat->colmap, &a->colmap)); 2463a30f8f8cSSatish Balay #else 24649566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(a->Nbs, &a->colmap)); 24659566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(a->colmap, oldmat->colmap, a->Nbs)); 2466a30f8f8cSSatish Balay #endif 2467f4259b30SLisandro Dalcin } else a->colmap = NULL; 2468387bc808SHong Zhang 2469a30f8f8cSSatish Balay if (oldmat->garray && (len = ((Mat_SeqBAIJ *)(oldmat->B->data))->nbs)) { 24709566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(len, &a->garray)); 24719566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(a->garray, oldmat->garray, len)); 2472f4259b30SLisandro Dalcin } else a->garray = NULL; 2473a30f8f8cSSatish Balay 24749566063dSJacob Faibussowitsch PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)matin), matin->rmap->bs, &mat->bstash)); 24759566063dSJacob Faibussowitsch PetscCall(VecDuplicate(oldmat->lvec, &a->lvec)); 24769566063dSJacob Faibussowitsch PetscCall(VecScatterCopy(oldmat->Mvctx, &a->Mvctx)); 247782327fa8SHong Zhang 24789566063dSJacob Faibussowitsch PetscCall(VecDuplicate(oldmat->slvec0, &a->slvec0)); 24799566063dSJacob Faibussowitsch PetscCall(VecDuplicate(oldmat->slvec1, &a->slvec1)); 2480387bc808SHong Zhang 24819566063dSJacob Faibussowitsch PetscCall(VecGetLocalSize(a->slvec1, &nt)); 24829566063dSJacob Faibussowitsch PetscCall(VecGetArray(a->slvec1, &array)); 24839566063dSJacob Faibussowitsch PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, bs * mbs, array, &a->slvec1a)); 24849566063dSJacob Faibussowitsch PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, nt - bs * mbs, array + bs * mbs, &a->slvec1b)); 24859566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(a->slvec1, &array)); 24869566063dSJacob Faibussowitsch PetscCall(VecGetArray(a->slvec0, &array)); 24879566063dSJacob Faibussowitsch PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, nt - bs * mbs, array + bs * mbs, &a->slvec0b)); 24889566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(a->slvec0, &array)); 2489387bc808SHong Zhang 2490387bc808SHong Zhang /* ierr = VecScatterCopy(oldmat->sMvctx,&a->sMvctx); - not written yet, replaced by the lazy trick: */ 24919566063dSJacob Faibussowitsch PetscCall(PetscObjectReference((PetscObject)oldmat->sMvctx)); 2492387bc808SHong Zhang a->sMvctx = oldmat->sMvctx; 249382327fa8SHong Zhang 24949566063dSJacob Faibussowitsch PetscCall(MatDuplicate(oldmat->A, cpvalues, &a->A)); 24959566063dSJacob Faibussowitsch PetscCall(MatDuplicate(oldmat->B, cpvalues, &a->B)); 24969566063dSJacob Faibussowitsch PetscCall(PetscFunctionListDuplicate(((PetscObject)matin)->qlist, &((PetscObject)mat)->qlist)); 2497a30f8f8cSSatish Balay *newmat = mat; 2498a30f8f8cSSatish Balay PetscFunctionReturn(0); 2499a30f8f8cSSatish Balay } 2500a30f8f8cSSatish Balay 2501618cc2edSLisandro Dalcin /* Used for both MPIBAIJ and MPISBAIJ matrices */ 2502618cc2edSLisandro Dalcin #define MatLoad_MPISBAIJ_Binary MatLoad_MPIBAIJ_Binary 2503618cc2edSLisandro Dalcin 2504d71ae5a4SJacob Faibussowitsch PetscErrorCode MatLoad_MPISBAIJ(Mat mat, PetscViewer viewer) 2505d71ae5a4SJacob Faibussowitsch { 25067f489da9SVaclav Hapla PetscBool isbinary; 250795936485SShri Abhyankar 250895936485SShri Abhyankar PetscFunctionBegin; 25099566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary)); 25105f80ce2aSJacob Faibussowitsch PetscCheck(isbinary, PetscObjectComm((PetscObject)viewer), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)mat)->type_name); 25119566063dSJacob Faibussowitsch PetscCall(MatLoad_MPISBAIJ_Binary(mat, viewer)); 251295936485SShri Abhyankar PetscFunctionReturn(0); 251395936485SShri Abhyankar } 251495936485SShri Abhyankar 2515d71ae5a4SJacob Faibussowitsch PetscErrorCode MatGetRowMaxAbs_MPISBAIJ(Mat A, Vec v, PetscInt idx[]) 2516d71ae5a4SJacob Faibussowitsch { 251724d5174aSHong Zhang Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data; 2518f4c0e9e4SHong Zhang Mat_SeqBAIJ *b = (Mat_SeqBAIJ *)(a->B)->data; 2519ca54ac64SHong Zhang PetscReal atmp; 252087828ca2SBarry Smith PetscReal *work, *svalues, *rvalues; 25211302d50aSBarry Smith PetscInt i, bs, mbs, *bi, *bj, brow, j, ncols, krow, kcol, col, row, Mbs, bcol; 25221302d50aSBarry Smith PetscMPIInt rank, size; 25231302d50aSBarry Smith PetscInt *rowners_bs, dest, count, source; 252487828ca2SBarry Smith PetscScalar *va; 25258a1c53f2SBarry Smith MatScalar *ba; 2526f4c0e9e4SHong Zhang MPI_Status stat; 252724d5174aSHong Zhang 252824d5174aSHong Zhang PetscFunctionBegin; 25295f80ce2aSJacob Faibussowitsch PetscCheck(!idx, PETSC_COMM_SELF, PETSC_ERR_SUP, "Send email to petsc-maint@mcs.anl.gov"); 25309566063dSJacob Faibussowitsch PetscCall(MatGetRowMaxAbs(a->A, v, NULL)); 25319566063dSJacob Faibussowitsch PetscCall(VecGetArray(v, &va)); 2532f4c0e9e4SHong Zhang 25339566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 25349566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)A), &rank)); 2535f4c0e9e4SHong Zhang 2536d0f46423SBarry Smith bs = A->rmap->bs; 2537f4c0e9e4SHong Zhang mbs = a->mbs; 2538f4c0e9e4SHong Zhang Mbs = a->Mbs; 2539f4c0e9e4SHong Zhang ba = b->a; 2540f4c0e9e4SHong Zhang bi = b->i; 2541f4c0e9e4SHong Zhang bj = b->j; 2542f4c0e9e4SHong Zhang 2543f4c0e9e4SHong Zhang /* find ownerships */ 2544d0f46423SBarry Smith rowners_bs = A->rmap->range; 2545f4c0e9e4SHong Zhang 2546f4c0e9e4SHong Zhang /* each proc creates an array to be distributed */ 25479566063dSJacob Faibussowitsch PetscCall(PetscCalloc1(bs * Mbs, &work)); 2548f4c0e9e4SHong Zhang 2549f4c0e9e4SHong Zhang /* row_max for B */ 2550b8475685SHong Zhang if (rank != size - 1) { 2551f4c0e9e4SHong Zhang for (i = 0; i < mbs; i++) { 25529371c9d4SSatish Balay ncols = bi[1] - bi[0]; 25539371c9d4SSatish Balay bi++; 2554f4c0e9e4SHong Zhang brow = bs * i; 2555f4c0e9e4SHong Zhang for (j = 0; j < ncols; j++) { 2556f4c0e9e4SHong Zhang bcol = bs * (*bj); 2557f4c0e9e4SHong Zhang for (kcol = 0; kcol < bs; kcol++) { 2558ca54ac64SHong Zhang col = bcol + kcol; /* local col index */ 255904d41228SHong Zhang col += rowners_bs[rank + 1]; /* global col index */ 2560f4c0e9e4SHong Zhang for (krow = 0; krow < bs; krow++) { 25619371c9d4SSatish Balay atmp = PetscAbsScalar(*ba); 25629371c9d4SSatish Balay ba++; 2563ca54ac64SHong Zhang row = brow + krow; /* local row index */ 2564ca54ac64SHong Zhang if (PetscRealPart(va[row]) < atmp) va[row] = atmp; 2565f4c0e9e4SHong Zhang if (work[col] < atmp) work[col] = atmp; 2566f4c0e9e4SHong Zhang } 2567f4c0e9e4SHong Zhang } 2568f4c0e9e4SHong Zhang bj++; 2569f4c0e9e4SHong Zhang } 2570f4c0e9e4SHong Zhang } 2571f4c0e9e4SHong Zhang 2572f4c0e9e4SHong Zhang /* send values to its owners */ 2573f4c0e9e4SHong Zhang for (dest = rank + 1; dest < size; dest++) { 2574f4c0e9e4SHong Zhang svalues = work + rowners_bs[dest]; 2575ca54ac64SHong Zhang count = rowners_bs[dest + 1] - rowners_bs[dest]; 25769566063dSJacob Faibussowitsch PetscCallMPI(MPI_Send(svalues, count, MPIU_REAL, dest, rank, PetscObjectComm((PetscObject)A))); 2577ca54ac64SHong Zhang } 2578f4c0e9e4SHong Zhang } 2579f4c0e9e4SHong Zhang 2580f4c0e9e4SHong Zhang /* receive values */ 2581ca54ac64SHong Zhang if (rank) { 2582f4c0e9e4SHong Zhang rvalues = work; 2583ca54ac64SHong Zhang count = rowners_bs[rank + 1] - rowners_bs[rank]; 2584f4c0e9e4SHong Zhang for (source = 0; source < rank; source++) { 25859566063dSJacob Faibussowitsch PetscCallMPI(MPI_Recv(rvalues, count, MPIU_REAL, MPI_ANY_SOURCE, MPI_ANY_TAG, PetscObjectComm((PetscObject)A), &stat)); 2586f4c0e9e4SHong Zhang /* process values */ 2587f4c0e9e4SHong Zhang for (i = 0; i < count; i++) { 2588ca54ac64SHong Zhang if (PetscRealPart(va[i]) < rvalues[i]) va[i] = rvalues[i]; 2589f4c0e9e4SHong Zhang } 2590f4c0e9e4SHong Zhang } 2591ca54ac64SHong Zhang } 2592f4c0e9e4SHong Zhang 25939566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(v, &va)); 25949566063dSJacob Faibussowitsch PetscCall(PetscFree(work)); 259524d5174aSHong Zhang PetscFunctionReturn(0); 259624d5174aSHong Zhang } 25972798e883SHong Zhang 2598d71ae5a4SJacob Faibussowitsch PetscErrorCode MatSOR_MPISBAIJ(Mat matin, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx) 2599d71ae5a4SJacob Faibussowitsch { 26002798e883SHong Zhang Mat_MPISBAIJ *mat = (Mat_MPISBAIJ *)matin->data; 2601d0f46423SBarry Smith PetscInt mbs = mat->mbs, bs = matin->rmap->bs; 26023649974fSBarry Smith PetscScalar *x, *ptr, *from; 2603ffe4fb16SHong Zhang Vec bb1; 26043649974fSBarry Smith const PetscScalar *b; 2605ffe4fb16SHong Zhang 2606ffe4fb16SHong Zhang PetscFunctionBegin; 26075f80ce2aSJacob Faibussowitsch PetscCheck(its > 0 && lits > 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Relaxation requires global its %" PetscInt_FMT " and local its %" PetscInt_FMT " both positive", its, lits); 26085f80ce2aSJacob Faibussowitsch PetscCheck(bs <= 1, PETSC_COMM_SELF, PETSC_ERR_SUP, "SSOR for block size > 1 is not yet implemented"); 2609ffe4fb16SHong Zhang 2610a2b30743SBarry Smith if (flag == SOR_APPLY_UPPER) { 26119566063dSJacob Faibussowitsch PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx)); 2612a2b30743SBarry Smith PetscFunctionReturn(0); 2613a2b30743SBarry Smith } 2614a2b30743SBarry Smith 2615ffe4fb16SHong Zhang if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 2616ffe4fb16SHong Zhang if (flag & SOR_ZERO_INITIAL_GUESS) { 26179566063dSJacob Faibussowitsch PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, lits, xx)); 2618ffe4fb16SHong Zhang its--; 2619ffe4fb16SHong Zhang } 2620ffe4fb16SHong Zhang 26219566063dSJacob Faibussowitsch PetscCall(VecDuplicate(bb, &bb1)); 2622ffe4fb16SHong Zhang while (its--) { 2623ffe4fb16SHong Zhang /* lower triangular part: slvec0b = - B^T*xx */ 26249566063dSJacob Faibussowitsch PetscCall((*mat->B->ops->multtranspose)(mat->B, xx, mat->slvec0b)); 2625ffe4fb16SHong Zhang 2626ffe4fb16SHong Zhang /* copy xx into slvec0a */ 26279566063dSJacob Faibussowitsch PetscCall(VecGetArray(mat->slvec0, &ptr)); 26289566063dSJacob Faibussowitsch PetscCall(VecGetArray(xx, &x)); 26299566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(ptr, x, bs * mbs)); 26309566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(mat->slvec0, &ptr)); 2631ffe4fb16SHong Zhang 26329566063dSJacob Faibussowitsch PetscCall(VecScale(mat->slvec0, -1.0)); 2633ffe4fb16SHong Zhang 2634ffe4fb16SHong Zhang /* copy bb into slvec1a */ 26359566063dSJacob Faibussowitsch PetscCall(VecGetArray(mat->slvec1, &ptr)); 26369566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(bb, &b)); 26379566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(ptr, b, bs * mbs)); 26389566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(mat->slvec1, &ptr)); 2639ffe4fb16SHong Zhang 2640ffe4fb16SHong Zhang /* set slvec1b = 0 */ 26419566063dSJacob Faibussowitsch PetscCall(VecSet(mat->slvec1b, 0.0)); 2642ffe4fb16SHong Zhang 26439566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(mat->sMvctx, mat->slvec0, mat->slvec1, ADD_VALUES, SCATTER_FORWARD)); 26449566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(xx, &x)); 26459566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(bb, &b)); 26469566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(mat->sMvctx, mat->slvec0, mat->slvec1, ADD_VALUES, SCATTER_FORWARD)); 2647ffe4fb16SHong Zhang 2648ffe4fb16SHong Zhang /* upper triangular part: bb1 = bb1 - B*x */ 26499566063dSJacob Faibussowitsch PetscCall((*mat->B->ops->multadd)(mat->B, mat->slvec1b, mat->slvec1a, bb1)); 2650ffe4fb16SHong Zhang 2651ffe4fb16SHong Zhang /* local diagonal sweep */ 26529566063dSJacob Faibussowitsch PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_SYMMETRIC_SWEEP, fshift, lits, lits, xx)); 2653ffe4fb16SHong Zhang } 26549566063dSJacob Faibussowitsch PetscCall(VecDestroy(&bb1)); 2655fa22f6d0SBarry Smith } else if ((flag & SOR_LOCAL_FORWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) { 26569566063dSJacob Faibussowitsch PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx)); 2657fa22f6d0SBarry Smith } else if ((flag & SOR_LOCAL_BACKWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) { 26589566063dSJacob Faibussowitsch PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx)); 2659fa22f6d0SBarry Smith } else if (flag & SOR_EISENSTAT) { 2660fa22f6d0SBarry Smith Vec xx1; 2661ace3abfcSBarry Smith PetscBool hasop; 266220f1ed55SBarry Smith const PetscScalar *diag; 2663887ee2caSBarry Smith PetscScalar *sl, scale = (omega - 2.0) / omega; 266420f1ed55SBarry Smith PetscInt i, n; 2665fa22f6d0SBarry Smith 2666fa22f6d0SBarry Smith if (!mat->xx1) { 26679566063dSJacob Faibussowitsch PetscCall(VecDuplicate(bb, &mat->xx1)); 26689566063dSJacob Faibussowitsch PetscCall(VecDuplicate(bb, &mat->bb1)); 2669fa22f6d0SBarry Smith } 2670fa22f6d0SBarry Smith xx1 = mat->xx1; 2671fa22f6d0SBarry Smith bb1 = mat->bb1; 2672fa22f6d0SBarry Smith 26739566063dSJacob Faibussowitsch PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, (MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP), fshift, lits, 1, xx)); 2674fa22f6d0SBarry Smith 2675fa22f6d0SBarry Smith if (!mat->diag) { 2676effcda25SBarry Smith /* this is wrong for same matrix with new nonzero values */ 26779566063dSJacob Faibussowitsch PetscCall(MatCreateVecs(matin, &mat->diag, NULL)); 26789566063dSJacob Faibussowitsch PetscCall(MatGetDiagonal(matin, mat->diag)); 2679fa22f6d0SBarry Smith } 26809566063dSJacob Faibussowitsch PetscCall(MatHasOperation(matin, MATOP_MULT_DIAGONAL_BLOCK, &hasop)); 2681fa22f6d0SBarry Smith 2682fa22f6d0SBarry Smith if (hasop) { 26839566063dSJacob Faibussowitsch PetscCall(MatMultDiagonalBlock(matin, xx, bb1)); 26849566063dSJacob Faibussowitsch PetscCall(VecAYPX(mat->slvec1a, scale, bb)); 268520f1ed55SBarry Smith } else { 268620f1ed55SBarry Smith /* 268720f1ed55SBarry Smith These two lines are replaced by code that may be a bit faster for a good compiler 26889566063dSJacob Faibussowitsch PetscCall(VecPointwiseMult(mat->slvec1a,mat->diag,xx)); 26899566063dSJacob Faibussowitsch PetscCall(VecAYPX(mat->slvec1a,scale,bb)); 269020f1ed55SBarry Smith */ 26919566063dSJacob Faibussowitsch PetscCall(VecGetArray(mat->slvec1a, &sl)); 26929566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(mat->diag, &diag)); 26939566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(bb, &b)); 26949566063dSJacob Faibussowitsch PetscCall(VecGetArray(xx, &x)); 26959566063dSJacob Faibussowitsch PetscCall(VecGetLocalSize(xx, &n)); 2696887ee2caSBarry Smith if (omega == 1.0) { 269726fbe8dcSKarl Rupp for (i = 0; i < n; i++) sl[i] = b[i] - diag[i] * x[i]; 26989566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(2.0 * n)); 2699887ee2caSBarry Smith } else { 270026fbe8dcSKarl Rupp for (i = 0; i < n; i++) sl[i] = b[i] + scale * diag[i] * x[i]; 27019566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(3.0 * n)); 2702887ee2caSBarry Smith } 27039566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(mat->slvec1a, &sl)); 27049566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(mat->diag, &diag)); 27059566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(bb, &b)); 27069566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(xx, &x)); 270720f1ed55SBarry Smith } 2708fa22f6d0SBarry Smith 2709fa22f6d0SBarry Smith /* multiply off-diagonal portion of matrix */ 27109566063dSJacob Faibussowitsch PetscCall(VecSet(mat->slvec1b, 0.0)); 27119566063dSJacob Faibussowitsch PetscCall((*mat->B->ops->multtranspose)(mat->B, xx, mat->slvec0b)); 27129566063dSJacob Faibussowitsch PetscCall(VecGetArray(mat->slvec0, &from)); 27139566063dSJacob Faibussowitsch PetscCall(VecGetArray(xx, &x)); 27149566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(from, x, bs * mbs)); 27159566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(mat->slvec0, &from)); 27169566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(xx, &x)); 27179566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(mat->sMvctx, mat->slvec0, mat->slvec1, ADD_VALUES, SCATTER_FORWARD)); 27189566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(mat->sMvctx, mat->slvec0, mat->slvec1, ADD_VALUES, SCATTER_FORWARD)); 27199566063dSJacob Faibussowitsch PetscCall((*mat->B->ops->multadd)(mat->B, mat->slvec1b, mat->slvec1a, mat->slvec1a)); 2720fa22f6d0SBarry Smith 2721fa22f6d0SBarry Smith /* local sweep */ 27229566063dSJacob Faibussowitsch PetscCall((*mat->A->ops->sor)(mat->A, mat->slvec1a, omega, (MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP), fshift, lits, 1, xx1)); 27239566063dSJacob Faibussowitsch PetscCall(VecAXPY(xx, 1.0, xx1)); 2724f23aa3ddSBarry Smith } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "MatSORType is not supported for SBAIJ matrix format"); 2725ffe4fb16SHong Zhang PetscFunctionReturn(0); 2726ffe4fb16SHong Zhang } 2727ffe4fb16SHong Zhang 2728dfb205c3SBarry Smith /*@ 272911a5261eSBarry Smith MatCreateMPISBAIJWithArrays - creates a `MATMPISBAIJ` matrix using arrays that contain in standard 2730dfb205c3SBarry Smith CSR format the local rows. 2731dfb205c3SBarry Smith 2732d083f849SBarry Smith Collective 2733dfb205c3SBarry Smith 2734dfb205c3SBarry Smith Input Parameters: 2735dfb205c3SBarry Smith + comm - MPI communicator 2736dfb205c3SBarry Smith . bs - the block size, only a block size of 1 is supported 273711a5261eSBarry Smith . m - number of local rows (Cannot be `PETSC_DECIDE`) 2738dfb205c3SBarry Smith . n - This value should be the same as the local size used in creating the 273911a5261eSBarry Smith x vector for the matrix-vector product y = Ax. (or `PETSC_DECIDE` to have 2740dfb205c3SBarry Smith calculated if N is given) For square matrices n is almost always m. 274111a5261eSBarry Smith . M - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given) 274211a5261eSBarry Smith . N - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given) 2743483a2f95SBarry Smith . i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of block elements in that row block row of the matrix 2744dfb205c3SBarry Smith . j - column indices 2745dfb205c3SBarry Smith - a - matrix values 2746dfb205c3SBarry Smith 2747dfb205c3SBarry Smith Output Parameter: 2748dfb205c3SBarry Smith . mat - the matrix 2749dfb205c3SBarry Smith 2750dfb205c3SBarry Smith Level: intermediate 2751dfb205c3SBarry Smith 2752dfb205c3SBarry Smith Notes: 2753dfb205c3SBarry Smith The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 2754dfb205c3SBarry Smith thus you CANNOT change the matrix entries by changing the values of a[] after you have 2755dfb205c3SBarry Smith called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 2756dfb205c3SBarry Smith 2757dfb205c3SBarry Smith The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 2758dfb205c3SBarry Smith 275911a5261eSBarry Smith .seealso: `MATMPISBAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`, 2760db781477SPatrick Sanan `MPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()` 2761dfb205c3SBarry Smith @*/ 2762d71ae5a4SJacob Faibussowitsch PetscErrorCode MatCreateMPISBAIJWithArrays(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt M, PetscInt N, const PetscInt i[], const PetscInt j[], const PetscScalar a[], Mat *mat) 2763d71ae5a4SJacob Faibussowitsch { 2764dfb205c3SBarry Smith PetscFunctionBegin; 27655f80ce2aSJacob Faibussowitsch PetscCheck(!i[0], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0"); 27665f80ce2aSJacob Faibussowitsch PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative"); 27679566063dSJacob Faibussowitsch PetscCall(MatCreate(comm, mat)); 27689566063dSJacob Faibussowitsch PetscCall(MatSetSizes(*mat, m, n, M, N)); 27699566063dSJacob Faibussowitsch PetscCall(MatSetType(*mat, MATMPISBAIJ)); 27709566063dSJacob Faibussowitsch PetscCall(MatMPISBAIJSetPreallocationCSR(*mat, bs, i, j, a)); 2771dfb205c3SBarry Smith PetscFunctionReturn(0); 2772dfb205c3SBarry Smith } 2773dfb205c3SBarry Smith 2774dfb205c3SBarry Smith /*@C 277511a5261eSBarry Smith MatMPISBAIJSetPreallocationCSR - Creates a sparse parallel matrix in `MATMPISBAIJ` format using the given nonzero structure and (optional) numerical values 2776dfb205c3SBarry Smith 2777d083f849SBarry Smith Collective 2778dfb205c3SBarry Smith 2779dfb205c3SBarry Smith Input Parameters: 27801c4f3114SJed Brown + B - the matrix 2781dfb205c3SBarry Smith . bs - the block size 2782dfb205c3SBarry Smith . i - the indices into j for the start of each local row (starts with zero) 2783dfb205c3SBarry Smith . j - the column indices for each local row (starts with zero) these must be sorted for each row 2784dfb205c3SBarry Smith - v - optional values in the matrix 2785dfb205c3SBarry Smith 2786664954b6SBarry Smith Level: advanced 2787664954b6SBarry Smith 2788664954b6SBarry Smith Notes: 27890cd7f59aSBarry Smith Though this routine has Preallocation() in the name it also sets the exact nonzero locations of the matrix entries 27900cd7f59aSBarry Smith and usually the numerical values as well 27910cd7f59aSBarry Smith 279250c5228eSBarry Smith Any entries below the diagonal are ignored 2793dfb205c3SBarry Smith 279411a5261eSBarry Smith .seealso: `MATMPISBAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIBAIJSetPreallocation()`, `MatCreateAIJ()`, `MPIAIJ` 2795dfb205c3SBarry Smith @*/ 2796d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMPISBAIJSetPreallocationCSR(Mat B, PetscInt bs, const PetscInt i[], const PetscInt j[], const PetscScalar v[]) 2797d71ae5a4SJacob Faibussowitsch { 2798dfb205c3SBarry Smith PetscFunctionBegin; 2799cac4c232SBarry Smith PetscTryMethod(B, "MatMPISBAIJSetPreallocationCSR_C", (Mat, PetscInt, const PetscInt[], const PetscInt[], const PetscScalar[]), (B, bs, i, j, v)); 2800dfb205c3SBarry Smith PetscFunctionReturn(0); 2801dfb205c3SBarry Smith } 2802dfb205c3SBarry Smith 2803d71ae5a4SJacob Faibussowitsch PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPISBAIJ(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat) 2804d71ae5a4SJacob Faibussowitsch { 280510c56fdeSHong Zhang PetscInt m, N, i, rstart, nnz, Ii, bs, cbs; 280610c56fdeSHong Zhang PetscInt *indx; 280710c56fdeSHong Zhang PetscScalar *values; 2808dfb205c3SBarry Smith 28094dcd73b1SHong Zhang PetscFunctionBegin; 28109566063dSJacob Faibussowitsch PetscCall(MatGetSize(inmat, &m, &N)); 281110c56fdeSHong Zhang if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */ 281210c56fdeSHong Zhang Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)inmat->data; 2813de25e9cbSPierre Jolivet PetscInt *dnz, *onz, mbs, Nbs, nbs; 281410c56fdeSHong Zhang PetscInt *bindx, rmax = a->rmax, j; 2815de25e9cbSPierre Jolivet PetscMPIInt rank, size; 28164dcd73b1SHong Zhang 28179566063dSJacob Faibussowitsch PetscCall(MatGetBlockSizes(inmat, &bs, &cbs)); 28189371c9d4SSatish Balay mbs = m / bs; 28199371c9d4SSatish Balay Nbs = N / cbs; 282048a46eb9SPierre Jolivet if (n == PETSC_DECIDE) PetscCall(PetscSplitOwnershipBlock(comm, cbs, &n, &N)); 2821da91a574SPierre Jolivet nbs = n / cbs; 28224dcd73b1SHong Zhang 28239566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(rmax, &bindx)); 2824d0609cedSBarry Smith MatPreallocateBegin(comm, mbs, nbs, dnz, onz); /* inline function, output __end and __rstart are used below */ 2825de25e9cbSPierre Jolivet 28269566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_rank(comm, &rank)); 28279566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_rank(comm, &size)); 2828de25e9cbSPierre Jolivet if (rank == size - 1) { 2829de25e9cbSPierre Jolivet /* Check sum(nbs) = Nbs */ 28305f80ce2aSJacob Faibussowitsch PetscCheck(__end == Nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Sum of local block columns %" PetscInt_FMT " != global block columns %" PetscInt_FMT, __end, Nbs); 2831de25e9cbSPierre Jolivet } 2832de25e9cbSPierre Jolivet 2833d0609cedSBarry Smith rstart = __rstart; /* block rstart of *outmat; see inline function MatPreallocateBegin */ 28349566063dSJacob Faibussowitsch PetscCall(MatSetOption(inmat, MAT_GETROW_UPPERTRIANGULAR, PETSC_TRUE)); 283510c56fdeSHong Zhang for (i = 0; i < mbs; i++) { 28369566063dSJacob Faibussowitsch PetscCall(MatGetRow_SeqSBAIJ(inmat, i * bs, &nnz, &indx, NULL)); /* non-blocked nnz and indx */ 28374dcd73b1SHong Zhang nnz = nnz / bs; 28384dcd73b1SHong Zhang for (j = 0; j < nnz; j++) bindx[j] = indx[j * bs] / bs; 28399566063dSJacob Faibussowitsch PetscCall(MatPreallocateSet(i + rstart, nnz, bindx, dnz, onz)); 28409566063dSJacob Faibussowitsch PetscCall(MatRestoreRow_SeqSBAIJ(inmat, i * bs, &nnz, &indx, NULL)); 28414dcd73b1SHong Zhang } 28429566063dSJacob Faibussowitsch PetscCall(MatSetOption(inmat, MAT_GETROW_UPPERTRIANGULAR, PETSC_FALSE)); 28439566063dSJacob Faibussowitsch PetscCall(PetscFree(bindx)); 28444dcd73b1SHong Zhang 28459566063dSJacob Faibussowitsch PetscCall(MatCreate(comm, outmat)); 28469566063dSJacob Faibussowitsch PetscCall(MatSetSizes(*outmat, m, n, PETSC_DETERMINE, PETSC_DETERMINE)); 28479566063dSJacob Faibussowitsch PetscCall(MatSetBlockSizes(*outmat, bs, cbs)); 28489566063dSJacob Faibussowitsch PetscCall(MatSetType(*outmat, MATSBAIJ)); 28499566063dSJacob Faibussowitsch PetscCall(MatSeqSBAIJSetPreallocation(*outmat, bs, 0, dnz)); 28509566063dSJacob Faibussowitsch PetscCall(MatMPISBAIJSetPreallocation(*outmat, bs, 0, dnz, 0, onz)); 2851d0609cedSBarry Smith MatPreallocateEnd(dnz, onz); 28524dcd73b1SHong Zhang } 28534dcd73b1SHong Zhang 285410c56fdeSHong Zhang /* numeric phase */ 28559566063dSJacob Faibussowitsch PetscCall(MatGetBlockSizes(inmat, &bs, &cbs)); 28569566063dSJacob Faibussowitsch PetscCall(MatGetOwnershipRange(*outmat, &rstart, NULL)); 28574dcd73b1SHong Zhang 28589566063dSJacob Faibussowitsch PetscCall(MatSetOption(inmat, MAT_GETROW_UPPERTRIANGULAR, PETSC_TRUE)); 28594dcd73b1SHong Zhang for (i = 0; i < m; i++) { 28609566063dSJacob Faibussowitsch PetscCall(MatGetRow_SeqSBAIJ(inmat, i, &nnz, &indx, &values)); 28614dcd73b1SHong Zhang Ii = i + rstart; 28629566063dSJacob Faibussowitsch PetscCall(MatSetValues(*outmat, 1, &Ii, nnz, indx, values, INSERT_VALUES)); 28639566063dSJacob Faibussowitsch PetscCall(MatRestoreRow_SeqSBAIJ(inmat, i, &nnz, &indx, &values)); 28644dcd73b1SHong Zhang } 28659566063dSJacob Faibussowitsch PetscCall(MatSetOption(inmat, MAT_GETROW_UPPERTRIANGULAR, PETSC_FALSE)); 28669566063dSJacob Faibussowitsch PetscCall(MatAssemblyBegin(*outmat, MAT_FINAL_ASSEMBLY)); 28679566063dSJacob Faibussowitsch PetscCall(MatAssemblyEnd(*outmat, MAT_FINAL_ASSEMBLY)); 28684dcd73b1SHong Zhang PetscFunctionReturn(0); 28694dcd73b1SHong Zhang } 2870