1c6db04a5SJed Brown #include <../src/mat/impls/baij/mpi/mpibaij.h> /*I "petscmat.h" I*/ 2c6db04a5SJed Brown #include <../src/mat/impls/sbaij/mpi/mpisbaij.h> 3c6db04a5SJed Brown #include <../src/mat/impls/sbaij/seq/sbaij.h> 4c6db04a5SJed Brown #include <petscblaslapack.h> 5a30f8f8cSSatish Balay 626cec326SBarry Smith PetscErrorCode MatDestroy_MPISBAIJ(Mat mat) 726cec326SBarry Smith { 826cec326SBarry Smith Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data; 926cec326SBarry Smith 1026cec326SBarry Smith PetscFunctionBegin; 1126cec326SBarry Smith #if defined(PETSC_USE_LOG) 1226cec326SBarry Smith PetscCall(PetscLogObjectState((PetscObject)mat, "Rows=%" PetscInt_FMT ",Cols=%" PetscInt_FMT, mat->rmap->N, mat->cmap->N)); 1326cec326SBarry Smith #endif 1426cec326SBarry Smith PetscCall(MatStashDestroy_Private(&mat->stash)); 1526cec326SBarry Smith PetscCall(MatStashDestroy_Private(&mat->bstash)); 1626cec326SBarry Smith PetscCall(MatDestroy(&baij->A)); 1726cec326SBarry Smith PetscCall(MatDestroy(&baij->B)); 1826cec326SBarry Smith #if defined(PETSC_USE_CTABLE) 1926cec326SBarry Smith PetscCall(PetscHMapIDestroy(&baij->colmap)); 2026cec326SBarry Smith #else 2126cec326SBarry Smith PetscCall(PetscFree(baij->colmap)); 2226cec326SBarry Smith #endif 2326cec326SBarry Smith PetscCall(PetscFree(baij->garray)); 2426cec326SBarry Smith PetscCall(VecDestroy(&baij->lvec)); 2526cec326SBarry Smith PetscCall(VecScatterDestroy(&baij->Mvctx)); 2626cec326SBarry Smith PetscCall(VecDestroy(&baij->slvec0)); 2726cec326SBarry Smith PetscCall(VecDestroy(&baij->slvec0b)); 2826cec326SBarry Smith PetscCall(VecDestroy(&baij->slvec1)); 2926cec326SBarry Smith PetscCall(VecDestroy(&baij->slvec1a)); 3026cec326SBarry Smith PetscCall(VecDestroy(&baij->slvec1b)); 3126cec326SBarry Smith PetscCall(VecScatterDestroy(&baij->sMvctx)); 3226cec326SBarry Smith PetscCall(PetscFree2(baij->rowvalues, baij->rowindices)); 3326cec326SBarry Smith PetscCall(PetscFree(baij->barray)); 3426cec326SBarry Smith PetscCall(PetscFree(baij->hd)); 3526cec326SBarry Smith PetscCall(VecDestroy(&baij->diag)); 3626cec326SBarry Smith PetscCall(VecDestroy(&baij->bb1)); 3726cec326SBarry Smith PetscCall(VecDestroy(&baij->xx1)); 3826cec326SBarry Smith #if defined(PETSC_USE_REAL_MAT_SINGLE) 3926cec326SBarry Smith PetscCall(PetscFree(baij->setvaluescopy)); 4026cec326SBarry Smith #endif 4126cec326SBarry Smith PetscCall(PetscFree(baij->in_loc)); 4226cec326SBarry Smith PetscCall(PetscFree(baij->v_loc)); 4326cec326SBarry Smith PetscCall(PetscFree(baij->rangebs)); 4426cec326SBarry Smith PetscCall(PetscFree(mat->data)); 4526cec326SBarry Smith 4626cec326SBarry Smith PetscCall(PetscObjectChangeTypeName((PetscObject)mat, NULL)); 4726cec326SBarry Smith PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatStoreValues_C", NULL)); 4826cec326SBarry Smith PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatRetrieveValues_C", NULL)); 4926cec326SBarry Smith PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPISBAIJSetPreallocation_C", NULL)); 5026cec326SBarry Smith PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPISBAIJSetPreallocationCSR_C", NULL)); 5126cec326SBarry Smith #if defined(PETSC_HAVE_ELEMENTAL) 5226cec326SBarry Smith PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpisbaij_elemental_C", NULL)); 5326cec326SBarry Smith #endif 5426cec326SBarry Smith #if defined(PETSC_HAVE_SCALAPACK) 5526cec326SBarry Smith PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpisbaij_scalapack_C", NULL)); 5626cec326SBarry Smith #endif 5726cec326SBarry Smith PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpisbaij_mpiaij_C", NULL)); 5826cec326SBarry Smith PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpisbaij_mpibaij_C", NULL)); 5926cec326SBarry Smith PetscFunctionReturn(PETSC_SUCCESS); 6026cec326SBarry Smith } 6126cec326SBarry Smith 6226cec326SBarry Smith /* defines MatSetValues_MPI_Hash(), MatAssemblyBegin_MPI_Hash(), MatAssemblyEnd_MPI_Hash(), MatSetUp_MPI_Hash() */ 6326cec326SBarry Smith #define TYPE SBAIJ 6426cec326SBarry Smith #define TYPE_SBAIJ 6526cec326SBarry Smith #include "../src/mat/impls/aij/mpi/mpihashmat.h" 6626cec326SBarry Smith #undef TYPE 6726cec326SBarry Smith #undef TYPE_SBAIJ 6826cec326SBarry Smith 696214f412SHong Zhang #if defined(PETSC_HAVE_ELEMENTAL) 70cc2e6a90SBarry Smith PETSC_INTERN PetscErrorCode MatConvert_MPISBAIJ_Elemental(Mat, MatType, MatReuse, Mat *); 716214f412SHong Zhang #endif 72d24d4204SJose E. Roman #if defined(PETSC_HAVE_SCALAPACK) 73d24d4204SJose E. Roman PETSC_INTERN PetscErrorCode MatConvert_SBAIJ_ScaLAPACK(Mat, MatType, MatReuse, Mat *); 74d24d4204SJose E. Roman #endif 75b147fbf3SStefano Zampini 76b147fbf3SStefano Zampini /* This could be moved to matimpl.h */ 77d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatPreallocateWithMats_Private(Mat B, PetscInt nm, Mat X[], PetscBool symm[], PetscBool fill) 78d71ae5a4SJacob Faibussowitsch { 79b147fbf3SStefano Zampini Mat preallocator; 80b147fbf3SStefano Zampini PetscInt r, rstart, rend; 81b147fbf3SStefano Zampini PetscInt bs, i, m, n, M, N; 82b147fbf3SStefano Zampini PetscBool cong = PETSC_TRUE; 83b147fbf3SStefano Zampini 84b147fbf3SStefano Zampini PetscFunctionBegin; 85b147fbf3SStefano Zampini PetscValidHeaderSpecific(B, MAT_CLASSID, 1); 86b147fbf3SStefano Zampini PetscValidLogicalCollectiveInt(B, nm, 2); 87b147fbf3SStefano Zampini for (i = 0; i < nm; i++) { 88b147fbf3SStefano Zampini PetscValidHeaderSpecific(X[i], MAT_CLASSID, 3); 899566063dSJacob Faibussowitsch PetscCall(PetscLayoutCompare(B->rmap, X[i]->rmap, &cong)); 905f80ce2aSJacob Faibussowitsch PetscCheck(cong, PetscObjectComm((PetscObject)B), PETSC_ERR_SUP, "Not for different layouts"); 91b147fbf3SStefano Zampini } 92b147fbf3SStefano Zampini PetscValidLogicalCollectiveBool(B, fill, 5); 939566063dSJacob Faibussowitsch PetscCall(MatGetBlockSize(B, &bs)); 949566063dSJacob Faibussowitsch PetscCall(MatGetSize(B, &M, &N)); 959566063dSJacob Faibussowitsch PetscCall(MatGetLocalSize(B, &m, &n)); 969566063dSJacob Faibussowitsch PetscCall(MatCreate(PetscObjectComm((PetscObject)B), &preallocator)); 979566063dSJacob Faibussowitsch PetscCall(MatSetType(preallocator, MATPREALLOCATOR)); 989566063dSJacob Faibussowitsch PetscCall(MatSetBlockSize(preallocator, bs)); 999566063dSJacob Faibussowitsch PetscCall(MatSetSizes(preallocator, m, n, M, N)); 1009566063dSJacob Faibussowitsch PetscCall(MatSetUp(preallocator)); 1019566063dSJacob Faibussowitsch PetscCall(MatGetOwnershipRange(preallocator, &rstart, &rend)); 102b147fbf3SStefano Zampini for (r = rstart; r < rend; ++r) { 103b147fbf3SStefano Zampini PetscInt ncols; 104b147fbf3SStefano Zampini const PetscInt *row; 105b147fbf3SStefano Zampini const PetscScalar *vals; 106b147fbf3SStefano Zampini 107b147fbf3SStefano Zampini for (i = 0; i < nm; i++) { 1089566063dSJacob Faibussowitsch PetscCall(MatGetRow(X[i], r, &ncols, &row, &vals)); 1099566063dSJacob Faibussowitsch PetscCall(MatSetValues(preallocator, 1, &r, ncols, row, vals, INSERT_VALUES)); 11048a46eb9SPierre Jolivet if (symm && symm[i]) PetscCall(MatSetValues(preallocator, ncols, row, 1, &r, vals, INSERT_VALUES)); 1119566063dSJacob Faibussowitsch PetscCall(MatRestoreRow(X[i], r, &ncols, &row, &vals)); 112b147fbf3SStefano Zampini } 113b147fbf3SStefano Zampini } 1149566063dSJacob Faibussowitsch PetscCall(MatAssemblyBegin(preallocator, MAT_FINAL_ASSEMBLY)); 1159566063dSJacob Faibussowitsch PetscCall(MatAssemblyEnd(preallocator, MAT_FINAL_ASSEMBLY)); 1169566063dSJacob Faibussowitsch PetscCall(MatPreallocatorPreallocate(preallocator, fill, B)); 1179566063dSJacob Faibussowitsch PetscCall(MatDestroy(&preallocator)); 1183ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 119b147fbf3SStefano Zampini } 120b147fbf3SStefano Zampini 121d71ae5a4SJacob Faibussowitsch PETSC_INTERN PetscErrorCode MatConvert_MPISBAIJ_Basic(Mat A, MatType newtype, MatReuse reuse, Mat *newmat) 122d71ae5a4SJacob Faibussowitsch { 123b147fbf3SStefano Zampini Mat B; 124b147fbf3SStefano Zampini PetscInt r; 125b147fbf3SStefano Zampini 126b147fbf3SStefano Zampini PetscFunctionBegin; 127b147fbf3SStefano Zampini if (reuse != MAT_REUSE_MATRIX) { 12828d58a37SPierre Jolivet PetscBool symm = PETSC_TRUE, isdense; 129b147fbf3SStefano Zampini PetscInt bs; 130b147fbf3SStefano Zampini 1319566063dSJacob Faibussowitsch PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 1329566063dSJacob Faibussowitsch PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N)); 1339566063dSJacob Faibussowitsch PetscCall(MatSetType(B, newtype)); 1349566063dSJacob Faibussowitsch PetscCall(MatGetBlockSize(A, &bs)); 1359566063dSJacob Faibussowitsch PetscCall(MatSetBlockSize(B, bs)); 1369566063dSJacob Faibussowitsch PetscCall(PetscLayoutSetUp(B->rmap)); 1379566063dSJacob Faibussowitsch PetscCall(PetscLayoutSetUp(B->cmap)); 1389566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompareAny((PetscObject)B, &isdense, MATSEQDENSE, MATMPIDENSE, MATSEQDENSECUDA, "")); 13928d58a37SPierre Jolivet if (!isdense) { 1409566063dSJacob Faibussowitsch PetscCall(MatGetRowUpperTriangular(A)); 1419566063dSJacob Faibussowitsch PetscCall(MatPreallocateWithMats_Private(B, 1, &A, &symm, PETSC_TRUE)); 1429566063dSJacob Faibussowitsch PetscCall(MatRestoreRowUpperTriangular(A)); 14328d58a37SPierre Jolivet } else { 1449566063dSJacob Faibussowitsch PetscCall(MatSetUp(B)); 14528d58a37SPierre Jolivet } 14628d58a37SPierre Jolivet } else { 14728d58a37SPierre Jolivet B = *newmat; 1489566063dSJacob Faibussowitsch PetscCall(MatZeroEntries(B)); 14928d58a37SPierre Jolivet } 150b147fbf3SStefano Zampini 1519566063dSJacob Faibussowitsch PetscCall(MatGetRowUpperTriangular(A)); 152b147fbf3SStefano Zampini for (r = A->rmap->rstart; r < A->rmap->rend; r++) { 153b147fbf3SStefano Zampini PetscInt ncols; 154b147fbf3SStefano Zampini const PetscInt *row; 155b147fbf3SStefano Zampini const PetscScalar *vals; 156b147fbf3SStefano Zampini 1579566063dSJacob Faibussowitsch PetscCall(MatGetRow(A, r, &ncols, &row, &vals)); 1589566063dSJacob Faibussowitsch PetscCall(MatSetValues(B, 1, &r, ncols, row, vals, INSERT_VALUES)); 159eb1ec7c1SStefano Zampini #if defined(PETSC_USE_COMPLEX) 160b94d7dedSBarry Smith if (A->hermitian == PETSC_BOOL3_TRUE) { 161eb1ec7c1SStefano Zampini PetscInt i; 16248a46eb9SPierre Jolivet for (i = 0; i < ncols; i++) PetscCall(MatSetValue(B, row[i], r, PetscConj(vals[i]), INSERT_VALUES)); 163eb1ec7c1SStefano Zampini } else { 1649566063dSJacob Faibussowitsch PetscCall(MatSetValues(B, ncols, row, 1, &r, vals, INSERT_VALUES)); 165eb1ec7c1SStefano Zampini } 166eb1ec7c1SStefano Zampini #else 1679566063dSJacob Faibussowitsch PetscCall(MatSetValues(B, ncols, row, 1, &r, vals, INSERT_VALUES)); 168eb1ec7c1SStefano Zampini #endif 1699566063dSJacob Faibussowitsch PetscCall(MatRestoreRow(A, r, &ncols, &row, &vals)); 170b147fbf3SStefano Zampini } 1719566063dSJacob Faibussowitsch PetscCall(MatRestoreRowUpperTriangular(A)); 1729566063dSJacob Faibussowitsch PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 1739566063dSJacob Faibussowitsch PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 174b147fbf3SStefano Zampini 175b147fbf3SStefano Zampini if (reuse == MAT_INPLACE_MATRIX) { 1769566063dSJacob Faibussowitsch PetscCall(MatHeaderReplace(A, &B)); 177b147fbf3SStefano Zampini } else { 178b147fbf3SStefano Zampini *newmat = B; 179b147fbf3SStefano Zampini } 1803ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 181b147fbf3SStefano Zampini } 182b147fbf3SStefano Zampini 183d71ae5a4SJacob Faibussowitsch PetscErrorCode MatStoreValues_MPISBAIJ(Mat mat) 184d71ae5a4SJacob Faibussowitsch { 185f3566a2aSHong Zhang Mat_MPISBAIJ *aij = (Mat_MPISBAIJ *)mat->data; 186a30f8f8cSSatish Balay 187a30f8f8cSSatish Balay PetscFunctionBegin; 1889566063dSJacob Faibussowitsch PetscCall(MatStoreValues(aij->A)); 1899566063dSJacob Faibussowitsch PetscCall(MatStoreValues(aij->B)); 1903ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 191a30f8f8cSSatish Balay } 192a30f8f8cSSatish Balay 193d71ae5a4SJacob Faibussowitsch PetscErrorCode MatRetrieveValues_MPISBAIJ(Mat mat) 194d71ae5a4SJacob Faibussowitsch { 195f3566a2aSHong Zhang Mat_MPISBAIJ *aij = (Mat_MPISBAIJ *)mat->data; 196a30f8f8cSSatish Balay 197a30f8f8cSSatish Balay PetscFunctionBegin; 1989566063dSJacob Faibussowitsch PetscCall(MatRetrieveValues(aij->A)); 1999566063dSJacob Faibussowitsch PetscCall(MatRetrieveValues(aij->B)); 2003ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 201a30f8f8cSSatish Balay } 202a30f8f8cSSatish Balay 203d40312a9SBarry Smith #define MatSetValues_SeqSBAIJ_A_Private(row, col, value, addv, orow, ocol) \ 204a8f51744SPierre Jolivet do { \ 205a30f8f8cSSatish Balay brow = row / bs; \ 2069371c9d4SSatish Balay rp = aj + ai[brow]; \ 2079371c9d4SSatish Balay ap = aa + bs2 * ai[brow]; \ 2089371c9d4SSatish Balay rmax = aimax[brow]; \ 2099371c9d4SSatish Balay nrow = ailen[brow]; \ 210a30f8f8cSSatish Balay bcol = col / bs; \ 2119371c9d4SSatish Balay ridx = row % bs; \ 2129371c9d4SSatish Balay cidx = col % bs; \ 2139371c9d4SSatish Balay low = 0; \ 2149371c9d4SSatish Balay high = nrow; \ 215a30f8f8cSSatish Balay while (high - low > 3) { \ 216a30f8f8cSSatish Balay t = (low + high) / 2; \ 217a30f8f8cSSatish Balay if (rp[t] > bcol) high = t; \ 218a30f8f8cSSatish Balay else low = t; \ 219a30f8f8cSSatish Balay } \ 220a30f8f8cSSatish Balay for (_i = low; _i < high; _i++) { \ 221a30f8f8cSSatish Balay if (rp[_i] > bcol) break; \ 222a30f8f8cSSatish Balay if (rp[_i] == bcol) { \ 223a30f8f8cSSatish Balay bap = ap + bs2 * _i + bs * cidx + ridx; \ 224a30f8f8cSSatish Balay if (addv == ADD_VALUES) *bap += value; \ 225a30f8f8cSSatish Balay else *bap = value; \ 226a30f8f8cSSatish Balay goto a_noinsert; \ 227a30f8f8cSSatish Balay } \ 228a30f8f8cSSatish Balay } \ 229a30f8f8cSSatish Balay if (a->nonew == 1) goto a_noinsert; \ 2305f80ce2aSJacob Faibussowitsch PetscCheck(a->nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \ 231fef13f97SBarry Smith MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, brow, bcol, rmax, aa, ai, aj, rp, ap, aimax, a->nonew, MatScalar); \ 232a30f8f8cSSatish Balay N = nrow++ - 1; \ 233a30f8f8cSSatish Balay /* shift up all the later entries in this row */ \ 2349566063dSJacob Faibussowitsch PetscCall(PetscArraymove(rp + _i + 1, rp + _i, N - _i + 1)); \ 2359566063dSJacob Faibussowitsch PetscCall(PetscArraymove(ap + bs2 * (_i + 1), ap + bs2 * _i, bs2 * (N - _i + 1))); \ 2369566063dSJacob Faibussowitsch PetscCall(PetscArrayzero(ap + bs2 * _i, bs2)); \ 237a30f8f8cSSatish Balay rp[_i] = bcol; \ 238a30f8f8cSSatish Balay ap[bs2 * _i + bs * cidx + ridx] = value; \ 239e56f5c9eSBarry Smith A->nonzerostate++; \ 240a30f8f8cSSatish Balay a_noinsert:; \ 241a30f8f8cSSatish Balay ailen[brow] = nrow; \ 242a8f51744SPierre Jolivet } while (0) 243e5e170daSBarry Smith 244d40312a9SBarry Smith #define MatSetValues_SeqSBAIJ_B_Private(row, col, value, addv, orow, ocol) \ 245a8f51744SPierre Jolivet do { \ 246a30f8f8cSSatish Balay brow = row / bs; \ 2479371c9d4SSatish Balay rp = bj + bi[brow]; \ 2489371c9d4SSatish Balay ap = ba + bs2 * bi[brow]; \ 2499371c9d4SSatish Balay rmax = bimax[brow]; \ 2509371c9d4SSatish Balay nrow = bilen[brow]; \ 251a30f8f8cSSatish Balay bcol = col / bs; \ 2529371c9d4SSatish Balay ridx = row % bs; \ 2539371c9d4SSatish Balay cidx = col % bs; \ 2549371c9d4SSatish Balay low = 0; \ 2559371c9d4SSatish Balay high = nrow; \ 256a30f8f8cSSatish Balay while (high - low > 3) { \ 257a30f8f8cSSatish Balay t = (low + high) / 2; \ 258a30f8f8cSSatish Balay if (rp[t] > bcol) high = t; \ 259a30f8f8cSSatish Balay else low = t; \ 260a30f8f8cSSatish Balay } \ 261a30f8f8cSSatish Balay for (_i = low; _i < high; _i++) { \ 262a30f8f8cSSatish Balay if (rp[_i] > bcol) break; \ 263a30f8f8cSSatish Balay if (rp[_i] == bcol) { \ 264a30f8f8cSSatish Balay bap = ap + bs2 * _i + bs * cidx + ridx; \ 265a30f8f8cSSatish Balay if (addv == ADD_VALUES) *bap += value; \ 266a30f8f8cSSatish Balay else *bap = value; \ 267a30f8f8cSSatish Balay goto b_noinsert; \ 268a30f8f8cSSatish Balay } \ 269a30f8f8cSSatish Balay } \ 270a30f8f8cSSatish Balay if (b->nonew == 1) goto b_noinsert; \ 2715f80ce2aSJacob Faibussowitsch PetscCheck(b->nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \ 272fef13f97SBarry Smith MatSeqXAIJReallocateAIJ(B, b->mbs, bs2, nrow, brow, bcol, rmax, ba, bi, bj, rp, ap, bimax, b->nonew, MatScalar); \ 273a30f8f8cSSatish Balay N = nrow++ - 1; \ 274a30f8f8cSSatish Balay /* shift up all the later entries in this row */ \ 2759566063dSJacob Faibussowitsch PetscCall(PetscArraymove(rp + _i + 1, rp + _i, N - _i + 1)); \ 2769566063dSJacob Faibussowitsch PetscCall(PetscArraymove(ap + bs2 * (_i + 1), ap + bs2 * _i, bs2 * (N - _i + 1))); \ 2779566063dSJacob Faibussowitsch PetscCall(PetscArrayzero(ap + bs2 * _i, bs2)); \ 278a30f8f8cSSatish Balay rp[_i] = bcol; \ 279a30f8f8cSSatish Balay ap[bs2 * _i + bs * cidx + ridx] = value; \ 280e56f5c9eSBarry Smith B->nonzerostate++; \ 281a30f8f8cSSatish Balay b_noinsert:; \ 282a30f8f8cSSatish Balay bilen[brow] = nrow; \ 283a8f51744SPierre Jolivet } while (0) 284a30f8f8cSSatish Balay 285a30f8f8cSSatish Balay /* Only add/insert a(i,j) with i<=j (blocks). 286da81f932SPierre Jolivet Any a(i,j) with i>j input by user is ignored or generates an error 287a30f8f8cSSatish Balay */ 288d71ae5a4SJacob Faibussowitsch PetscErrorCode MatSetValues_MPISBAIJ(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv) 289d71ae5a4SJacob Faibussowitsch { 290a30f8f8cSSatish Balay Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data; 291a30f8f8cSSatish Balay MatScalar value; 292ace3abfcSBarry Smith PetscBool roworiented = baij->roworiented; 2931302d50aSBarry Smith PetscInt i, j, row, col; 294d0f46423SBarry Smith PetscInt rstart_orig = mat->rmap->rstart; 295d0f46423SBarry Smith PetscInt rend_orig = mat->rmap->rend, cstart_orig = mat->cmap->rstart; 296d0f46423SBarry Smith PetscInt cend_orig = mat->cmap->rend, bs = mat->rmap->bs; 297a30f8f8cSSatish Balay 298a30f8f8cSSatish Balay /* Some Variables required in the macro */ 299a30f8f8cSSatish Balay Mat A = baij->A; 300a30f8f8cSSatish Balay Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)(A)->data; 3011302d50aSBarry Smith PetscInt *aimax = a->imax, *ai = a->i, *ailen = a->ilen, *aj = a->j; 302a30f8f8cSSatish Balay MatScalar *aa = a->a; 303a30f8f8cSSatish Balay 304a30f8f8cSSatish Balay Mat B = baij->B; 305a30f8f8cSSatish Balay Mat_SeqBAIJ *b = (Mat_SeqBAIJ *)(B)->data; 3061302d50aSBarry Smith PetscInt *bimax = b->imax, *bi = b->i, *bilen = b->ilen, *bj = b->j; 307a30f8f8cSSatish Balay MatScalar *ba = b->a; 308a30f8f8cSSatish Balay 3091302d50aSBarry Smith PetscInt *rp, ii, nrow, _i, rmax, N, brow, bcol; 3101302d50aSBarry Smith PetscInt low, high, t, ridx, cidx, bs2 = a->bs2; 311a30f8f8cSSatish Balay MatScalar *ap, *bap; 312a30f8f8cSSatish Balay 313a30f8f8cSSatish Balay /* for stash */ 3140298fd71SBarry Smith PetscInt n_loc, *in_loc = NULL; 3150298fd71SBarry Smith MatScalar *v_loc = NULL; 316a30f8f8cSSatish Balay 317a30f8f8cSSatish Balay PetscFunctionBegin; 318a30f8f8cSSatish Balay if (!baij->donotstash) { 31959ffdab8SBarry Smith if (n > baij->n_loc) { 3209566063dSJacob Faibussowitsch PetscCall(PetscFree(baij->in_loc)); 3219566063dSJacob Faibussowitsch PetscCall(PetscFree(baij->v_loc)); 3229566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(n, &baij->in_loc)); 3239566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(n, &baij->v_loc)); 32426fbe8dcSKarl Rupp 32559ffdab8SBarry Smith baij->n_loc = n; 32659ffdab8SBarry Smith } 32759ffdab8SBarry Smith in_loc = baij->in_loc; 32859ffdab8SBarry Smith v_loc = baij->v_loc; 329a30f8f8cSSatish Balay } 330a30f8f8cSSatish Balay 331a30f8f8cSSatish Balay for (i = 0; i < m; i++) { 332a30f8f8cSSatish Balay if (im[i] < 0) continue; 3335f80ce2aSJacob Faibussowitsch PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1); 334a30f8f8cSSatish Balay if (im[i] >= rstart_orig && im[i] < rend_orig) { /* this processor entry */ 335a30f8f8cSSatish Balay row = im[i] - rstart_orig; /* local row index */ 336a30f8f8cSSatish Balay for (j = 0; j < n; j++) { 33701b2bd88SHong Zhang if (im[i] / bs > in[j] / bs) { 33801b2bd88SHong Zhang if (a->ignore_ltriangular) { 33901b2bd88SHong Zhang continue; /* ignore lower triangular blocks */ 34026fbe8dcSKarl Rupp } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_USER, "Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)"); 34101b2bd88SHong Zhang } 342a30f8f8cSSatish Balay if (in[j] >= cstart_orig && in[j] < cend_orig) { /* diag entry (A) */ 343a30f8f8cSSatish Balay col = in[j] - cstart_orig; /* local col index */ 3449371c9d4SSatish Balay brow = row / bs; 3459371c9d4SSatish Balay bcol = col / bs; 346a30f8f8cSSatish Balay if (brow > bcol) continue; /* ignore lower triangular blocks of A */ 347db4deed7SKarl Rupp if (roworiented) value = v[i * n + j]; 348db4deed7SKarl Rupp else value = v[i + j * m]; 349d40312a9SBarry Smith MatSetValues_SeqSBAIJ_A_Private(row, col, value, addv, im[i], in[j]); 3509566063dSJacob Faibussowitsch /* PetscCall(MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv)); */ 351f7d195e4SLawrence Mitchell } else if (in[j] < 0) { 352f7d195e4SLawrence Mitchell continue; 353f7d195e4SLawrence Mitchell } else { 354f7d195e4SLawrence Mitchell PetscCheck(in[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], mat->cmap->N - 1); 355f7d195e4SLawrence Mitchell /* off-diag entry (B) */ 356a30f8f8cSSatish Balay if (mat->was_assembled) { 35748a46eb9SPierre Jolivet if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat)); 358a30f8f8cSSatish Balay #if defined(PETSC_USE_CTABLE) 359eec179cfSJacob Faibussowitsch PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] / bs + 1, 0, &col)); 36071730473SSatish Balay col = col - 1; 361a30f8f8cSSatish Balay #else 36271730473SSatish Balay col = baij->colmap[in[j] / bs] - 1; 363a30f8f8cSSatish Balay #endif 364a30f8f8cSSatish Balay if (col < 0 && !((Mat_SeqSBAIJ *)(baij->A->data))->nonew) { 3659566063dSJacob Faibussowitsch PetscCall(MatDisAssemble_MPISBAIJ(mat)); 366a30f8f8cSSatish Balay col = in[j]; 367a30f8f8cSSatish Balay /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */ 368a30f8f8cSSatish Balay B = baij->B; 369a30f8f8cSSatish Balay b = (Mat_SeqBAIJ *)(B)->data; 3709371c9d4SSatish Balay bimax = b->imax; 3719371c9d4SSatish Balay bi = b->i; 3729371c9d4SSatish Balay bilen = b->ilen; 3739371c9d4SSatish Balay bj = b->j; 374a30f8f8cSSatish Balay ba = b->a; 37571730473SSatish Balay } else col += in[j] % bs; 376a30f8f8cSSatish Balay } else col = in[j]; 377db4deed7SKarl Rupp if (roworiented) value = v[i * n + j]; 378db4deed7SKarl Rupp else value = v[i + j * m]; 379d40312a9SBarry Smith MatSetValues_SeqSBAIJ_B_Private(row, col, value, addv, im[i], in[j]); 3809566063dSJacob Faibussowitsch /* PetscCall(MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv)); */ 381a30f8f8cSSatish Balay } 382a30f8f8cSSatish Balay } 383a30f8f8cSSatish Balay } else { /* off processor entry */ 3845f80ce2aSJacob Faibussowitsch PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set", im[i]); 385a30f8f8cSSatish Balay if (!baij->donotstash) { 3865080c13bSMatthew G Knepley mat->assembled = PETSC_FALSE; 387a30f8f8cSSatish Balay n_loc = 0; 388a30f8f8cSSatish Balay for (j = 0; j < n; j++) { 389f65c83cfSHong Zhang if (im[i] / bs > in[j] / bs) continue; /* ignore lower triangular blocks */ 390a30f8f8cSSatish Balay in_loc[n_loc] = in[j]; 391a30f8f8cSSatish Balay if (roworiented) { 392a30f8f8cSSatish Balay v_loc[n_loc] = v[i * n + j]; 393a30f8f8cSSatish Balay } else { 394a30f8f8cSSatish Balay v_loc[n_loc] = v[j * m + i]; 395a30f8f8cSSatish Balay } 396a30f8f8cSSatish Balay n_loc++; 397a30f8f8cSSatish Balay } 3989566063dSJacob Faibussowitsch PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n_loc, in_loc, v_loc, PETSC_FALSE)); 399a30f8f8cSSatish Balay } 400a30f8f8cSSatish Balay } 401a30f8f8cSSatish Balay } 4023ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 403a30f8f8cSSatish Balay } 404a30f8f8cSSatish Balay 405d71ae5a4SJacob Faibussowitsch static inline PetscErrorCode MatSetValuesBlocked_SeqSBAIJ_Inlined(Mat A, PetscInt row, PetscInt col, const PetscScalar v[], InsertMode is, PetscInt orow, PetscInt ocol) 406d71ae5a4SJacob Faibussowitsch { 40736bd2089SBarry Smith Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)A->data; 40836bd2089SBarry Smith PetscInt *rp, low, high, t, ii, jj, nrow, i, rmax, N; 40936bd2089SBarry Smith PetscInt *imax = a->imax, *ai = a->i, *ailen = a->ilen; 41036bd2089SBarry Smith PetscInt *aj = a->j, nonew = a->nonew, bs2 = a->bs2, bs = A->rmap->bs; 41136bd2089SBarry Smith PetscBool roworiented = a->roworiented; 41236bd2089SBarry Smith const PetscScalar *value = v; 41336bd2089SBarry Smith MatScalar *ap, *aa = a->a, *bap; 41436bd2089SBarry Smith 41536bd2089SBarry Smith PetscFunctionBegin; 41636bd2089SBarry Smith if (col < row) { 4173ba16761SJacob Faibussowitsch if (a->ignore_ltriangular) PetscFunctionReturn(PETSC_SUCCESS); /* ignore lower triangular block */ 41836bd2089SBarry Smith else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_USER, "Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)"); 41936bd2089SBarry Smith } 42036bd2089SBarry Smith rp = aj + ai[row]; 42136bd2089SBarry Smith ap = aa + bs2 * ai[row]; 42236bd2089SBarry Smith rmax = imax[row]; 42336bd2089SBarry Smith nrow = ailen[row]; 42436bd2089SBarry Smith value = v; 42536bd2089SBarry Smith low = 0; 42636bd2089SBarry Smith high = nrow; 42736bd2089SBarry Smith 42836bd2089SBarry Smith while (high - low > 7) { 42936bd2089SBarry Smith t = (low + high) / 2; 43036bd2089SBarry Smith if (rp[t] > col) high = t; 43136bd2089SBarry Smith else low = t; 43236bd2089SBarry Smith } 43336bd2089SBarry Smith for (i = low; i < high; i++) { 43436bd2089SBarry Smith if (rp[i] > col) break; 43536bd2089SBarry Smith if (rp[i] == col) { 43636bd2089SBarry Smith bap = ap + bs2 * i; 43736bd2089SBarry Smith if (roworiented) { 43836bd2089SBarry Smith if (is == ADD_VALUES) { 43936bd2089SBarry Smith for (ii = 0; ii < bs; ii++) { 440ad540459SPierre Jolivet for (jj = ii; jj < bs2; jj += bs) bap[jj] += *value++; 44136bd2089SBarry Smith } 44236bd2089SBarry Smith } else { 44336bd2089SBarry Smith for (ii = 0; ii < bs; ii++) { 444ad540459SPierre Jolivet for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++; 44536bd2089SBarry Smith } 44636bd2089SBarry Smith } 44736bd2089SBarry Smith } else { 44836bd2089SBarry Smith if (is == ADD_VALUES) { 44936bd2089SBarry Smith for (ii = 0; ii < bs; ii++) { 450ad540459SPierre Jolivet for (jj = 0; jj < bs; jj++) *bap++ += *value++; 45136bd2089SBarry Smith } 45236bd2089SBarry Smith } else { 45336bd2089SBarry Smith for (ii = 0; ii < bs; ii++) { 454ad540459SPierre Jolivet for (jj = 0; jj < bs; jj++) *bap++ = *value++; 45536bd2089SBarry Smith } 45636bd2089SBarry Smith } 45736bd2089SBarry Smith } 45836bd2089SBarry Smith goto noinsert2; 45936bd2089SBarry Smith } 46036bd2089SBarry Smith } 46136bd2089SBarry Smith if (nonew == 1) goto noinsert2; 4625f80ce2aSJacob Faibussowitsch PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new block index nonzero block (%" PetscInt_FMT ", %" PetscInt_FMT ") in the matrix", orow, ocol); 46336bd2089SBarry Smith MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, row, col, rmax, aa, ai, aj, rp, ap, imax, nonew, MatScalar); 4649371c9d4SSatish Balay N = nrow++ - 1; 4659371c9d4SSatish Balay high++; 46636bd2089SBarry Smith /* shift up all the later entries in this row */ 4679566063dSJacob Faibussowitsch PetscCall(PetscArraymove(rp + i + 1, rp + i, N - i + 1)); 4689566063dSJacob Faibussowitsch PetscCall(PetscArraymove(ap + bs2 * (i + 1), ap + bs2 * i, bs2 * (N - i + 1))); 46936bd2089SBarry Smith rp[i] = col; 47036bd2089SBarry Smith bap = ap + bs2 * i; 47136bd2089SBarry Smith if (roworiented) { 47236bd2089SBarry Smith for (ii = 0; ii < bs; ii++) { 473ad540459SPierre Jolivet for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++; 47436bd2089SBarry Smith } 47536bd2089SBarry Smith } else { 47636bd2089SBarry Smith for (ii = 0; ii < bs; ii++) { 477ad540459SPierre Jolivet for (jj = 0; jj < bs; jj++) *bap++ = *value++; 47836bd2089SBarry Smith } 47936bd2089SBarry Smith } 48036bd2089SBarry Smith noinsert2:; 48136bd2089SBarry Smith ailen[row] = nrow; 4823ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 48336bd2089SBarry Smith } 48436bd2089SBarry Smith 48536bd2089SBarry Smith /* 48636bd2089SBarry Smith This routine is exactly duplicated in mpibaij.c 48736bd2089SBarry Smith */ 488d71ae5a4SJacob Faibussowitsch static inline PetscErrorCode MatSetValuesBlocked_SeqBAIJ_Inlined(Mat A, PetscInt row, PetscInt col, const PetscScalar v[], InsertMode is, PetscInt orow, PetscInt ocol) 489d71ae5a4SJacob Faibussowitsch { 49036bd2089SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 49136bd2089SBarry Smith PetscInt *rp, low, high, t, ii, jj, nrow, i, rmax, N; 49236bd2089SBarry Smith PetscInt *imax = a->imax, *ai = a->i, *ailen = a->ilen; 49336bd2089SBarry Smith PetscInt *aj = a->j, nonew = a->nonew, bs2 = a->bs2, bs = A->rmap->bs; 49436bd2089SBarry Smith PetscBool roworiented = a->roworiented; 49536bd2089SBarry Smith const PetscScalar *value = v; 49636bd2089SBarry Smith MatScalar *ap, *aa = a->a, *bap; 49736bd2089SBarry Smith 49836bd2089SBarry Smith PetscFunctionBegin; 49936bd2089SBarry Smith rp = aj + ai[row]; 50036bd2089SBarry Smith ap = aa + bs2 * ai[row]; 50136bd2089SBarry Smith rmax = imax[row]; 50236bd2089SBarry Smith nrow = ailen[row]; 50336bd2089SBarry Smith low = 0; 50436bd2089SBarry Smith high = nrow; 50536bd2089SBarry Smith value = v; 50636bd2089SBarry Smith while (high - low > 7) { 50736bd2089SBarry Smith t = (low + high) / 2; 50836bd2089SBarry Smith if (rp[t] > col) high = t; 50936bd2089SBarry Smith else low = t; 51036bd2089SBarry Smith } 51136bd2089SBarry Smith for (i = low; i < high; i++) { 51236bd2089SBarry Smith if (rp[i] > col) break; 51336bd2089SBarry Smith if (rp[i] == col) { 51436bd2089SBarry Smith bap = ap + bs2 * i; 51536bd2089SBarry Smith if (roworiented) { 51636bd2089SBarry Smith if (is == ADD_VALUES) { 51736bd2089SBarry Smith for (ii = 0; ii < bs; ii++) { 518ad540459SPierre Jolivet for (jj = ii; jj < bs2; jj += bs) bap[jj] += *value++; 51936bd2089SBarry Smith } 52036bd2089SBarry Smith } else { 52136bd2089SBarry Smith for (ii = 0; ii < bs; ii++) { 522ad540459SPierre Jolivet for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++; 52336bd2089SBarry Smith } 52436bd2089SBarry Smith } 52536bd2089SBarry Smith } else { 52636bd2089SBarry Smith if (is == ADD_VALUES) { 52736bd2089SBarry Smith for (ii = 0; ii < bs; ii++, value += bs) { 528ad540459SPierre Jolivet for (jj = 0; jj < bs; jj++) bap[jj] += value[jj]; 52936bd2089SBarry Smith bap += bs; 53036bd2089SBarry Smith } 53136bd2089SBarry Smith } else { 53236bd2089SBarry Smith for (ii = 0; ii < bs; ii++, value += bs) { 533ad540459SPierre Jolivet for (jj = 0; jj < bs; jj++) bap[jj] = value[jj]; 53436bd2089SBarry Smith bap += bs; 53536bd2089SBarry Smith } 53636bd2089SBarry Smith } 53736bd2089SBarry Smith } 53836bd2089SBarry Smith goto noinsert2; 53936bd2089SBarry Smith } 54036bd2089SBarry Smith } 54136bd2089SBarry Smith if (nonew == 1) goto noinsert2; 5425f80ce2aSJacob Faibussowitsch PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new global block indexed nonzero block (%" PetscInt_FMT ", %" PetscInt_FMT ") in the matrix", orow, ocol); 54336bd2089SBarry Smith MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, row, col, rmax, aa, ai, aj, rp, ap, imax, nonew, MatScalar); 5449371c9d4SSatish Balay N = nrow++ - 1; 5459371c9d4SSatish Balay high++; 54636bd2089SBarry Smith /* shift up all the later entries in this row */ 5479566063dSJacob Faibussowitsch PetscCall(PetscArraymove(rp + i + 1, rp + i, N - i + 1)); 5489566063dSJacob Faibussowitsch PetscCall(PetscArraymove(ap + bs2 * (i + 1), ap + bs2 * i, bs2 * (N - i + 1))); 54936bd2089SBarry Smith rp[i] = col; 55036bd2089SBarry Smith bap = ap + bs2 * i; 55136bd2089SBarry Smith if (roworiented) { 55236bd2089SBarry Smith for (ii = 0; ii < bs; ii++) { 553ad540459SPierre Jolivet for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++; 55436bd2089SBarry Smith } 55536bd2089SBarry Smith } else { 55636bd2089SBarry Smith for (ii = 0; ii < bs; ii++) { 557ad540459SPierre Jolivet for (jj = 0; jj < bs; jj++) *bap++ = *value++; 55836bd2089SBarry Smith } 55936bd2089SBarry Smith } 56036bd2089SBarry Smith noinsert2:; 56136bd2089SBarry Smith ailen[row] = nrow; 5623ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 56336bd2089SBarry Smith } 56436bd2089SBarry Smith 56536bd2089SBarry Smith /* 56636bd2089SBarry Smith This routine could be optimized by removing the need for the block copy below and passing stride information 56736bd2089SBarry Smith to the above inline routines; similarly in MatSetValuesBlocked_MPIBAIJ() 56836bd2089SBarry Smith */ 569d71ae5a4SJacob Faibussowitsch PetscErrorCode MatSetValuesBlocked_MPISBAIJ(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const MatScalar v[], InsertMode addv) 570d71ae5a4SJacob Faibussowitsch { 5710880e062SHong Zhang Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data; 572f15d580aSBarry Smith const MatScalar *value; 573f15d580aSBarry Smith MatScalar *barray = baij->barray; 574ace3abfcSBarry Smith PetscBool roworiented = baij->roworiented, ignore_ltriangular = ((Mat_SeqSBAIJ *)baij->A->data)->ignore_ltriangular; 575899cda47SBarry Smith PetscInt i, j, ii, jj, row, col, rstart = baij->rstartbs; 576476417e5SBarry Smith PetscInt rend = baij->rendbs, cstart = baij->cstartbs, stepval; 577476417e5SBarry Smith PetscInt cend = baij->cendbs, bs = mat->rmap->bs, bs2 = baij->bs2; 5780880e062SHong Zhang 579a30f8f8cSSatish Balay PetscFunctionBegin; 5800880e062SHong Zhang if (!barray) { 5819566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(bs2, &barray)); 5820880e062SHong Zhang baij->barray = barray; 5830880e062SHong Zhang } 5840880e062SHong Zhang 5850880e062SHong Zhang if (roworiented) { 5860880e062SHong Zhang stepval = (n - 1) * bs; 5870880e062SHong Zhang } else { 5880880e062SHong Zhang stepval = (m - 1) * bs; 5890880e062SHong Zhang } 5900880e062SHong Zhang for (i = 0; i < m; i++) { 5910880e062SHong Zhang if (im[i] < 0) continue; 5926bdcaf15SBarry Smith PetscCheck(im[i] < baij->Mbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Block indexed row too large %" PetscInt_FMT " max %" PetscInt_FMT, im[i], baij->Mbs - 1); 5930880e062SHong Zhang if (im[i] >= rstart && im[i] < rend) { 5940880e062SHong Zhang row = im[i] - rstart; 5950880e062SHong Zhang for (j = 0; j < n; j++) { 596f3f98c53SJed Brown if (im[i] > in[j]) { 597f3f98c53SJed Brown if (ignore_ltriangular) continue; /* ignore lower triangular blocks */ 598e32f2f54SBarry Smith else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_USER, "Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)"); 599f3f98c53SJed Brown } 6000880e062SHong Zhang /* If NumCol = 1 then a copy is not required */ 6010880e062SHong Zhang if ((roworiented) && (n == 1)) { 602f15d580aSBarry Smith barray = (MatScalar *)v + i * bs2; 6030880e062SHong Zhang } else if ((!roworiented) && (m == 1)) { 604f15d580aSBarry Smith barray = (MatScalar *)v + j * bs2; 6050880e062SHong Zhang } else { /* Here a copy is required */ 6060880e062SHong Zhang if (roworiented) { 6070880e062SHong Zhang value = v + i * (stepval + bs) * bs + j * bs; 6080880e062SHong Zhang } else { 6090880e062SHong Zhang value = v + j * (stepval + bs) * bs + i * bs; 6100880e062SHong Zhang } 6110880e062SHong Zhang for (ii = 0; ii < bs; ii++, value += stepval) { 612ad540459SPierre Jolivet for (jj = 0; jj < bs; jj++) *barray++ = *value++; 6130880e062SHong Zhang } 6140880e062SHong Zhang barray -= bs2; 6150880e062SHong Zhang } 6160880e062SHong Zhang 6170880e062SHong Zhang if (in[j] >= cstart && in[j] < cend) { 6180880e062SHong Zhang col = in[j] - cstart; 6199566063dSJacob Faibussowitsch PetscCall(MatSetValuesBlocked_SeqSBAIJ_Inlined(baij->A, row, col, barray, addv, im[i], in[j])); 620f7d195e4SLawrence Mitchell } else if (in[j] < 0) { 621f7d195e4SLawrence Mitchell continue; 622f7d195e4SLawrence Mitchell } else { 623f7d195e4SLawrence Mitchell PetscCheck(in[j] < baij->Nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Block indexed column too large %" PetscInt_FMT " max %" PetscInt_FMT, in[j], baij->Nbs - 1); 6240880e062SHong Zhang if (mat->was_assembled) { 62548a46eb9SPierre Jolivet if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat)); 6260880e062SHong Zhang 6272515c552SBarry Smith #if defined(PETSC_USE_DEBUG) 6280880e062SHong Zhang #if defined(PETSC_USE_CTABLE) 6299371c9d4SSatish Balay { 6309371c9d4SSatish Balay PetscInt data; 631eec179cfSJacob Faibussowitsch PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] + 1, 0, &data)); 63208401ef6SPierre Jolivet PetscCheck((data - 1) % bs == 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Incorrect colmap"); 6330880e062SHong Zhang } 6340880e062SHong Zhang #else 63508401ef6SPierre Jolivet PetscCheck((baij->colmap[in[j]] - 1) % bs == 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Incorrect colmap"); 6360880e062SHong Zhang #endif 6370880e062SHong Zhang #endif 6380880e062SHong Zhang #if defined(PETSC_USE_CTABLE) 639eec179cfSJacob Faibussowitsch PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] + 1, 0, &col)); 6400880e062SHong Zhang col = (col - 1) / bs; 6410880e062SHong Zhang #else 6420880e062SHong Zhang col = (baij->colmap[in[j]] - 1) / bs; 6430880e062SHong Zhang #endif 6440880e062SHong Zhang if (col < 0 && !((Mat_SeqBAIJ *)(baij->A->data))->nonew) { 6459566063dSJacob Faibussowitsch PetscCall(MatDisAssemble_MPISBAIJ(mat)); 6460880e062SHong Zhang col = in[j]; 6470880e062SHong Zhang } 64826fbe8dcSKarl Rupp } else col = in[j]; 6499566063dSJacob Faibussowitsch PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B, row, col, barray, addv, im[i], in[j])); 6500880e062SHong Zhang } 6510880e062SHong Zhang } 6520880e062SHong Zhang } else { 6535f80ce2aSJacob Faibussowitsch PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process block indexed row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set", im[i]); 6540880e062SHong Zhang if (!baij->donotstash) { 6550880e062SHong Zhang if (roworiented) { 6569566063dSJacob Faibussowitsch PetscCall(MatStashValuesRowBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i)); 6570880e062SHong Zhang } else { 6589566063dSJacob Faibussowitsch PetscCall(MatStashValuesColBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i)); 6590880e062SHong Zhang } 6600880e062SHong Zhang } 6610880e062SHong Zhang } 6620880e062SHong Zhang } 6633ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 664a30f8f8cSSatish Balay } 665a30f8f8cSSatish Balay 666d71ae5a4SJacob Faibussowitsch PetscErrorCode MatGetValues_MPISBAIJ(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], PetscScalar v[]) 667d71ae5a4SJacob Faibussowitsch { 668f3566a2aSHong Zhang Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data; 669d0f46423SBarry Smith PetscInt bs = mat->rmap->bs, i, j, bsrstart = mat->rmap->rstart, bsrend = mat->rmap->rend; 670d0f46423SBarry Smith PetscInt bscstart = mat->cmap->rstart, bscend = mat->cmap->rend, row, col, data; 671a30f8f8cSSatish Balay 672a30f8f8cSSatish Balay PetscFunctionBegin; 673a30f8f8cSSatish Balay for (i = 0; i < m; i++) { 67454c59aa7SJacob Faibussowitsch if (idxm[i] < 0) continue; /* negative row */ 67554c59aa7SJacob Faibussowitsch PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, idxm[i], mat->rmap->N - 1); 676a30f8f8cSSatish Balay if (idxm[i] >= bsrstart && idxm[i] < bsrend) { 677a30f8f8cSSatish Balay row = idxm[i] - bsrstart; 678a30f8f8cSSatish Balay for (j = 0; j < n; j++) { 67954c59aa7SJacob Faibussowitsch if (idxn[j] < 0) continue; /* negative column */ 68054c59aa7SJacob Faibussowitsch PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, idxn[j], mat->cmap->N - 1); 681a30f8f8cSSatish Balay if (idxn[j] >= bscstart && idxn[j] < bscend) { 682a30f8f8cSSatish Balay col = idxn[j] - bscstart; 6839566063dSJacob Faibussowitsch PetscCall(MatGetValues_SeqSBAIJ(baij->A, 1, &row, 1, &col, v + i * n + j)); 684a30f8f8cSSatish Balay } else { 68548a46eb9SPierre Jolivet if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat)); 686a30f8f8cSSatish Balay #if defined(PETSC_USE_CTABLE) 687eec179cfSJacob Faibussowitsch PetscCall(PetscHMapIGetWithDefault(baij->colmap, idxn[j] / bs + 1, 0, &data)); 688a30f8f8cSSatish Balay data--; 689a30f8f8cSSatish Balay #else 690a30f8f8cSSatish Balay data = baij->colmap[idxn[j] / bs] - 1; 691a30f8f8cSSatish Balay #endif 692a30f8f8cSSatish Balay if ((data < 0) || (baij->garray[data / bs] != idxn[j] / bs)) *(v + i * n + j) = 0.0; 693a30f8f8cSSatish Balay else { 694a30f8f8cSSatish Balay col = data + idxn[j] % bs; 6959566063dSJacob Faibussowitsch PetscCall(MatGetValues_SeqBAIJ(baij->B, 1, &row, 1, &col, v + i * n + j)); 696a30f8f8cSSatish Balay } 697a30f8f8cSSatish Balay } 698a30f8f8cSSatish Balay } 699f23aa3ddSBarry Smith } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local values currently supported"); 700a30f8f8cSSatish Balay } 7013ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 702a30f8f8cSSatish Balay } 703a30f8f8cSSatish Balay 704d71ae5a4SJacob Faibussowitsch PetscErrorCode MatNorm_MPISBAIJ(Mat mat, NormType type, PetscReal *norm) 705d71ae5a4SJacob Faibussowitsch { 706a30f8f8cSSatish Balay Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data; 707a30f8f8cSSatish Balay PetscReal sum[2], *lnorm2; 708a30f8f8cSSatish Balay 709a30f8f8cSSatish Balay PetscFunctionBegin; 710a30f8f8cSSatish Balay if (baij->size == 1) { 7119566063dSJacob Faibussowitsch PetscCall(MatNorm(baij->A, type, norm)); 712a30f8f8cSSatish Balay } else { 713a30f8f8cSSatish Balay if (type == NORM_FROBENIUS) { 7149566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(2, &lnorm2)); 7159566063dSJacob Faibussowitsch PetscCall(MatNorm(baij->A, type, lnorm2)); 7169371c9d4SSatish Balay *lnorm2 = (*lnorm2) * (*lnorm2); 7179371c9d4SSatish Balay lnorm2++; /* squar power of norm(A) */ 7189566063dSJacob Faibussowitsch PetscCall(MatNorm(baij->B, type, lnorm2)); 7199371c9d4SSatish Balay *lnorm2 = (*lnorm2) * (*lnorm2); 7209371c9d4SSatish Balay lnorm2--; /* squar power of norm(B) */ 7211c2dc1cbSBarry Smith PetscCall(MPIU_Allreduce(lnorm2, sum, 2, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat))); 7228f1a2a5eSBarry Smith *norm = PetscSqrtReal(sum[0] + 2 * sum[1]); 7239566063dSJacob Faibussowitsch PetscCall(PetscFree(lnorm2)); 7240b8dc8d2SHong Zhang } else if (type == NORM_INFINITY || type == NORM_1) { /* max row/column sum */ 7250b8dc8d2SHong Zhang Mat_SeqSBAIJ *amat = (Mat_SeqSBAIJ *)baij->A->data; 7260b8dc8d2SHong Zhang Mat_SeqBAIJ *bmat = (Mat_SeqBAIJ *)baij->B->data; 7270b8dc8d2SHong Zhang PetscReal *rsum, *rsum2, vabs; 728899cda47SBarry Smith PetscInt *jj, *garray = baij->garray, rstart = baij->rstartbs, nz; 729d0f46423SBarry Smith PetscInt brow, bcol, col, bs = baij->A->rmap->bs, row, grow, gcol, mbs = amat->mbs; 7300b8dc8d2SHong Zhang MatScalar *v; 7310b8dc8d2SHong Zhang 7329566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(mat->cmap->N, &rsum, mat->cmap->N, &rsum2)); 7339566063dSJacob Faibussowitsch PetscCall(PetscArrayzero(rsum, mat->cmap->N)); 7340b8dc8d2SHong Zhang /* Amat */ 7359371c9d4SSatish Balay v = amat->a; 7369371c9d4SSatish Balay jj = amat->j; 7370b8dc8d2SHong Zhang for (brow = 0; brow < mbs; brow++) { 7380b8dc8d2SHong Zhang grow = bs * (rstart + brow); 7390b8dc8d2SHong Zhang nz = amat->i[brow + 1] - amat->i[brow]; 7400b8dc8d2SHong Zhang for (bcol = 0; bcol < nz; bcol++) { 7419371c9d4SSatish Balay gcol = bs * (rstart + *jj); 7429371c9d4SSatish Balay jj++; 7430b8dc8d2SHong Zhang for (col = 0; col < bs; col++) { 7440b8dc8d2SHong Zhang for (row = 0; row < bs; row++) { 7459371c9d4SSatish Balay vabs = PetscAbsScalar(*v); 7469371c9d4SSatish Balay v++; 7470b8dc8d2SHong Zhang rsum[gcol + col] += vabs; 7480b8dc8d2SHong Zhang /* non-diagonal block */ 7490b8dc8d2SHong Zhang if (bcol > 0 && vabs > 0.0) rsum[grow + row] += vabs; 7500b8dc8d2SHong Zhang } 7510b8dc8d2SHong Zhang } 7520b8dc8d2SHong Zhang } 7539566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(nz * bs * bs)); 7540b8dc8d2SHong Zhang } 7550b8dc8d2SHong Zhang /* Bmat */ 7569371c9d4SSatish Balay v = bmat->a; 7579371c9d4SSatish Balay jj = bmat->j; 7580b8dc8d2SHong Zhang for (brow = 0; brow < mbs; brow++) { 7590b8dc8d2SHong Zhang grow = bs * (rstart + brow); 7600b8dc8d2SHong Zhang nz = bmat->i[brow + 1] - bmat->i[brow]; 7610b8dc8d2SHong Zhang for (bcol = 0; bcol < nz; bcol++) { 7629371c9d4SSatish Balay gcol = bs * garray[*jj]; 7639371c9d4SSatish Balay jj++; 7640b8dc8d2SHong Zhang for (col = 0; col < bs; col++) { 7650b8dc8d2SHong Zhang for (row = 0; row < bs; row++) { 7669371c9d4SSatish Balay vabs = PetscAbsScalar(*v); 7679371c9d4SSatish Balay v++; 7680b8dc8d2SHong Zhang rsum[gcol + col] += vabs; 7690b8dc8d2SHong Zhang rsum[grow + row] += vabs; 7700b8dc8d2SHong Zhang } 7710b8dc8d2SHong Zhang } 7720b8dc8d2SHong Zhang } 7739566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(nz * bs * bs)); 7740b8dc8d2SHong Zhang } 7751c2dc1cbSBarry Smith PetscCall(MPIU_Allreduce(rsum, rsum2, mat->cmap->N, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat))); 7760b8dc8d2SHong Zhang *norm = 0.0; 777d0f46423SBarry Smith for (col = 0; col < mat->cmap->N; col++) { 7780b8dc8d2SHong Zhang if (rsum2[col] > *norm) *norm = rsum2[col]; 7790b8dc8d2SHong Zhang } 7809566063dSJacob Faibussowitsch PetscCall(PetscFree2(rsum, rsum2)); 781f23aa3ddSBarry Smith } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for this norm yet"); 782a30f8f8cSSatish Balay } 7833ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 784a30f8f8cSSatish Balay } 785a30f8f8cSSatish Balay 786d71ae5a4SJacob Faibussowitsch PetscErrorCode MatAssemblyBegin_MPISBAIJ(Mat mat, MatAssemblyType mode) 787d71ae5a4SJacob Faibussowitsch { 788a30f8f8cSSatish Balay Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data; 7891302d50aSBarry Smith PetscInt nstash, reallocs; 790a30f8f8cSSatish Balay 791a30f8f8cSSatish Balay PetscFunctionBegin; 7923ba16761SJacob Faibussowitsch if (baij->donotstash || mat->nooffprocentries) PetscFunctionReturn(PETSC_SUCCESS); 793a30f8f8cSSatish Balay 7949566063dSJacob Faibussowitsch PetscCall(MatStashScatterBegin_Private(mat, &mat->stash, mat->rmap->range)); 7959566063dSJacob Faibussowitsch PetscCall(MatStashScatterBegin_Private(mat, &mat->bstash, baij->rangebs)); 7969566063dSJacob Faibussowitsch PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs)); 7979566063dSJacob Faibussowitsch PetscCall(PetscInfo(mat, "Stash has %" PetscInt_FMT " entries,uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs)); 7989566063dSJacob Faibussowitsch PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs)); 7999566063dSJacob Faibussowitsch PetscCall(PetscInfo(mat, "Block-Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs)); 8003ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 801a30f8f8cSSatish Balay } 802a30f8f8cSSatish Balay 803d71ae5a4SJacob Faibussowitsch PetscErrorCode MatAssemblyEnd_MPISBAIJ(Mat mat, MatAssemblyType mode) 804d71ae5a4SJacob Faibussowitsch { 805a30f8f8cSSatish Balay Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data; 806a30f8f8cSSatish Balay Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)baij->A->data; 80713f74950SBarry Smith PetscInt i, j, rstart, ncols, flg, bs2 = baij->bs2; 808e44c0bd4SBarry Smith PetscInt *row, *col; 809ace3abfcSBarry Smith PetscBool other_disassembled; 81013f74950SBarry Smith PetscMPIInt n; 811ace3abfcSBarry Smith PetscBool r1, r2, r3; 812a30f8f8cSSatish Balay MatScalar *val; 813a30f8f8cSSatish Balay 81491c97fd4SSatish Balay /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */ 815a30f8f8cSSatish Balay PetscFunctionBegin; 8164cb17eb5SBarry Smith if (!baij->donotstash && !mat->nooffprocentries) { 817a30f8f8cSSatish Balay while (1) { 8189566063dSJacob Faibussowitsch PetscCall(MatStashScatterGetMesg_Private(&mat->stash, &n, &row, &col, &val, &flg)); 819a30f8f8cSSatish Balay if (!flg) break; 820a30f8f8cSSatish Balay 821a30f8f8cSSatish Balay for (i = 0; i < n;) { 822a30f8f8cSSatish Balay /* Now identify the consecutive vals belonging to the same row */ 82326fbe8dcSKarl Rupp for (j = i, rstart = row[j]; j < n; j++) { 82426fbe8dcSKarl Rupp if (row[j] != rstart) break; 82526fbe8dcSKarl Rupp } 826a30f8f8cSSatish Balay if (j < n) ncols = j - i; 827a30f8f8cSSatish Balay else ncols = n - i; 828a30f8f8cSSatish Balay /* Now assemble all these values with a single function call */ 8299566063dSJacob Faibussowitsch PetscCall(MatSetValues_MPISBAIJ(mat, 1, row + i, ncols, col + i, val + i, mat->insertmode)); 830a30f8f8cSSatish Balay i = j; 831a30f8f8cSSatish Balay } 832a30f8f8cSSatish Balay } 8339566063dSJacob Faibussowitsch PetscCall(MatStashScatterEnd_Private(&mat->stash)); 834a30f8f8cSSatish Balay /* Now process the block-stash. Since the values are stashed column-oriented, 835a30f8f8cSSatish Balay set the roworiented flag to column oriented, and after MatSetValues() 836a30f8f8cSSatish Balay restore the original flags */ 837a30f8f8cSSatish Balay r1 = baij->roworiented; 838a30f8f8cSSatish Balay r2 = a->roworiented; 83991c97fd4SSatish Balay r3 = ((Mat_SeqBAIJ *)baij->B->data)->roworiented; 84026fbe8dcSKarl Rupp 841a30f8f8cSSatish Balay baij->roworiented = PETSC_FALSE; 842a30f8f8cSSatish Balay a->roworiented = PETSC_FALSE; 84326fbe8dcSKarl Rupp 84491c97fd4SSatish Balay ((Mat_SeqBAIJ *)baij->B->data)->roworiented = PETSC_FALSE; /* b->roworinted */ 845a30f8f8cSSatish Balay while (1) { 8469566063dSJacob Faibussowitsch PetscCall(MatStashScatterGetMesg_Private(&mat->bstash, &n, &row, &col, &val, &flg)); 847a30f8f8cSSatish Balay if (!flg) break; 848a30f8f8cSSatish Balay 849a30f8f8cSSatish Balay for (i = 0; i < n;) { 850a30f8f8cSSatish Balay /* Now identify the consecutive vals belonging to the same row */ 85126fbe8dcSKarl Rupp for (j = i, rstart = row[j]; j < n; j++) { 85226fbe8dcSKarl Rupp if (row[j] != rstart) break; 85326fbe8dcSKarl Rupp } 854a30f8f8cSSatish Balay if (j < n) ncols = j - i; 855a30f8f8cSSatish Balay else ncols = n - i; 8569566063dSJacob Faibussowitsch PetscCall(MatSetValuesBlocked_MPISBAIJ(mat, 1, row + i, ncols, col + i, val + i * bs2, mat->insertmode)); 857a30f8f8cSSatish Balay i = j; 858a30f8f8cSSatish Balay } 859a30f8f8cSSatish Balay } 8609566063dSJacob Faibussowitsch PetscCall(MatStashScatterEnd_Private(&mat->bstash)); 86126fbe8dcSKarl Rupp 862a30f8f8cSSatish Balay baij->roworiented = r1; 863a30f8f8cSSatish Balay a->roworiented = r2; 86426fbe8dcSKarl Rupp 86591c97fd4SSatish Balay ((Mat_SeqBAIJ *)baij->B->data)->roworiented = r3; /* b->roworinted */ 866a30f8f8cSSatish Balay } 867a30f8f8cSSatish Balay 8689566063dSJacob Faibussowitsch PetscCall(MatAssemblyBegin(baij->A, mode)); 8699566063dSJacob Faibussowitsch PetscCall(MatAssemblyEnd(baij->A, mode)); 870a30f8f8cSSatish Balay 871a30f8f8cSSatish Balay /* determine if any processor has disassembled, if so we must 8726aad120cSJose E. Roman also disassemble ourselves, in order that we may reassemble. */ 873a30f8f8cSSatish Balay /* 874a30f8f8cSSatish Balay if nonzero structure of submatrix B cannot change then we know that 875a30f8f8cSSatish Balay no processor disassembled thus we can skip this stuff 876a30f8f8cSSatish Balay */ 877a30f8f8cSSatish Balay if (!((Mat_SeqBAIJ *)baij->B->data)->nonew) { 8785f9db2b2SJunchao Zhang PetscCall(MPIU_Allreduce(&mat->was_assembled, &other_disassembled, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat))); 87948a46eb9SPierre Jolivet if (mat->was_assembled && !other_disassembled) PetscCall(MatDisAssemble_MPISBAIJ(mat)); 880a30f8f8cSSatish Balay } 881a30f8f8cSSatish Balay 8829371c9d4SSatish Balay if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { PetscCall(MatSetUpMultiply_MPISBAIJ(mat)); /* setup Mvctx and sMvctx */ } 8839566063dSJacob Faibussowitsch PetscCall(MatAssemblyBegin(baij->B, mode)); 8849566063dSJacob Faibussowitsch PetscCall(MatAssemblyEnd(baij->B, mode)); 885a30f8f8cSSatish Balay 8869566063dSJacob Faibussowitsch PetscCall(PetscFree2(baij->rowvalues, baij->rowindices)); 88726fbe8dcSKarl Rupp 888f4259b30SLisandro Dalcin baij->rowvalues = NULL; 8894f9cfa9eSBarry Smith 8904f9cfa9eSBarry Smith /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */ 8914f9cfa9eSBarry Smith if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqBAIJ *)(baij->A->data))->nonew) { 892e56f5c9eSBarry Smith PetscObjectState state = baij->A->nonzerostate + baij->B->nonzerostate; 8931c2dc1cbSBarry Smith PetscCall(MPIU_Allreduce(&state, &mat->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)mat))); 894e56f5c9eSBarry Smith } 8953ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 896a30f8f8cSSatish Balay } 897a30f8f8cSSatish Balay 898dd6ea824SBarry Smith extern PetscErrorCode MatSetValues_MPIBAIJ(Mat, PetscInt, const PetscInt[], PetscInt, const PetscInt[], const PetscScalar[], InsertMode); 8999804daf3SBarry Smith #include <petscdraw.h> 900d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat, PetscViewer viewer) 901d71ae5a4SJacob Faibussowitsch { 902a30f8f8cSSatish Balay Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data; 903d0f46423SBarry Smith PetscInt bs = mat->rmap->bs; 9047da1fb6eSBarry Smith PetscMPIInt rank = baij->rank; 905ace3abfcSBarry Smith PetscBool iascii, isdraw; 906b0a32e0cSBarry Smith PetscViewer sviewer; 907f3ef73ceSBarry Smith PetscViewerFormat format; 908a30f8f8cSSatish Balay 909a30f8f8cSSatish Balay PetscFunctionBegin; 9109566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii)); 9119566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw)); 91232077d6dSBarry Smith if (iascii) { 9139566063dSJacob Faibussowitsch PetscCall(PetscViewerGetFormat(viewer, &format)); 914456192e2SBarry Smith if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 915a30f8f8cSSatish Balay MatInfo info; 9169566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank)); 9179566063dSJacob Faibussowitsch PetscCall(MatGetInfo(mat, MAT_LOCAL, &info)); 9189566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPushSynchronized(viewer)); 9199371c9d4SSatish Balay PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " bs %" PetscInt_FMT " mem %g\n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated, 9209371c9d4SSatish Balay mat->rmap->bs, (double)info.memory)); 9219566063dSJacob Faibussowitsch PetscCall(MatGetInfo(baij->A, MAT_LOCAL, &info)); 9229566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] on-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used)); 9239566063dSJacob Faibussowitsch PetscCall(MatGetInfo(baij->B, MAT_LOCAL, &info)); 9249566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] off-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used)); 9259566063dSJacob Faibussowitsch PetscCall(PetscViewerFlush(viewer)); 9269566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPopSynchronized(viewer)); 9279566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, "Information on VecScatter used in matrix-vector product: \n")); 9289566063dSJacob Faibussowitsch PetscCall(VecScatterView(baij->Mvctx, viewer)); 9293ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 930fb9695e5SSatish Balay } else if (format == PETSC_VIEWER_ASCII_INFO) { 9319566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " block size is %" PetscInt_FMT "\n", bs)); 9323ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 933c1490034SHong Zhang } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 9343ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 935a30f8f8cSSatish Balay } 936a30f8f8cSSatish Balay } 937a30f8f8cSSatish Balay 938a30f8f8cSSatish Balay if (isdraw) { 939b0a32e0cSBarry Smith PetscDraw draw; 940ace3abfcSBarry Smith PetscBool isnull; 9419566063dSJacob Faibussowitsch PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw)); 9429566063dSJacob Faibussowitsch PetscCall(PetscDrawIsNull(draw, &isnull)); 9433ba16761SJacob Faibussowitsch if (isnull) PetscFunctionReturn(PETSC_SUCCESS); 944a30f8f8cSSatish Balay } 945a30f8f8cSSatish Balay 9467da1fb6eSBarry Smith { 947a30f8f8cSSatish Balay /* assemble the entire matrix onto first processor. */ 948a30f8f8cSSatish Balay Mat A; 94965d70643SHong Zhang Mat_SeqSBAIJ *Aloc; 95065d70643SHong Zhang Mat_SeqBAIJ *Bloc; 951d0f46423SBarry Smith PetscInt M = mat->rmap->N, N = mat->cmap->N, *ai, *aj, col, i, j, k, *rvals, mbs = baij->mbs; 952a30f8f8cSSatish Balay MatScalar *a; 9533e219373SBarry Smith const char *matname; 954a30f8f8cSSatish Balay 955f204ca49SKris Buschelman /* Should this be the same type as mat? */ 9569566063dSJacob Faibussowitsch PetscCall(MatCreate(PetscObjectComm((PetscObject)mat), &A)); 957dd400576SPatrick Sanan if (rank == 0) { 9589566063dSJacob Faibussowitsch PetscCall(MatSetSizes(A, M, N, M, N)); 959a30f8f8cSSatish Balay } else { 9609566063dSJacob Faibussowitsch PetscCall(MatSetSizes(A, 0, 0, M, N)); 961a30f8f8cSSatish Balay } 9629566063dSJacob Faibussowitsch PetscCall(MatSetType(A, MATMPISBAIJ)); 9639566063dSJacob Faibussowitsch PetscCall(MatMPISBAIJSetPreallocation(A, mat->rmap->bs, 0, NULL, 0, NULL)); 9649566063dSJacob Faibussowitsch PetscCall(MatSetOption(A, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_FALSE)); 965a30f8f8cSSatish Balay 966a30f8f8cSSatish Balay /* copy over the A part */ 96765d70643SHong Zhang Aloc = (Mat_SeqSBAIJ *)baij->A->data; 9689371c9d4SSatish Balay ai = Aloc->i; 9699371c9d4SSatish Balay aj = Aloc->j; 9709371c9d4SSatish Balay a = Aloc->a; 9719566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(bs, &rvals)); 972a30f8f8cSSatish Balay 973a30f8f8cSSatish Balay for (i = 0; i < mbs; i++) { 974e9f7bc9eSHong Zhang rvals[0] = bs * (baij->rstartbs + i); 97526fbe8dcSKarl Rupp for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1; 976a30f8f8cSSatish Balay for (j = ai[i]; j < ai[i + 1]; j++) { 977e9f7bc9eSHong Zhang col = (baij->cstartbs + aj[j]) * bs; 978a30f8f8cSSatish Balay for (k = 0; k < bs; k++) { 9799566063dSJacob Faibussowitsch PetscCall(MatSetValues_MPISBAIJ(A, bs, rvals, 1, &col, a, INSERT_VALUES)); 98026fbe8dcSKarl Rupp col++; 98126fbe8dcSKarl Rupp a += bs; 982a30f8f8cSSatish Balay } 983a30f8f8cSSatish Balay } 984a30f8f8cSSatish Balay } 985a30f8f8cSSatish Balay /* copy over the B part */ 98665d70643SHong Zhang Bloc = (Mat_SeqBAIJ *)baij->B->data; 9879371c9d4SSatish Balay ai = Bloc->i; 9889371c9d4SSatish Balay aj = Bloc->j; 9899371c9d4SSatish Balay a = Bloc->a; 990a30f8f8cSSatish Balay for (i = 0; i < mbs; i++) { 991e9f7bc9eSHong Zhang rvals[0] = bs * (baij->rstartbs + i); 99226fbe8dcSKarl Rupp for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1; 993a30f8f8cSSatish Balay for (j = ai[i]; j < ai[i + 1]; j++) { 994a30f8f8cSSatish Balay col = baij->garray[aj[j]] * bs; 995a30f8f8cSSatish Balay for (k = 0; k < bs; k++) { 9969566063dSJacob Faibussowitsch PetscCall(MatSetValues_MPIBAIJ(A, bs, rvals, 1, &col, a, INSERT_VALUES)); 99726fbe8dcSKarl Rupp col++; 99826fbe8dcSKarl Rupp a += bs; 999a30f8f8cSSatish Balay } 1000a30f8f8cSSatish Balay } 1001a30f8f8cSSatish Balay } 10029566063dSJacob Faibussowitsch PetscCall(PetscFree(rvals)); 10039566063dSJacob Faibussowitsch PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY)); 10049566063dSJacob Faibussowitsch PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY)); 1005a30f8f8cSSatish Balay /* 1006a30f8f8cSSatish Balay Everyone has to call to draw the matrix since the graphics waits are 1007b0a32e0cSBarry Smith synchronized across all processors that share the PetscDraw object 1008a30f8f8cSSatish Balay */ 10099566063dSJacob Faibussowitsch PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer)); 101023a3927dSBarry Smith if (((PetscObject)mat)->name) PetscCall(PetscObjectGetName((PetscObject)mat, &matname)); 1011dd400576SPatrick Sanan if (rank == 0) { 101223a3927dSBarry Smith if (((PetscObject)mat)->name) PetscCall(PetscObjectSetName((PetscObject)((Mat_MPISBAIJ *)(A->data))->A, matname)); 10139566063dSJacob Faibussowitsch PetscCall(MatView_SeqSBAIJ(((Mat_MPISBAIJ *)(A->data))->A, sviewer)); 1014a30f8f8cSSatish Balay } 10159566063dSJacob Faibussowitsch PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer)); 10169566063dSJacob Faibussowitsch PetscCall(PetscViewerFlush(viewer)); 10179566063dSJacob Faibussowitsch PetscCall(MatDestroy(&A)); 1018a30f8f8cSSatish Balay } 10193ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1020a30f8f8cSSatish Balay } 1021a30f8f8cSSatish Balay 1022618cc2edSLisandro Dalcin /* Used for both MPIBAIJ and MPISBAIJ matrices */ 1023618cc2edSLisandro Dalcin #define MatView_MPISBAIJ_Binary MatView_MPIBAIJ_Binary 1024d1654148SHong Zhang 1025d71ae5a4SJacob Faibussowitsch PetscErrorCode MatView_MPISBAIJ(Mat mat, PetscViewer viewer) 1026d71ae5a4SJacob Faibussowitsch { 1027ace3abfcSBarry Smith PetscBool iascii, isdraw, issocket, isbinary; 1028a30f8f8cSSatish Balay 1029a30f8f8cSSatish Balay PetscFunctionBegin; 10309566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii)); 10319566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw)); 10329566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSOCKET, &issocket)); 10339566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary)); 1034d1654148SHong Zhang if (iascii || isdraw || issocket) { 10359566063dSJacob Faibussowitsch PetscCall(MatView_MPISBAIJ_ASCIIorDraworSocket(mat, viewer)); 10361baa6e33SBarry Smith } else if (isbinary) PetscCall(MatView_MPISBAIJ_Binary(mat, viewer)); 10373ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1038a30f8f8cSSatish Balay } 1039a30f8f8cSSatish Balay 1040d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMult_MPISBAIJ_Hermitian(Mat A, Vec xx, Vec yy) 1041d71ae5a4SJacob Faibussowitsch { 1042547795f9SHong Zhang Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data; 1043eb1ec7c1SStefano Zampini PetscInt mbs = a->mbs, bs = A->rmap->bs; 10446de40e93SBarry Smith PetscScalar *from; 10456de40e93SBarry Smith const PetscScalar *x; 1046547795f9SHong Zhang 1047547795f9SHong Zhang PetscFunctionBegin; 1048547795f9SHong Zhang /* diagonal part */ 10499566063dSJacob Faibussowitsch PetscCall((*a->A->ops->mult)(a->A, xx, a->slvec1a)); 1050629a200eSBarry Smith /* since a->slvec1b shares memory (dangerously) with a->slec1 changes to a->slec1 will affect it */ 1051629a200eSBarry Smith PetscCall(PetscObjectStateIncrease((PetscObject)a->slvec1b)); 1052629a200eSBarry Smith PetscCall(VecZeroEntries(a->slvec1b)); 1053547795f9SHong Zhang 1054547795f9SHong Zhang /* subdiagonal part */ 10555f80ce2aSJacob Faibussowitsch PetscCheck(a->B->ops->multhermitiantranspose, PetscObjectComm((PetscObject)a->B), PETSC_ERR_SUP, "Not for type %s", ((PetscObject)a->B)->type_name); 10569566063dSJacob Faibussowitsch PetscCall((*a->B->ops->multhermitiantranspose)(a->B, xx, a->slvec0b)); 1057547795f9SHong Zhang 1058547795f9SHong Zhang /* copy x into the vec slvec0 */ 10599566063dSJacob Faibussowitsch PetscCall(VecGetArray(a->slvec0, &from)); 10609566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx, &x)); 1061547795f9SHong Zhang 10629566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(from, x, bs * mbs)); 10639566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(a->slvec0, &from)); 10649566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx, &x)); 1065547795f9SHong Zhang 10669566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD)); 10679566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD)); 1068547795f9SHong Zhang /* supperdiagonal part */ 10699566063dSJacob Faibussowitsch PetscCall((*a->B->ops->multadd)(a->B, a->slvec1b, a->slvec1a, yy)); 10703ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1071547795f9SHong Zhang } 1072547795f9SHong Zhang 1073d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMult_MPISBAIJ(Mat A, Vec xx, Vec yy) 1074d71ae5a4SJacob Faibussowitsch { 1075a9d4b620SHong Zhang Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data; 1076eb1ec7c1SStefano Zampini PetscInt mbs = a->mbs, bs = A->rmap->bs; 1077d9ca1df4SBarry Smith PetscScalar *from; 1078d9ca1df4SBarry Smith const PetscScalar *x; 1079a9d4b620SHong Zhang 1080a9d4b620SHong Zhang PetscFunctionBegin; 1081a9d4b620SHong Zhang /* diagonal part */ 10829566063dSJacob Faibussowitsch PetscCall((*a->A->ops->mult)(a->A, xx, a->slvec1a)); 1083629a200eSBarry Smith /* since a->slvec1b shares memory (dangerously) with a->slec1 changes to a->slec1 will affect it */ 1084629a200eSBarry Smith PetscCall(PetscObjectStateIncrease((PetscObject)a->slvec1b)); 1085629a200eSBarry Smith PetscCall(VecZeroEntries(a->slvec1b)); 1086a9d4b620SHong Zhang 1087a9d4b620SHong Zhang /* subdiagonal part */ 10889566063dSJacob Faibussowitsch PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->slvec0b)); 1089fc165ae2SBarry Smith 1090a9d4b620SHong Zhang /* copy x into the vec slvec0 */ 10919566063dSJacob Faibussowitsch PetscCall(VecGetArray(a->slvec0, &from)); 10929566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx, &x)); 1093a9d4b620SHong Zhang 10949566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(from, x, bs * mbs)); 10959566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(a->slvec0, &from)); 10969566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx, &x)); 1097fc165ae2SBarry Smith 10989566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD)); 10999566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD)); 1100a9d4b620SHong Zhang /* supperdiagonal part */ 11019566063dSJacob Faibussowitsch PetscCall((*a->B->ops->multadd)(a->B, a->slvec1b, a->slvec1a, yy)); 11023ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1103a9d4b620SHong Zhang } 1104a9d4b620SHong Zhang 1105d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMultAdd_MPISBAIJ_Hermitian(Mat A, Vec xx, Vec yy, Vec zz) 1106d71ae5a4SJacob Faibussowitsch { 1107eb1ec7c1SStefano Zampini Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data; 1108eb1ec7c1SStefano Zampini PetscInt mbs = a->mbs, bs = A->rmap->bs; 1109629a200eSBarry Smith PetscScalar *from; 1110eb1ec7c1SStefano Zampini const PetscScalar *x; 1111eb1ec7c1SStefano Zampini 1112eb1ec7c1SStefano Zampini PetscFunctionBegin; 1113eb1ec7c1SStefano Zampini /* diagonal part */ 11149566063dSJacob Faibussowitsch PetscCall((*a->A->ops->multadd)(a->A, xx, yy, a->slvec1a)); 1115629a200eSBarry Smith PetscCall(PetscObjectStateIncrease((PetscObject)a->slvec1b)); 1116629a200eSBarry Smith PetscCall(VecZeroEntries(a->slvec1b)); 1117eb1ec7c1SStefano Zampini 1118eb1ec7c1SStefano Zampini /* subdiagonal part */ 11195f80ce2aSJacob Faibussowitsch PetscCheck(a->B->ops->multhermitiantranspose, PetscObjectComm((PetscObject)a->B), PETSC_ERR_SUP, "Not for type %s", ((PetscObject)a->B)->type_name); 11209566063dSJacob Faibussowitsch PetscCall((*a->B->ops->multhermitiantranspose)(a->B, xx, a->slvec0b)); 1121eb1ec7c1SStefano Zampini 1122eb1ec7c1SStefano Zampini /* copy x into the vec slvec0 */ 11239566063dSJacob Faibussowitsch PetscCall(VecGetArray(a->slvec0, &from)); 11249566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx, &x)); 11259566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(from, x, bs * mbs)); 11269566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(a->slvec0, &from)); 1127eb1ec7c1SStefano Zampini 11289566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD)); 11299566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx, &x)); 11309566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD)); 1131eb1ec7c1SStefano Zampini 1132eb1ec7c1SStefano Zampini /* supperdiagonal part */ 11339566063dSJacob Faibussowitsch PetscCall((*a->B->ops->multadd)(a->B, a->slvec1b, a->slvec1a, zz)); 11343ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1135eb1ec7c1SStefano Zampini } 1136eb1ec7c1SStefano Zampini 1137d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMultAdd_MPISBAIJ(Mat A, Vec xx, Vec yy, Vec zz) 1138d71ae5a4SJacob Faibussowitsch { 1139de8b6608SHong Zhang Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data; 1140d0f46423SBarry Smith PetscInt mbs = a->mbs, bs = A->rmap->bs; 1141629a200eSBarry Smith PetscScalar *from; 1142d9ca1df4SBarry Smith const PetscScalar *x; 1143a9d4b620SHong Zhang 1144a9d4b620SHong Zhang PetscFunctionBegin; 1145a9d4b620SHong Zhang /* diagonal part */ 11469566063dSJacob Faibussowitsch PetscCall((*a->A->ops->multadd)(a->A, xx, yy, a->slvec1a)); 1147629a200eSBarry Smith PetscCall(PetscObjectStateIncrease((PetscObject)a->slvec1b)); 1148629a200eSBarry Smith PetscCall(VecZeroEntries(a->slvec1b)); 1149a9d4b620SHong Zhang 1150a9d4b620SHong Zhang /* subdiagonal part */ 11519566063dSJacob Faibussowitsch PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->slvec0b)); 1152a9d4b620SHong Zhang 1153a9d4b620SHong Zhang /* copy x into the vec slvec0 */ 11549566063dSJacob Faibussowitsch PetscCall(VecGetArray(a->slvec0, &from)); 11559566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx, &x)); 11569566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(from, x, bs * mbs)); 11579566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(a->slvec0, &from)); 1158a9d4b620SHong Zhang 11599566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD)); 11609566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx, &x)); 11619566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD)); 1162a9d4b620SHong Zhang 1163a9d4b620SHong Zhang /* supperdiagonal part */ 11649566063dSJacob Faibussowitsch PetscCall((*a->B->ops->multadd)(a->B, a->slvec1b, a->slvec1a, zz)); 11653ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1166a9d4b620SHong Zhang } 1167a9d4b620SHong Zhang 1168a30f8f8cSSatish Balay /* 1169a30f8f8cSSatish Balay This only works correctly for square matrices where the subblock A->A is the 1170a30f8f8cSSatish Balay diagonal block 1171a30f8f8cSSatish Balay */ 1172d71ae5a4SJacob Faibussowitsch PetscErrorCode MatGetDiagonal_MPISBAIJ(Mat A, Vec v) 1173d71ae5a4SJacob Faibussowitsch { 1174a30f8f8cSSatish Balay Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data; 1175a30f8f8cSSatish Balay 1176a30f8f8cSSatish Balay PetscFunctionBegin; 117708401ef6SPierre Jolivet /* PetscCheck(a->rmap->N == a->cmap->N,PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); */ 11789566063dSJacob Faibussowitsch PetscCall(MatGetDiagonal(a->A, v)); 11793ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1180a30f8f8cSSatish Balay } 1181a30f8f8cSSatish Balay 1182d71ae5a4SJacob Faibussowitsch PetscErrorCode MatScale_MPISBAIJ(Mat A, PetscScalar aa) 1183d71ae5a4SJacob Faibussowitsch { 1184a30f8f8cSSatish Balay Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data; 1185a30f8f8cSSatish Balay 1186a30f8f8cSSatish Balay PetscFunctionBegin; 11879566063dSJacob Faibussowitsch PetscCall(MatScale(a->A, aa)); 11889566063dSJacob Faibussowitsch PetscCall(MatScale(a->B, aa)); 11893ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1190a30f8f8cSSatish Balay } 1191a30f8f8cSSatish Balay 1192d71ae5a4SJacob Faibussowitsch PetscErrorCode MatGetRow_MPISBAIJ(Mat matin, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v) 1193d71ae5a4SJacob Faibussowitsch { 1194d0d4cfc2SHong Zhang Mat_MPISBAIJ *mat = (Mat_MPISBAIJ *)matin->data; 1195d0d4cfc2SHong Zhang PetscScalar *vworkA, *vworkB, **pvA, **pvB, *v_p; 1196d0f46423SBarry Smith PetscInt bs = matin->rmap->bs, bs2 = mat->bs2, i, *cworkA, *cworkB, **pcA, **pcB; 1197d0f46423SBarry Smith PetscInt nztot, nzA, nzB, lrow, brstart = matin->rmap->rstart, brend = matin->rmap->rend; 1198899cda47SBarry Smith PetscInt *cmap, *idx_p, cstart = mat->rstartbs; 1199d0d4cfc2SHong Zhang 1200a30f8f8cSSatish Balay PetscFunctionBegin; 12015f80ce2aSJacob Faibussowitsch PetscCheck(!mat->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Already active"); 1202d0d4cfc2SHong Zhang mat->getrowactive = PETSC_TRUE; 1203d0d4cfc2SHong Zhang 1204d0d4cfc2SHong Zhang if (!mat->rowvalues && (idx || v)) { 1205d0d4cfc2SHong Zhang /* 1206d0d4cfc2SHong Zhang allocate enough space to hold information from the longest row. 1207d0d4cfc2SHong Zhang */ 1208d0d4cfc2SHong Zhang Mat_SeqSBAIJ *Aa = (Mat_SeqSBAIJ *)mat->A->data; 1209d0d4cfc2SHong Zhang Mat_SeqBAIJ *Ba = (Mat_SeqBAIJ *)mat->B->data; 1210d0d4cfc2SHong Zhang PetscInt max = 1, mbs = mat->mbs, tmp; 1211d0d4cfc2SHong Zhang for (i = 0; i < mbs; i++) { 1212d0d4cfc2SHong Zhang tmp = Aa->i[i + 1] - Aa->i[i] + Ba->i[i + 1] - Ba->i[i]; /* row length */ 121326fbe8dcSKarl Rupp if (max < tmp) max = tmp; 1214d0d4cfc2SHong Zhang } 12159566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(max * bs2, &mat->rowvalues, max * bs2, &mat->rowindices)); 1216d0d4cfc2SHong Zhang } 1217d0d4cfc2SHong Zhang 12185f80ce2aSJacob Faibussowitsch PetscCheck(row >= brstart && row < brend, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local rows"); 1219d0d4cfc2SHong Zhang lrow = row - brstart; /* local row index */ 1220d0d4cfc2SHong Zhang 12219371c9d4SSatish Balay pvA = &vworkA; 12229371c9d4SSatish Balay pcA = &cworkA; 12239371c9d4SSatish Balay pvB = &vworkB; 12249371c9d4SSatish Balay pcB = &cworkB; 12259371c9d4SSatish Balay if (!v) { 12269371c9d4SSatish Balay pvA = NULL; 12279371c9d4SSatish Balay pvB = NULL; 12289371c9d4SSatish Balay } 12299371c9d4SSatish Balay if (!idx) { 12309371c9d4SSatish Balay pcA = NULL; 12319371c9d4SSatish Balay if (!v) pcB = NULL; 12329371c9d4SSatish Balay } 12339566063dSJacob Faibussowitsch PetscCall((*mat->A->ops->getrow)(mat->A, lrow, &nzA, pcA, pvA)); 12349566063dSJacob Faibussowitsch PetscCall((*mat->B->ops->getrow)(mat->B, lrow, &nzB, pcB, pvB)); 1235d0d4cfc2SHong Zhang nztot = nzA + nzB; 1236d0d4cfc2SHong Zhang 1237d0d4cfc2SHong Zhang cmap = mat->garray; 1238d0d4cfc2SHong Zhang if (v || idx) { 1239d0d4cfc2SHong Zhang if (nztot) { 1240d0d4cfc2SHong Zhang /* Sort by increasing column numbers, assuming A and B already sorted */ 1241d0d4cfc2SHong Zhang PetscInt imark = -1; 1242d0d4cfc2SHong Zhang if (v) { 1243d0d4cfc2SHong Zhang *v = v_p = mat->rowvalues; 1244d0d4cfc2SHong Zhang for (i = 0; i < nzB; i++) { 1245d0d4cfc2SHong Zhang if (cmap[cworkB[i] / bs] < cstart) v_p[i] = vworkB[i]; 1246d0d4cfc2SHong Zhang else break; 1247d0d4cfc2SHong Zhang } 1248d0d4cfc2SHong Zhang imark = i; 1249d0d4cfc2SHong Zhang for (i = 0; i < nzA; i++) v_p[imark + i] = vworkA[i]; 1250d0d4cfc2SHong Zhang for (i = imark; i < nzB; i++) v_p[nzA + i] = vworkB[i]; 1251d0d4cfc2SHong Zhang } 1252d0d4cfc2SHong Zhang if (idx) { 1253d0d4cfc2SHong Zhang *idx = idx_p = mat->rowindices; 1254d0d4cfc2SHong Zhang if (imark > -1) { 1255ad540459SPierre Jolivet for (i = 0; i < imark; i++) idx_p[i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs; 1256d0d4cfc2SHong Zhang } else { 1257d0d4cfc2SHong Zhang for (i = 0; i < nzB; i++) { 125826fbe8dcSKarl Rupp if (cmap[cworkB[i] / bs] < cstart) idx_p[i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs; 1259d0d4cfc2SHong Zhang else break; 1260d0d4cfc2SHong Zhang } 1261d0d4cfc2SHong Zhang imark = i; 1262d0d4cfc2SHong Zhang } 1263d0d4cfc2SHong Zhang for (i = 0; i < nzA; i++) idx_p[imark + i] = cstart * bs + cworkA[i]; 1264d0d4cfc2SHong Zhang for (i = imark; i < nzB; i++) idx_p[nzA + i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs; 1265d0d4cfc2SHong Zhang } 1266d0d4cfc2SHong Zhang } else { 1267f4259b30SLisandro Dalcin if (idx) *idx = NULL; 1268f4259b30SLisandro Dalcin if (v) *v = NULL; 1269d0d4cfc2SHong Zhang } 1270d0d4cfc2SHong Zhang } 1271d0d4cfc2SHong Zhang *nz = nztot; 12729566063dSJacob Faibussowitsch PetscCall((*mat->A->ops->restorerow)(mat->A, lrow, &nzA, pcA, pvA)); 12739566063dSJacob Faibussowitsch PetscCall((*mat->B->ops->restorerow)(mat->B, lrow, &nzB, pcB, pvB)); 12743ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1275a30f8f8cSSatish Balay } 1276a30f8f8cSSatish Balay 1277d71ae5a4SJacob Faibussowitsch PetscErrorCode MatRestoreRow_MPISBAIJ(Mat mat, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v) 1278d71ae5a4SJacob Faibussowitsch { 1279a30f8f8cSSatish Balay Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data; 1280a30f8f8cSSatish Balay 1281a30f8f8cSSatish Balay PetscFunctionBegin; 12825f80ce2aSJacob Faibussowitsch PetscCheck(baij->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "MatGetRow() must be called first"); 1283a30f8f8cSSatish Balay baij->getrowactive = PETSC_FALSE; 12843ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1285a30f8f8cSSatish Balay } 1286a30f8f8cSSatish Balay 1287d71ae5a4SJacob Faibussowitsch PetscErrorCode MatGetRowUpperTriangular_MPISBAIJ(Mat A) 1288d71ae5a4SJacob Faibussowitsch { 1289d0d4cfc2SHong Zhang Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data; 1290d0d4cfc2SHong Zhang Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ *)a->A->data; 1291d0d4cfc2SHong Zhang 1292d0d4cfc2SHong Zhang PetscFunctionBegin; 1293d0d4cfc2SHong Zhang aA->getrow_utriangular = PETSC_TRUE; 12943ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1295d0d4cfc2SHong Zhang } 1296d71ae5a4SJacob Faibussowitsch PetscErrorCode MatRestoreRowUpperTriangular_MPISBAIJ(Mat A) 1297d71ae5a4SJacob Faibussowitsch { 1298d0d4cfc2SHong Zhang Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data; 1299d0d4cfc2SHong Zhang Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ *)a->A->data; 1300d0d4cfc2SHong Zhang 1301d0d4cfc2SHong Zhang PetscFunctionBegin; 1302d0d4cfc2SHong Zhang aA->getrow_utriangular = PETSC_FALSE; 13033ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1304d0d4cfc2SHong Zhang } 1305d0d4cfc2SHong Zhang 1306d71ae5a4SJacob Faibussowitsch PetscErrorCode MatConjugate_MPISBAIJ(Mat mat) 1307d71ae5a4SJacob Faibussowitsch { 13085f80ce2aSJacob Faibussowitsch PetscFunctionBegin; 13095f80ce2aSJacob Faibussowitsch if (PetscDefined(USE_COMPLEX)) { 13102726fb6dSPierre Jolivet Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)mat->data; 13112726fb6dSPierre Jolivet 13129566063dSJacob Faibussowitsch PetscCall(MatConjugate(a->A)); 13139566063dSJacob Faibussowitsch PetscCall(MatConjugate(a->B)); 13145f80ce2aSJacob Faibussowitsch } 13153ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 13162726fb6dSPierre Jolivet } 13172726fb6dSPierre Jolivet 1318d71ae5a4SJacob Faibussowitsch PetscErrorCode MatRealPart_MPISBAIJ(Mat A) 1319d71ae5a4SJacob Faibussowitsch { 132099cafbc1SBarry Smith Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data; 132199cafbc1SBarry Smith 132299cafbc1SBarry Smith PetscFunctionBegin; 13239566063dSJacob Faibussowitsch PetscCall(MatRealPart(a->A)); 13249566063dSJacob Faibussowitsch PetscCall(MatRealPart(a->B)); 13253ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 132699cafbc1SBarry Smith } 132799cafbc1SBarry Smith 1328d71ae5a4SJacob Faibussowitsch PetscErrorCode MatImaginaryPart_MPISBAIJ(Mat A) 1329d71ae5a4SJacob Faibussowitsch { 133099cafbc1SBarry Smith Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data; 133199cafbc1SBarry Smith 133299cafbc1SBarry Smith PetscFunctionBegin; 13339566063dSJacob Faibussowitsch PetscCall(MatImaginaryPart(a->A)); 13349566063dSJacob Faibussowitsch PetscCall(MatImaginaryPart(a->B)); 13353ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 133699cafbc1SBarry Smith } 133799cafbc1SBarry Smith 13387dae84e0SHong Zhang /* Check if isrow is a subset of iscol_local, called by MatCreateSubMatrix_MPISBAIJ() 133936032a97SHong Zhang Input: isrow - distributed(parallel), 134036032a97SHong Zhang iscol_local - locally owned (seq) 134136032a97SHong Zhang */ 1342d71ae5a4SJacob Faibussowitsch PetscErrorCode ISEqual_private(IS isrow, IS iscol_local, PetscBool *flg) 1343d71ae5a4SJacob Faibussowitsch { 13448f46ffcaSHong Zhang PetscInt sz1, sz2, *a1, *a2, i, j, k, nmatch; 13458f46ffcaSHong Zhang const PetscInt *ptr1, *ptr2; 134636032a97SHong Zhang 134736032a97SHong Zhang PetscFunctionBegin; 13489566063dSJacob Faibussowitsch PetscCall(ISGetLocalSize(isrow, &sz1)); 13499566063dSJacob Faibussowitsch PetscCall(ISGetLocalSize(iscol_local, &sz2)); 13501098a8e8SHong Zhang if (sz1 > sz2) { 13511098a8e8SHong Zhang *flg = PETSC_FALSE; 13523ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 13531098a8e8SHong Zhang } 13548f46ffcaSHong Zhang 13559566063dSJacob Faibussowitsch PetscCall(ISGetIndices(isrow, &ptr1)); 13569566063dSJacob Faibussowitsch PetscCall(ISGetIndices(iscol_local, &ptr2)); 13578f46ffcaSHong Zhang 13589566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(sz1, &a1)); 13599566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(sz2, &a2)); 13609566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(a1, ptr1, sz1)); 13619566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(a2, ptr2, sz2)); 13629566063dSJacob Faibussowitsch PetscCall(PetscSortInt(sz1, a1)); 13639566063dSJacob Faibussowitsch PetscCall(PetscSortInt(sz2, a2)); 13648f46ffcaSHong Zhang 13658f46ffcaSHong Zhang nmatch = 0; 13668f46ffcaSHong Zhang k = 0; 13678f46ffcaSHong Zhang for (i = 0; i < sz1; i++) { 13688f46ffcaSHong Zhang for (j = k; j < sz2; j++) { 13698f46ffcaSHong Zhang if (a1[i] == a2[j]) { 13709371c9d4SSatish Balay k = j; 13719371c9d4SSatish Balay nmatch++; 13728f46ffcaSHong Zhang break; 13738f46ffcaSHong Zhang } 13748f46ffcaSHong Zhang } 13758f46ffcaSHong Zhang } 13769566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(isrow, &ptr1)); 13779566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(iscol_local, &ptr2)); 13789566063dSJacob Faibussowitsch PetscCall(PetscFree(a1)); 13799566063dSJacob Faibussowitsch PetscCall(PetscFree(a2)); 13801098a8e8SHong Zhang if (nmatch < sz1) { 13811098a8e8SHong Zhang *flg = PETSC_FALSE; 13821098a8e8SHong Zhang } else { 13831098a8e8SHong Zhang *flg = PETSC_TRUE; 13841098a8e8SHong Zhang } 13853ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 13868f46ffcaSHong Zhang } 138736032a97SHong Zhang 1388d71ae5a4SJacob Faibussowitsch PetscErrorCode MatCreateSubMatrix_MPISBAIJ(Mat mat, IS isrow, IS iscol, MatReuse call, Mat *newmat) 1389d71ae5a4SJacob Faibussowitsch { 139036032a97SHong Zhang IS iscol_local; 139136032a97SHong Zhang PetscInt csize; 139236032a97SHong Zhang PetscBool isequal; 139336032a97SHong Zhang 139436032a97SHong Zhang PetscFunctionBegin; 13959566063dSJacob Faibussowitsch PetscCall(ISGetLocalSize(iscol, &csize)); 139636032a97SHong Zhang if (call == MAT_REUSE_MATRIX) { 13979566063dSJacob Faibussowitsch PetscCall(PetscObjectQuery((PetscObject)*newmat, "ISAllGather", (PetscObject *)&iscol_local)); 13985f80ce2aSJacob Faibussowitsch PetscCheck(iscol_local, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse"); 139936032a97SHong Zhang } else { 1400068661f9SToby Isaac PetscBool issorted; 1401068661f9SToby Isaac 14029566063dSJacob Faibussowitsch PetscCall(ISAllGather(iscol, &iscol_local)); 14039566063dSJacob Faibussowitsch PetscCall(ISEqual_private(isrow, iscol_local, &isequal)); 14049566063dSJacob Faibussowitsch PetscCall(ISSorted(iscol_local, &issorted)); 14055f80ce2aSJacob Faibussowitsch PetscCheck(isequal && issorted, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "For symmetric format, iscol must equal isrow and be sorted"); 14068f46ffcaSHong Zhang } 14078f46ffcaSHong Zhang 14087dae84e0SHong Zhang /* now call MatCreateSubMatrix_MPIBAIJ() */ 14099566063dSJacob Faibussowitsch PetscCall(MatCreateSubMatrix_MPIBAIJ_Private(mat, isrow, iscol_local, csize, call, newmat)); 14108f46ffcaSHong Zhang if (call == MAT_INITIAL_MATRIX) { 14119566063dSJacob Faibussowitsch PetscCall(PetscObjectCompose((PetscObject)*newmat, "ISAllGather", (PetscObject)iscol_local)); 14129566063dSJacob Faibussowitsch PetscCall(ISDestroy(&iscol_local)); 14138f46ffcaSHong Zhang } 14143ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 14158f46ffcaSHong Zhang } 14168f46ffcaSHong Zhang 1417d71ae5a4SJacob Faibussowitsch PetscErrorCode MatZeroEntries_MPISBAIJ(Mat A) 1418d71ae5a4SJacob Faibussowitsch { 1419a30f8f8cSSatish Balay Mat_MPISBAIJ *l = (Mat_MPISBAIJ *)A->data; 1420a30f8f8cSSatish Balay 1421a30f8f8cSSatish Balay PetscFunctionBegin; 14229566063dSJacob Faibussowitsch PetscCall(MatZeroEntries(l->A)); 14239566063dSJacob Faibussowitsch PetscCall(MatZeroEntries(l->B)); 14243ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1425a30f8f8cSSatish Balay } 1426a30f8f8cSSatish Balay 1427d71ae5a4SJacob Faibussowitsch PetscErrorCode MatGetInfo_MPISBAIJ(Mat matin, MatInfoType flag, MatInfo *info) 1428d71ae5a4SJacob Faibussowitsch { 1429a30f8f8cSSatish Balay Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)matin->data; 1430a30f8f8cSSatish Balay Mat A = a->A, B = a->B; 14313966268fSBarry Smith PetscLogDouble isend[5], irecv[5]; 1432a30f8f8cSSatish Balay 1433a30f8f8cSSatish Balay PetscFunctionBegin; 1434d0f46423SBarry Smith info->block_size = (PetscReal)matin->rmap->bs; 143526fbe8dcSKarl Rupp 14369566063dSJacob Faibussowitsch PetscCall(MatGetInfo(A, MAT_LOCAL, info)); 143726fbe8dcSKarl Rupp 14389371c9d4SSatish Balay isend[0] = info->nz_used; 14399371c9d4SSatish Balay isend[1] = info->nz_allocated; 14409371c9d4SSatish Balay isend[2] = info->nz_unneeded; 14419371c9d4SSatish Balay isend[3] = info->memory; 14429371c9d4SSatish Balay isend[4] = info->mallocs; 144326fbe8dcSKarl Rupp 14449566063dSJacob Faibussowitsch PetscCall(MatGetInfo(B, MAT_LOCAL, info)); 144526fbe8dcSKarl Rupp 14469371c9d4SSatish Balay isend[0] += info->nz_used; 14479371c9d4SSatish Balay isend[1] += info->nz_allocated; 14489371c9d4SSatish Balay isend[2] += info->nz_unneeded; 14499371c9d4SSatish Balay isend[3] += info->memory; 14509371c9d4SSatish Balay isend[4] += info->mallocs; 1451a30f8f8cSSatish Balay if (flag == MAT_LOCAL) { 1452a30f8f8cSSatish Balay info->nz_used = isend[0]; 1453a30f8f8cSSatish Balay info->nz_allocated = isend[1]; 1454a30f8f8cSSatish Balay info->nz_unneeded = isend[2]; 1455a30f8f8cSSatish Balay info->memory = isend[3]; 1456a30f8f8cSSatish Balay info->mallocs = isend[4]; 1457a30f8f8cSSatish Balay } else if (flag == MAT_GLOBAL_MAX) { 14581c2dc1cbSBarry Smith PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)matin))); 145926fbe8dcSKarl Rupp 1460a30f8f8cSSatish Balay info->nz_used = irecv[0]; 1461a30f8f8cSSatish Balay info->nz_allocated = irecv[1]; 1462a30f8f8cSSatish Balay info->nz_unneeded = irecv[2]; 1463a30f8f8cSSatish Balay info->memory = irecv[3]; 1464a30f8f8cSSatish Balay info->mallocs = irecv[4]; 1465a30f8f8cSSatish Balay } else if (flag == MAT_GLOBAL_SUM) { 14661c2dc1cbSBarry Smith PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)matin))); 146726fbe8dcSKarl Rupp 1468a30f8f8cSSatish Balay info->nz_used = irecv[0]; 1469a30f8f8cSSatish Balay info->nz_allocated = irecv[1]; 1470a30f8f8cSSatish Balay info->nz_unneeded = irecv[2]; 1471a30f8f8cSSatish Balay info->memory = irecv[3]; 1472a30f8f8cSSatish Balay info->mallocs = irecv[4]; 147398921bdaSJacob Faibussowitsch } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Unknown MatInfoType argument %d", (int)flag); 1474a30f8f8cSSatish Balay info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1475a30f8f8cSSatish Balay info->fill_ratio_needed = 0; 1476a30f8f8cSSatish Balay info->factor_mallocs = 0; 14773ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1478a30f8f8cSSatish Balay } 1479a30f8f8cSSatish Balay 1480d71ae5a4SJacob Faibussowitsch PetscErrorCode MatSetOption_MPISBAIJ(Mat A, MatOption op, PetscBool flg) 1481d71ae5a4SJacob Faibussowitsch { 1482a30f8f8cSSatish Balay Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data; 1483d0d4cfc2SHong Zhang Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ *)a->A->data; 1484a30f8f8cSSatish Balay 1485a30f8f8cSSatish Balay PetscFunctionBegin; 1486e98b92d7SKris Buschelman switch (op) { 1487512a5fc5SBarry Smith case MAT_NEW_NONZERO_LOCATIONS: 1488e98b92d7SKris Buschelman case MAT_NEW_NONZERO_ALLOCATION_ERR: 148928b2fa4aSMatthew Knepley case MAT_UNUSED_NONZERO_LOCATION_ERR: 1490a9817697SBarry Smith case MAT_KEEP_NONZERO_PATTERN: 1491c10200c1SHong Zhang case MAT_SUBMAT_SINGLEIS: 1492e98b92d7SKris Buschelman case MAT_NEW_NONZERO_LOCATION_ERR: 149343674050SBarry Smith MatCheckPreallocated(A, 1); 14949566063dSJacob Faibussowitsch PetscCall(MatSetOption(a->A, op, flg)); 14959566063dSJacob Faibussowitsch PetscCall(MatSetOption(a->B, op, flg)); 1496e98b92d7SKris Buschelman break; 1497e98b92d7SKris Buschelman case MAT_ROW_ORIENTED: 149843674050SBarry Smith MatCheckPreallocated(A, 1); 14994e0d8c25SBarry Smith a->roworiented = flg; 150026fbe8dcSKarl Rupp 15019566063dSJacob Faibussowitsch PetscCall(MatSetOption(a->A, op, flg)); 15029566063dSJacob Faibussowitsch PetscCall(MatSetOption(a->B, op, flg)); 1503e98b92d7SKris Buschelman break; 15048c78258cSHong Zhang case MAT_FORCE_DIAGONAL_ENTRIES: 1505d71ae5a4SJacob Faibussowitsch case MAT_SORTED_FULL: 1506d71ae5a4SJacob Faibussowitsch PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op])); 1507d71ae5a4SJacob Faibussowitsch break; 1508d71ae5a4SJacob Faibussowitsch case MAT_IGNORE_OFF_PROC_ENTRIES: 1509d71ae5a4SJacob Faibussowitsch a->donotstash = flg; 1510d71ae5a4SJacob Faibussowitsch break; 1511d71ae5a4SJacob Faibussowitsch case MAT_USE_HASH_TABLE: 1512d71ae5a4SJacob Faibussowitsch a->ht_flag = flg; 1513d71ae5a4SJacob Faibussowitsch break; 1514d71ae5a4SJacob Faibussowitsch case MAT_HERMITIAN: 1515d71ae5a4SJacob Faibussowitsch MatCheckPreallocated(A, 1); 1516d71ae5a4SJacob Faibussowitsch PetscCall(MatSetOption(a->A, op, flg)); 15170f2140c7SStefano Zampini #if defined(PETSC_USE_COMPLEX) 1518eb1ec7c1SStefano Zampini if (flg) { /* need different mat-vec ops */ 1519547795f9SHong Zhang A->ops->mult = MatMult_MPISBAIJ_Hermitian; 1520eb1ec7c1SStefano Zampini A->ops->multadd = MatMultAdd_MPISBAIJ_Hermitian; 1521eb1ec7c1SStefano Zampini A->ops->multtranspose = NULL; 1522eb1ec7c1SStefano Zampini A->ops->multtransposeadd = NULL; 1523b94d7dedSBarry Smith A->symmetric = PETSC_BOOL3_FALSE; 1524eb1ec7c1SStefano Zampini } 15250f2140c7SStefano Zampini #endif 1526eeffb40dSHong Zhang break; 1527ffa07934SHong Zhang case MAT_SPD: 1528d71ae5a4SJacob Faibussowitsch case MAT_SYMMETRIC: 1529d71ae5a4SJacob Faibussowitsch MatCheckPreallocated(A, 1); 1530d71ae5a4SJacob Faibussowitsch PetscCall(MatSetOption(a->A, op, flg)); 1531eb1ec7c1SStefano Zampini #if defined(PETSC_USE_COMPLEX) 1532eb1ec7c1SStefano Zampini if (flg) { /* restore to use default mat-vec ops */ 1533eb1ec7c1SStefano Zampini A->ops->mult = MatMult_MPISBAIJ; 1534eb1ec7c1SStefano Zampini A->ops->multadd = MatMultAdd_MPISBAIJ; 1535eb1ec7c1SStefano Zampini A->ops->multtranspose = MatMult_MPISBAIJ; 1536eb1ec7c1SStefano Zampini A->ops->multtransposeadd = MatMultAdd_MPISBAIJ; 1537eb1ec7c1SStefano Zampini } 1538eb1ec7c1SStefano Zampini #endif 1539eeffb40dSHong Zhang break; 154077e54ba9SKris Buschelman case MAT_STRUCTURALLY_SYMMETRIC: 154143674050SBarry Smith MatCheckPreallocated(A, 1); 15429566063dSJacob Faibussowitsch PetscCall(MatSetOption(a->A, op, flg)); 1543eeffb40dSHong Zhang break; 15449a4540c5SBarry Smith case MAT_SYMMETRY_ETERNAL: 1545b94d7dedSBarry Smith case MAT_STRUCTURAL_SYMMETRY_ETERNAL: 15465f80ce2aSJacob Faibussowitsch PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_SUP, "Matrix must be symmetric"); 15479566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op])); 154877e54ba9SKris Buschelman break; 1549d71ae5a4SJacob Faibussowitsch case MAT_SPD_ETERNAL: 1550d71ae5a4SJacob Faibussowitsch break; 1551d71ae5a4SJacob Faibussowitsch case MAT_IGNORE_LOWER_TRIANGULAR: 1552d71ae5a4SJacob Faibussowitsch aA->ignore_ltriangular = flg; 1553d71ae5a4SJacob Faibussowitsch break; 1554d71ae5a4SJacob Faibussowitsch case MAT_ERROR_LOWER_TRIANGULAR: 1555d71ae5a4SJacob Faibussowitsch aA->ignore_ltriangular = flg; 1556d71ae5a4SJacob Faibussowitsch break; 1557d71ae5a4SJacob Faibussowitsch case MAT_GETROW_UPPERTRIANGULAR: 1558d71ae5a4SJacob Faibussowitsch aA->getrow_utriangular = flg; 1559d71ae5a4SJacob Faibussowitsch break; 1560d71ae5a4SJacob Faibussowitsch default: 1561d71ae5a4SJacob Faibussowitsch SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "unknown option %d", op); 1562a30f8f8cSSatish Balay } 15633ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1564a30f8f8cSSatish Balay } 1565a30f8f8cSSatish Balay 1566d71ae5a4SJacob Faibussowitsch PetscErrorCode MatTranspose_MPISBAIJ(Mat A, MatReuse reuse, Mat *B) 1567d71ae5a4SJacob Faibussowitsch { 1568a30f8f8cSSatish Balay PetscFunctionBegin; 15697fb60732SBarry Smith if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *B)); 1570cf37664fSBarry Smith if (reuse == MAT_INITIAL_MATRIX) { 15719566063dSJacob Faibussowitsch PetscCall(MatDuplicate(A, MAT_COPY_VALUES, B)); 1572cf37664fSBarry Smith } else if (reuse == MAT_REUSE_MATRIX) { 15739566063dSJacob Faibussowitsch PetscCall(MatCopy(A, *B, SAME_NONZERO_PATTERN)); 1574fc4dec0aSBarry Smith } 15753ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1576a30f8f8cSSatish Balay } 1577a30f8f8cSSatish Balay 1578d71ae5a4SJacob Faibussowitsch PetscErrorCode MatDiagonalScale_MPISBAIJ(Mat mat, Vec ll, Vec rr) 1579d71ae5a4SJacob Faibussowitsch { 1580a30f8f8cSSatish Balay Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data; 1581a30f8f8cSSatish Balay Mat a = baij->A, b = baij->B; 15825e90f9d9SHong Zhang PetscInt nv, m, n; 1583ace3abfcSBarry Smith PetscBool flg; 1584a30f8f8cSSatish Balay 1585a30f8f8cSSatish Balay PetscFunctionBegin; 1586a30f8f8cSSatish Balay if (ll != rr) { 15879566063dSJacob Faibussowitsch PetscCall(VecEqual(ll, rr, &flg)); 15885f80ce2aSJacob Faibussowitsch PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "For symmetric format, left and right scaling vectors must be same"); 1589a30f8f8cSSatish Balay } 15903ba16761SJacob Faibussowitsch if (!ll) PetscFunctionReturn(PETSC_SUCCESS); 1591b3bf805bSHong Zhang 15929566063dSJacob Faibussowitsch PetscCall(MatGetLocalSize(mat, &m, &n)); 15935f80ce2aSJacob Faibussowitsch PetscCheck(m == n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "For symmetric format, local size %" PetscInt_FMT " %" PetscInt_FMT " must be same", m, n); 1594b3bf805bSHong Zhang 15959566063dSJacob Faibussowitsch PetscCall(VecGetLocalSize(rr, &nv)); 15965f80ce2aSJacob Faibussowitsch PetscCheck(nv == n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Left and right vector non-conforming local size"); 15975e90f9d9SHong Zhang 15989566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(baij->Mvctx, rr, baij->lvec, INSERT_VALUES, SCATTER_FORWARD)); 15995e90f9d9SHong Zhang 16005e90f9d9SHong Zhang /* left diagonalscale the off-diagonal part */ 1601dbbe0bcdSBarry Smith PetscUseTypeMethod(b, diagonalscale, ll, NULL); 16025e90f9d9SHong Zhang 16035e90f9d9SHong Zhang /* scale the diagonal part */ 1604dbbe0bcdSBarry Smith PetscUseTypeMethod(a, diagonalscale, ll, rr); 1605a30f8f8cSSatish Balay 16065e90f9d9SHong Zhang /* right diagonalscale the off-diagonal part */ 16079566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(baij->Mvctx, rr, baij->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1608dbbe0bcdSBarry Smith PetscUseTypeMethod(b, diagonalscale, NULL, baij->lvec); 16093ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1610a30f8f8cSSatish Balay } 1611a30f8f8cSSatish Balay 1612d71ae5a4SJacob Faibussowitsch PetscErrorCode MatSetUnfactored_MPISBAIJ(Mat A) 1613d71ae5a4SJacob Faibussowitsch { 1614f3566a2aSHong Zhang Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data; 1615a30f8f8cSSatish Balay 1616a30f8f8cSSatish Balay PetscFunctionBegin; 16179566063dSJacob Faibussowitsch PetscCall(MatSetUnfactored(a->A)); 16183ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1619a30f8f8cSSatish Balay } 1620a30f8f8cSSatish Balay 16216849ba73SBarry Smith static PetscErrorCode MatDuplicate_MPISBAIJ(Mat, MatDuplicateOption, Mat *); 1622a30f8f8cSSatish Balay 1623d71ae5a4SJacob Faibussowitsch PetscErrorCode MatEqual_MPISBAIJ(Mat A, Mat B, PetscBool *flag) 1624d71ae5a4SJacob Faibussowitsch { 1625a30f8f8cSSatish Balay Mat_MPISBAIJ *matB = (Mat_MPISBAIJ *)B->data, *matA = (Mat_MPISBAIJ *)A->data; 1626a30f8f8cSSatish Balay Mat a, b, c, d; 1627ace3abfcSBarry Smith PetscBool flg; 1628a30f8f8cSSatish Balay 1629a30f8f8cSSatish Balay PetscFunctionBegin; 16309371c9d4SSatish Balay a = matA->A; 16319371c9d4SSatish Balay b = matA->B; 16329371c9d4SSatish Balay c = matB->A; 16339371c9d4SSatish Balay d = matB->B; 1634a30f8f8cSSatish Balay 16359566063dSJacob Faibussowitsch PetscCall(MatEqual(a, c, &flg)); 163648a46eb9SPierre Jolivet if (flg) PetscCall(MatEqual(b, d, &flg)); 16371c2dc1cbSBarry Smith PetscCall(MPIU_Allreduce(&flg, flag, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A))); 16383ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1639a30f8f8cSSatish Balay } 1640a30f8f8cSSatish Balay 1641d71ae5a4SJacob Faibussowitsch PetscErrorCode MatCopy_MPISBAIJ(Mat A, Mat B, MatStructure str) 1642d71ae5a4SJacob Faibussowitsch { 16434c7a3774SStefano Zampini PetscBool isbaij; 16443c896bc6SHong Zhang 16453c896bc6SHong Zhang PetscFunctionBegin; 16469566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompareAny((PetscObject)B, &isbaij, MATSEQSBAIJ, MATMPISBAIJ, "")); 16475f80ce2aSJacob Faibussowitsch PetscCheck(isbaij, PetscObjectComm((PetscObject)B), PETSC_ERR_SUP, "Not for matrix type %s", ((PetscObject)B)->type_name); 16483c896bc6SHong Zhang /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 16493c896bc6SHong Zhang if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 16509566063dSJacob Faibussowitsch PetscCall(MatGetRowUpperTriangular(A)); 16519566063dSJacob Faibussowitsch PetscCall(MatCopy_Basic(A, B, str)); 16529566063dSJacob Faibussowitsch PetscCall(MatRestoreRowUpperTriangular(A)); 16533c896bc6SHong Zhang } else { 16544c7a3774SStefano Zampini Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data; 16554c7a3774SStefano Zampini Mat_MPISBAIJ *b = (Mat_MPISBAIJ *)B->data; 16564c7a3774SStefano Zampini 16579566063dSJacob Faibussowitsch PetscCall(MatCopy(a->A, b->A, str)); 16589566063dSJacob Faibussowitsch PetscCall(MatCopy(a->B, b->B, str)); 16593c896bc6SHong Zhang } 16609566063dSJacob Faibussowitsch PetscCall(PetscObjectStateIncrease((PetscObject)B)); 16613ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 16623c896bc6SHong Zhang } 16633c896bc6SHong Zhang 1664d71ae5a4SJacob Faibussowitsch PetscErrorCode MatAXPY_MPISBAIJ(Mat Y, PetscScalar a, Mat X, MatStructure str) 1665d71ae5a4SJacob Faibussowitsch { 16664fe895cdSHong Zhang Mat_MPISBAIJ *xx = (Mat_MPISBAIJ *)X->data, *yy = (Mat_MPISBAIJ *)Y->data; 16674fe895cdSHong Zhang PetscBLASInt bnz, one = 1; 16684fe895cdSHong Zhang Mat_SeqSBAIJ *xa, *ya; 16694fe895cdSHong Zhang Mat_SeqBAIJ *xb, *yb; 16704fe895cdSHong Zhang 16714fe895cdSHong Zhang PetscFunctionBegin; 16724fe895cdSHong Zhang if (str == SAME_NONZERO_PATTERN) { 16734fe895cdSHong Zhang PetscScalar alpha = a; 16744fe895cdSHong Zhang xa = (Mat_SeqSBAIJ *)xx->A->data; 16754fe895cdSHong Zhang ya = (Mat_SeqSBAIJ *)yy->A->data; 16769566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast(xa->nz, &bnz)); 1677792fecdfSBarry Smith PetscCallBLAS("BLASaxpy", BLASaxpy_(&bnz, &alpha, xa->a, &one, ya->a, &one)); 16784fe895cdSHong Zhang xb = (Mat_SeqBAIJ *)xx->B->data; 16794fe895cdSHong Zhang yb = (Mat_SeqBAIJ *)yy->B->data; 16809566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast(xb->nz, &bnz)); 1681792fecdfSBarry Smith PetscCallBLAS("BLASaxpy", BLASaxpy_(&bnz, &alpha, xb->a, &one, yb->a, &one)); 16829566063dSJacob Faibussowitsch PetscCall(PetscObjectStateIncrease((PetscObject)Y)); 1683ab784542SHong Zhang } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 16849566063dSJacob Faibussowitsch PetscCall(MatSetOption(X, MAT_GETROW_UPPERTRIANGULAR, PETSC_TRUE)); 16859566063dSJacob Faibussowitsch PetscCall(MatAXPY_Basic(Y, a, X, str)); 16869566063dSJacob Faibussowitsch PetscCall(MatSetOption(X, MAT_GETROW_UPPERTRIANGULAR, PETSC_FALSE)); 16874fe895cdSHong Zhang } else { 16884de5dceeSHong Zhang Mat B; 16894de5dceeSHong Zhang PetscInt *nnz_d, *nnz_o, bs = Y->rmap->bs; 16905f80ce2aSJacob Faibussowitsch PetscCheck(bs == X->rmap->bs, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrices must have same block size"); 16919566063dSJacob Faibussowitsch PetscCall(MatGetRowUpperTriangular(X)); 16929566063dSJacob Faibussowitsch PetscCall(MatGetRowUpperTriangular(Y)); 16939566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(yy->A->rmap->N, &nnz_d)); 16949566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(yy->B->rmap->N, &nnz_o)); 16959566063dSJacob Faibussowitsch PetscCall(MatCreate(PetscObjectComm((PetscObject)Y), &B)); 16969566063dSJacob Faibussowitsch PetscCall(PetscObjectSetName((PetscObject)B, ((PetscObject)Y)->name)); 16979566063dSJacob Faibussowitsch PetscCall(MatSetSizes(B, Y->rmap->n, Y->cmap->n, Y->rmap->N, Y->cmap->N)); 16989566063dSJacob Faibussowitsch PetscCall(MatSetBlockSizesFromMats(B, Y, Y)); 16999566063dSJacob Faibussowitsch PetscCall(MatSetType(B, MATMPISBAIJ)); 17009566063dSJacob Faibussowitsch PetscCall(MatAXPYGetPreallocation_SeqSBAIJ(yy->A, xx->A, nnz_d)); 17019566063dSJacob Faibussowitsch PetscCall(MatAXPYGetPreallocation_MPIBAIJ(yy->B, yy->garray, xx->B, xx->garray, nnz_o)); 17029566063dSJacob Faibussowitsch PetscCall(MatMPISBAIJSetPreallocation(B, bs, 0, nnz_d, 0, nnz_o)); 17039566063dSJacob Faibussowitsch PetscCall(MatAXPY_BasicWithPreallocation(B, Y, a, X, str)); 17049566063dSJacob Faibussowitsch PetscCall(MatHeaderMerge(Y, &B)); 17059566063dSJacob Faibussowitsch PetscCall(PetscFree(nnz_d)); 17069566063dSJacob Faibussowitsch PetscCall(PetscFree(nnz_o)); 17079566063dSJacob Faibussowitsch PetscCall(MatRestoreRowUpperTriangular(X)); 17089566063dSJacob Faibussowitsch PetscCall(MatRestoreRowUpperTriangular(Y)); 17094fe895cdSHong Zhang } 17103ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 17114fe895cdSHong Zhang } 17124fe895cdSHong Zhang 1713d71ae5a4SJacob Faibussowitsch PetscErrorCode MatCreateSubMatrices_MPISBAIJ(Mat A, PetscInt n, const IS irow[], const IS icol[], MatReuse scall, Mat *B[]) 1714d71ae5a4SJacob Faibussowitsch { 17151302d50aSBarry Smith PetscInt i; 1716afebec48SHong Zhang PetscBool flg; 1717a5e6ed63SBarry Smith 17186849ba73SBarry Smith PetscFunctionBegin; 17199566063dSJacob Faibussowitsch PetscCall(MatCreateSubMatrices_MPIBAIJ(A, n, irow, icol, scall, B)); /* B[] are sbaij matrices */ 1720a5e6ed63SBarry Smith for (i = 0; i < n; i++) { 17219566063dSJacob Faibussowitsch PetscCall(ISEqual(irow[i], icol[i], &flg)); 172248a46eb9SPierre Jolivet if (!flg) PetscCall(MatSeqSBAIJZeroOps_Private(*B[i])); 17234dcd73b1SHong Zhang } 17243ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1725a5e6ed63SBarry Smith } 1726a5e6ed63SBarry Smith 1727d71ae5a4SJacob Faibussowitsch PetscErrorCode MatShift_MPISBAIJ(Mat Y, PetscScalar a) 1728d71ae5a4SJacob Faibussowitsch { 17297d68702bSBarry Smith Mat_MPISBAIJ *maij = (Mat_MPISBAIJ *)Y->data; 17306f33a894SBarry Smith Mat_SeqSBAIJ *aij = (Mat_SeqSBAIJ *)maij->A->data; 17317d68702bSBarry Smith 17327d68702bSBarry Smith PetscFunctionBegin; 17336f33a894SBarry Smith if (!Y->preallocated) { 17349566063dSJacob Faibussowitsch PetscCall(MatMPISBAIJSetPreallocation(Y, Y->rmap->bs, 1, NULL, 0, NULL)); 17356f33a894SBarry Smith } else if (!aij->nz) { 1736b83222d8SBarry Smith PetscInt nonew = aij->nonew; 17379566063dSJacob Faibussowitsch PetscCall(MatSeqSBAIJSetPreallocation(maij->A, Y->rmap->bs, 1, NULL)); 1738b83222d8SBarry Smith aij->nonew = nonew; 17397d68702bSBarry Smith } 17409566063dSJacob Faibussowitsch PetscCall(MatShift_Basic(Y, a)); 17413ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 17427d68702bSBarry Smith } 17437d68702bSBarry Smith 1744d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMissingDiagonal_MPISBAIJ(Mat A, PetscBool *missing, PetscInt *d) 1745d71ae5a4SJacob Faibussowitsch { 17463b49f96aSBarry Smith Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data; 17473b49f96aSBarry Smith 17483b49f96aSBarry Smith PetscFunctionBegin; 17495f80ce2aSJacob Faibussowitsch PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only works for square matrices"); 17509566063dSJacob Faibussowitsch PetscCall(MatMissingDiagonal(a->A, missing, d)); 17513b49f96aSBarry Smith if (d) { 17523b49f96aSBarry Smith PetscInt rstart; 17539566063dSJacob Faibussowitsch PetscCall(MatGetOwnershipRange(A, &rstart, NULL)); 17543b49f96aSBarry Smith *d += rstart / A->rmap->bs; 17553b49f96aSBarry Smith } 17563ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 17573b49f96aSBarry Smith } 17583b49f96aSBarry Smith 1759d71ae5a4SJacob Faibussowitsch PetscErrorCode MatGetDiagonalBlock_MPISBAIJ(Mat A, Mat *a) 1760d71ae5a4SJacob Faibussowitsch { 1761a5b7ff6bSBarry Smith PetscFunctionBegin; 1762a5b7ff6bSBarry Smith *a = ((Mat_MPISBAIJ *)A->data)->A; 17633ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1764a5b7ff6bSBarry Smith } 17653b49f96aSBarry Smith 17663964eb88SJed Brown static struct _MatOps MatOps_Values = {MatSetValues_MPISBAIJ, 1767a30f8f8cSSatish Balay MatGetRow_MPISBAIJ, 1768a30f8f8cSSatish Balay MatRestoreRow_MPISBAIJ, 1769a9d4b620SHong Zhang MatMult_MPISBAIJ, 177097304618SKris Buschelman /* 4*/ MatMultAdd_MPISBAIJ, 1771431c96f7SBarry Smith MatMult_MPISBAIJ, /* transpose versions are same as non-transpose */ 1772431c96f7SBarry Smith MatMultAdd_MPISBAIJ, 1773f4259b30SLisandro Dalcin NULL, 1774f4259b30SLisandro Dalcin NULL, 1775f4259b30SLisandro Dalcin NULL, 1776f4259b30SLisandro Dalcin /* 10*/ NULL, 1777f4259b30SLisandro Dalcin NULL, 1778f4259b30SLisandro Dalcin NULL, 177941f059aeSBarry Smith MatSOR_MPISBAIJ, 1780a30f8f8cSSatish Balay MatTranspose_MPISBAIJ, 178197304618SKris Buschelman /* 15*/ MatGetInfo_MPISBAIJ, 1782a30f8f8cSSatish Balay MatEqual_MPISBAIJ, 1783a30f8f8cSSatish Balay MatGetDiagonal_MPISBAIJ, 1784a30f8f8cSSatish Balay MatDiagonalScale_MPISBAIJ, 1785a30f8f8cSSatish Balay MatNorm_MPISBAIJ, 178697304618SKris Buschelman /* 20*/ MatAssemblyBegin_MPISBAIJ, 1787a30f8f8cSSatish Balay MatAssemblyEnd_MPISBAIJ, 1788a30f8f8cSSatish Balay MatSetOption_MPISBAIJ, 1789a30f8f8cSSatish Balay MatZeroEntries_MPISBAIJ, 1790f4259b30SLisandro Dalcin /* 24*/ NULL, 1791f4259b30SLisandro Dalcin NULL, 1792f4259b30SLisandro Dalcin NULL, 1793f4259b30SLisandro Dalcin NULL, 1794f4259b30SLisandro Dalcin NULL, 179526cec326SBarry Smith /* 29*/ MatSetUp_MPI_Hash, 1796f4259b30SLisandro Dalcin NULL, 1797f4259b30SLisandro Dalcin NULL, 1798a5b7ff6bSBarry Smith MatGetDiagonalBlock_MPISBAIJ, 1799f4259b30SLisandro Dalcin NULL, 1800d519adbfSMatthew Knepley /* 34*/ MatDuplicate_MPISBAIJ, 1801f4259b30SLisandro Dalcin NULL, 1802f4259b30SLisandro Dalcin NULL, 1803f4259b30SLisandro Dalcin NULL, 1804f4259b30SLisandro Dalcin NULL, 1805d519adbfSMatthew Knepley /* 39*/ MatAXPY_MPISBAIJ, 18067dae84e0SHong Zhang MatCreateSubMatrices_MPISBAIJ, 1807d94109b8SHong Zhang MatIncreaseOverlap_MPISBAIJ, 1808a30f8f8cSSatish Balay MatGetValues_MPISBAIJ, 18093c896bc6SHong Zhang MatCopy_MPISBAIJ, 1810f4259b30SLisandro Dalcin /* 44*/ NULL, 1811a30f8f8cSSatish Balay MatScale_MPISBAIJ, 18127d68702bSBarry Smith MatShift_MPISBAIJ, 1813f4259b30SLisandro Dalcin NULL, 1814f4259b30SLisandro Dalcin NULL, 1815f4259b30SLisandro Dalcin /* 49*/ NULL, 1816f4259b30SLisandro Dalcin NULL, 1817f4259b30SLisandro Dalcin NULL, 1818f4259b30SLisandro Dalcin NULL, 1819f4259b30SLisandro Dalcin NULL, 1820f4259b30SLisandro Dalcin /* 54*/ NULL, 1821f4259b30SLisandro Dalcin NULL, 1822a30f8f8cSSatish Balay MatSetUnfactored_MPISBAIJ, 1823f4259b30SLisandro Dalcin NULL, 1824a30f8f8cSSatish Balay MatSetValuesBlocked_MPISBAIJ, 18257dae84e0SHong Zhang /* 59*/ MatCreateSubMatrix_MPISBAIJ, 1826f4259b30SLisandro Dalcin NULL, 1827f4259b30SLisandro Dalcin NULL, 1828f4259b30SLisandro Dalcin NULL, 1829f4259b30SLisandro Dalcin NULL, 1830f4259b30SLisandro Dalcin /* 64*/ NULL, 1831f4259b30SLisandro Dalcin NULL, 1832f4259b30SLisandro Dalcin NULL, 1833f4259b30SLisandro Dalcin NULL, 1834f4259b30SLisandro Dalcin NULL, 1835d519adbfSMatthew Knepley /* 69*/ MatGetRowMaxAbs_MPISBAIJ, 1836f4259b30SLisandro Dalcin NULL, 183728d58a37SPierre Jolivet MatConvert_MPISBAIJ_Basic, 1838f4259b30SLisandro Dalcin NULL, 1839f4259b30SLisandro Dalcin NULL, 1840f4259b30SLisandro Dalcin /* 74*/ NULL, 1841f4259b30SLisandro Dalcin NULL, 1842f4259b30SLisandro Dalcin NULL, 1843f4259b30SLisandro Dalcin NULL, 1844f4259b30SLisandro Dalcin NULL, 1845f4259b30SLisandro Dalcin /* 79*/ NULL, 1846f4259b30SLisandro Dalcin NULL, 1847f4259b30SLisandro Dalcin NULL, 1848f4259b30SLisandro Dalcin NULL, 18495bba2384SShri Abhyankar MatLoad_MPISBAIJ, 1850f4259b30SLisandro Dalcin /* 84*/ NULL, 1851f4259b30SLisandro Dalcin NULL, 1852f4259b30SLisandro Dalcin NULL, 1853f4259b30SLisandro Dalcin NULL, 1854f4259b30SLisandro Dalcin NULL, 1855f4259b30SLisandro Dalcin /* 89*/ NULL, 1856f4259b30SLisandro Dalcin NULL, 1857f4259b30SLisandro Dalcin NULL, 1858f4259b30SLisandro Dalcin NULL, 1859f4259b30SLisandro Dalcin NULL, 1860f4259b30SLisandro Dalcin /* 94*/ NULL, 1861f4259b30SLisandro Dalcin NULL, 1862f4259b30SLisandro Dalcin NULL, 1863f4259b30SLisandro Dalcin NULL, 1864f4259b30SLisandro Dalcin NULL, 1865f4259b30SLisandro Dalcin /* 99*/ NULL, 1866f4259b30SLisandro Dalcin NULL, 1867f4259b30SLisandro Dalcin NULL, 18682726fb6dSPierre Jolivet MatConjugate_MPISBAIJ, 1869f4259b30SLisandro Dalcin NULL, 1870f4259b30SLisandro Dalcin /*104*/ NULL, 187199cafbc1SBarry Smith MatRealPart_MPISBAIJ, 1872d0d4cfc2SHong Zhang MatImaginaryPart_MPISBAIJ, 1873d0d4cfc2SHong Zhang MatGetRowUpperTriangular_MPISBAIJ, 187495936485SShri Abhyankar MatRestoreRowUpperTriangular_MPISBAIJ, 1875f4259b30SLisandro Dalcin /*109*/ NULL, 1876f4259b30SLisandro Dalcin NULL, 1877f4259b30SLisandro Dalcin NULL, 1878f4259b30SLisandro Dalcin NULL, 18793b49f96aSBarry Smith MatMissingDiagonal_MPISBAIJ, 1880f4259b30SLisandro Dalcin /*114*/ NULL, 1881f4259b30SLisandro Dalcin NULL, 1882f4259b30SLisandro Dalcin NULL, 1883f4259b30SLisandro Dalcin NULL, 1884f4259b30SLisandro Dalcin NULL, 1885f4259b30SLisandro Dalcin /*119*/ NULL, 1886f4259b30SLisandro Dalcin NULL, 1887f4259b30SLisandro Dalcin NULL, 1888f4259b30SLisandro Dalcin NULL, 1889f4259b30SLisandro Dalcin NULL, 1890f4259b30SLisandro Dalcin /*124*/ NULL, 1891f4259b30SLisandro Dalcin NULL, 1892f4259b30SLisandro Dalcin NULL, 1893f4259b30SLisandro Dalcin NULL, 1894f4259b30SLisandro Dalcin NULL, 1895f4259b30SLisandro Dalcin /*129*/ NULL, 1896f4259b30SLisandro Dalcin NULL, 1897f4259b30SLisandro Dalcin NULL, 1898f4259b30SLisandro Dalcin NULL, 1899f4259b30SLisandro Dalcin NULL, 1900f4259b30SLisandro Dalcin /*134*/ NULL, 1901f4259b30SLisandro Dalcin NULL, 1902f4259b30SLisandro Dalcin NULL, 1903f4259b30SLisandro Dalcin NULL, 1904f4259b30SLisandro Dalcin NULL, 190546533700Sstefano_zampini /*139*/ MatSetBlockSizes_Default, 1906f4259b30SLisandro Dalcin NULL, 1907f4259b30SLisandro Dalcin NULL, 1908f4259b30SLisandro Dalcin NULL, 1909f4259b30SLisandro Dalcin NULL, 1910d70f29a3SPierre Jolivet /*144*/ MatCreateMPIMatConcatenateSeqMat_MPISBAIJ, 1911d70f29a3SPierre Jolivet NULL, 1912d70f29a3SPierre Jolivet NULL, 191399a7f59eSMark Adams NULL, 191499a7f59eSMark Adams NULL, 19157fb60732SBarry Smith NULL, 1916dec0b466SHong Zhang /*150*/ NULL, 1917dec0b466SHong Zhang NULL}; 1918a30f8f8cSSatish Balay 1919d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMPISBAIJSetPreallocation_MPISBAIJ(Mat B, PetscInt bs, PetscInt d_nz, const PetscInt *d_nnz, PetscInt o_nz, const PetscInt *o_nnz) 1920d71ae5a4SJacob Faibussowitsch { 1921476417e5SBarry Smith Mat_MPISBAIJ *b = (Mat_MPISBAIJ *)B->data; 1922535b19f3SBarry Smith PetscInt i, mbs, Mbs; 19235d2a9ed1SStefano Zampini PetscMPIInt size; 1924a23d5eceSKris Buschelman 1925a23d5eceSKris Buschelman PetscFunctionBegin; 1926ad79cf63SBarry Smith if (B->hash_active) { 1927aea10558SJacob Faibussowitsch B->ops[0] = b->cops; 1928ad79cf63SBarry Smith B->hash_active = PETSC_FALSE; 1929ad79cf63SBarry Smith } 1930ad79cf63SBarry Smith if (!B->preallocated) PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B), bs, &B->bstash)); 19319566063dSJacob Faibussowitsch PetscCall(MatSetBlockSize(B, PetscAbs(bs))); 19329566063dSJacob Faibussowitsch PetscCall(PetscLayoutSetUp(B->rmap)); 19339566063dSJacob Faibussowitsch PetscCall(PetscLayoutSetUp(B->cmap)); 19349566063dSJacob Faibussowitsch PetscCall(PetscLayoutGetBlockSize(B->rmap, &bs)); 19355f80ce2aSJacob Faibussowitsch PetscCheck(B->rmap->N <= B->cmap->N, PetscObjectComm((PetscObject)B), PETSC_ERR_SUP, "MPISBAIJ matrix cannot have more rows %" PetscInt_FMT " than columns %" PetscInt_FMT, B->rmap->N, B->cmap->N); 19365f80ce2aSJacob Faibussowitsch PetscCheck(B->rmap->n <= B->cmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "MPISBAIJ matrix cannot have more local rows %" PetscInt_FMT " than columns %" PetscInt_FMT, B->rmap->n, B->cmap->n); 1937899cda47SBarry Smith 1938d0f46423SBarry Smith mbs = B->rmap->n / bs; 1939d0f46423SBarry Smith Mbs = B->rmap->N / bs; 19405f80ce2aSJacob Faibussowitsch PetscCheck(mbs * bs == B->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "No of local rows %" PetscInt_FMT " must be divisible by blocksize %" PetscInt_FMT, B->rmap->N, bs); 1941a23d5eceSKris Buschelman 1942d0f46423SBarry Smith B->rmap->bs = bs; 1943a23d5eceSKris Buschelman b->bs2 = bs * bs; 1944a23d5eceSKris Buschelman b->mbs = mbs; 1945a23d5eceSKris Buschelman b->Mbs = Mbs; 1946de64b629SHong Zhang b->nbs = B->cmap->n / bs; 1947de64b629SHong Zhang b->Nbs = B->cmap->N / bs; 1948a23d5eceSKris Buschelman 1949ad540459SPierre Jolivet for (i = 0; i <= b->size; i++) b->rangebs[i] = B->rmap->range[i] / bs; 1950d0f46423SBarry Smith b->rstartbs = B->rmap->rstart / bs; 1951d0f46423SBarry Smith b->rendbs = B->rmap->rend / bs; 1952a23d5eceSKris Buschelman 1953d0f46423SBarry Smith b->cstartbs = B->cmap->rstart / bs; 1954d0f46423SBarry Smith b->cendbs = B->cmap->rend / bs; 1955a23d5eceSKris Buschelman 1956cb7b82ddSBarry Smith #if defined(PETSC_USE_CTABLE) 1957eec179cfSJacob Faibussowitsch PetscCall(PetscHMapIDestroy(&b->colmap)); 1958cb7b82ddSBarry Smith #else 19599566063dSJacob Faibussowitsch PetscCall(PetscFree(b->colmap)); 1960cb7b82ddSBarry Smith #endif 19619566063dSJacob Faibussowitsch PetscCall(PetscFree(b->garray)); 19629566063dSJacob Faibussowitsch PetscCall(VecDestroy(&b->lvec)); 19639566063dSJacob Faibussowitsch PetscCall(VecScatterDestroy(&b->Mvctx)); 19649566063dSJacob Faibussowitsch PetscCall(VecDestroy(&b->slvec0)); 19659566063dSJacob Faibussowitsch PetscCall(VecDestroy(&b->slvec0b)); 19669566063dSJacob Faibussowitsch PetscCall(VecDestroy(&b->slvec1)); 19679566063dSJacob Faibussowitsch PetscCall(VecDestroy(&b->slvec1a)); 19689566063dSJacob Faibussowitsch PetscCall(VecDestroy(&b->slvec1b)); 19699566063dSJacob Faibussowitsch PetscCall(VecScatterDestroy(&b->sMvctx)); 1970cb7b82ddSBarry Smith 19719566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size)); 19729566063dSJacob Faibussowitsch PetscCall(MatDestroy(&b->B)); 19739566063dSJacob Faibussowitsch PetscCall(MatCreate(PETSC_COMM_SELF, &b->B)); 19749566063dSJacob Faibussowitsch PetscCall(MatSetSizes(b->B, B->rmap->n, size > 1 ? B->cmap->N : 0, B->rmap->n, size > 1 ? B->cmap->N : 0)); 19759566063dSJacob Faibussowitsch PetscCall(MatSetType(b->B, MATSEQBAIJ)); 1976cb7b82ddSBarry Smith 1977ad79cf63SBarry Smith PetscCall(MatDestroy(&b->A)); 19789566063dSJacob Faibussowitsch PetscCall(MatCreate(PETSC_COMM_SELF, &b->A)); 19799566063dSJacob Faibussowitsch PetscCall(MatSetSizes(b->A, B->rmap->n, B->cmap->n, B->rmap->n, B->cmap->n)); 19809566063dSJacob Faibussowitsch PetscCall(MatSetType(b->A, MATSEQSBAIJ)); 1981a23d5eceSKris Buschelman 19829566063dSJacob Faibussowitsch PetscCall(MatSeqSBAIJSetPreallocation(b->A, bs, d_nz, d_nnz)); 19839566063dSJacob Faibussowitsch PetscCall(MatSeqBAIJSetPreallocation(b->B, bs, o_nz, o_nnz)); 198426fbe8dcSKarl Rupp 1985526dfc15SBarry Smith B->preallocated = PETSC_TRUE; 1986cb7b82ddSBarry Smith B->was_assembled = PETSC_FALSE; 1987cb7b82ddSBarry Smith B->assembled = PETSC_FALSE; 19883ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1989a23d5eceSKris Buschelman } 1990a23d5eceSKris Buschelman 1991d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMPISBAIJSetPreallocationCSR_MPISBAIJ(Mat B, PetscInt bs, const PetscInt ii[], const PetscInt jj[], const PetscScalar V[]) 1992d71ae5a4SJacob Faibussowitsch { 199302106b30SBarry Smith PetscInt m, rstart, cend; 1994f4259b30SLisandro Dalcin PetscInt i, j, d, nz, bd, nz_max = 0, *d_nnz = NULL, *o_nnz = NULL; 1995f4259b30SLisandro Dalcin const PetscInt *JJ = NULL; 1996f4259b30SLisandro Dalcin PetscScalar *values = NULL; 1997bb80cfbbSStefano Zampini PetscBool roworiented = ((Mat_MPISBAIJ *)B->data)->roworiented; 19983bd0feecSPierre Jolivet PetscBool nooffprocentries; 1999dfb205c3SBarry Smith 2000dfb205c3SBarry Smith PetscFunctionBegin; 20015f80ce2aSJacob Faibussowitsch PetscCheck(bs >= 1, PetscObjectComm((PetscObject)B), PETSC_ERR_ARG_OUTOFRANGE, "Invalid block size specified, must be positive but it is %" PetscInt_FMT, bs); 20029566063dSJacob Faibussowitsch PetscCall(PetscLayoutSetBlockSize(B->rmap, bs)); 20039566063dSJacob Faibussowitsch PetscCall(PetscLayoutSetBlockSize(B->cmap, bs)); 20049566063dSJacob Faibussowitsch PetscCall(PetscLayoutSetUp(B->rmap)); 20059566063dSJacob Faibussowitsch PetscCall(PetscLayoutSetUp(B->cmap)); 20069566063dSJacob Faibussowitsch PetscCall(PetscLayoutGetBlockSize(B->rmap, &bs)); 2007dfb205c3SBarry Smith m = B->rmap->n / bs; 2008dfb205c3SBarry Smith rstart = B->rmap->rstart / bs; 2009dfb205c3SBarry Smith cend = B->cmap->rend / bs; 2010dfb205c3SBarry Smith 20115f80ce2aSJacob Faibussowitsch PetscCheck(!ii[0], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %" PetscInt_FMT, ii[0]); 20129566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(m, &d_nnz, m, &o_nnz)); 2013dfb205c3SBarry Smith for (i = 0; i < m; i++) { 2014dfb205c3SBarry Smith nz = ii[i + 1] - ii[i]; 20155f80ce2aSJacob Faibussowitsch PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Local row %" PetscInt_FMT " has a negative number of columns %" PetscInt_FMT, i, nz); 20160cd7f59aSBarry Smith /* count the ones on the diagonal and above, split into diagonal and off diagonal portions. */ 2017dfb205c3SBarry Smith JJ = jj + ii[i]; 20180cd7f59aSBarry Smith bd = 0; 2019dfb205c3SBarry Smith for (j = 0; j < nz; j++) { 20200cd7f59aSBarry Smith if (*JJ >= i + rstart) break; 2021dfb205c3SBarry Smith JJ++; 20220cd7f59aSBarry Smith bd++; 2023dfb205c3SBarry Smith } 2024dfb205c3SBarry Smith d = 0; 2025dfb205c3SBarry Smith for (; j < nz; j++) { 2026dfb205c3SBarry Smith if (*JJ++ >= cend) break; 2027dfb205c3SBarry Smith d++; 2028dfb205c3SBarry Smith } 2029dfb205c3SBarry Smith d_nnz[i] = d; 20300cd7f59aSBarry Smith o_nnz[i] = nz - d - bd; 20310cd7f59aSBarry Smith nz = nz - bd; 20320cd7f59aSBarry Smith nz_max = PetscMax(nz_max, nz); 2033dfb205c3SBarry Smith } 20349566063dSJacob Faibussowitsch PetscCall(MatMPISBAIJSetPreallocation(B, bs, 0, d_nnz, 0, o_nnz)); 20359566063dSJacob Faibussowitsch PetscCall(MatSetOption(B, MAT_IGNORE_LOWER_TRIANGULAR, PETSC_TRUE)); 20369566063dSJacob Faibussowitsch PetscCall(PetscFree2(d_nnz, o_nnz)); 2037dfb205c3SBarry Smith 2038dfb205c3SBarry Smith values = (PetscScalar *)V; 203948a46eb9SPierre Jolivet if (!values) PetscCall(PetscCalloc1(bs * bs * nz_max, &values)); 2040dfb205c3SBarry Smith for (i = 0; i < m; i++) { 2041dfb205c3SBarry Smith PetscInt row = i + rstart; 2042dfb205c3SBarry Smith PetscInt ncols = ii[i + 1] - ii[i]; 2043dfb205c3SBarry Smith const PetscInt *icols = jj + ii[i]; 2044bb80cfbbSStefano Zampini if (bs == 1 || !roworiented) { /* block ordering matches the non-nested layout of MatSetValues so we can insert entire rows */ 2045dfb205c3SBarry Smith const PetscScalar *svals = values + (V ? (bs * bs * ii[i]) : 0); 20469566063dSJacob Faibussowitsch PetscCall(MatSetValuesBlocked_MPISBAIJ(B, 1, &row, ncols, icols, svals, INSERT_VALUES)); 2047bb80cfbbSStefano Zampini } else { /* block ordering does not match so we can only insert one block at a time. */ 2048bb80cfbbSStefano Zampini PetscInt j; 20490cd7f59aSBarry Smith for (j = 0; j < ncols; j++) { 20500cd7f59aSBarry Smith const PetscScalar *svals = values + (V ? (bs * bs * (ii[i] + j)) : 0); 20519566063dSJacob Faibussowitsch PetscCall(MatSetValuesBlocked_MPISBAIJ(B, 1, &row, 1, &icols[j], svals, INSERT_VALUES)); 20520cd7f59aSBarry Smith } 20530cd7f59aSBarry Smith } 2054dfb205c3SBarry Smith } 2055dfb205c3SBarry Smith 20569566063dSJacob Faibussowitsch if (!V) PetscCall(PetscFree(values)); 20573bd0feecSPierre Jolivet nooffprocentries = B->nooffprocentries; 20583bd0feecSPierre Jolivet B->nooffprocentries = PETSC_TRUE; 20599566063dSJacob Faibussowitsch PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 20609566063dSJacob Faibussowitsch PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 20613bd0feecSPierre Jolivet B->nooffprocentries = nooffprocentries; 20623bd0feecSPierre Jolivet 20639566063dSJacob Faibussowitsch PetscCall(MatSetOption(B, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE)); 20643ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 2065dfb205c3SBarry Smith } 2066dfb205c3SBarry Smith 20670bad9183SKris Buschelman /*MC 2068fafad747SKris Buschelman MATMPISBAIJ - MATMPISBAIJ = "mpisbaij" - A matrix type to be used for distributed symmetric sparse block matrices, 2069828413b8SBarry Smith based on block compressed sparse row format. Only the upper triangular portion of the "diagonal" portion of 2070828413b8SBarry Smith the matrix is stored. 2071828413b8SBarry Smith 2072828413b8SBarry Smith For complex numbers by default this matrix is symmetric, NOT Hermitian symmetric. To make it Hermitian symmetric you 207311a5261eSBarry Smith can call `MatSetOption`(`Mat`, `MAT_HERMITIAN`); 20740bad9183SKris Buschelman 20752ef1f0ffSBarry Smith Options Database Key: 207611a5261eSBarry Smith . -mat_type mpisbaij - sets the matrix type to "mpisbaij" during a call to `MatSetFromOptions()` 20770bad9183SKris Buschelman 20782ef1f0ffSBarry Smith Level: beginner 20792ef1f0ffSBarry Smith 208011a5261eSBarry Smith Note: 2081476417e5SBarry Smith The number of rows in the matrix must be less than or equal to the number of columns. Similarly the number of rows in the 2082476417e5SBarry Smith diagonal portion of the matrix of each process has to less than or equal the number of columns. 2083476417e5SBarry Smith 20841cc06b55SBarry Smith .seealso: [](ch_matrices), `Mat`, `MATSBAIJ`, `MATBAIJ`, `MatCreateBAIJ()`, `MATSEQSBAIJ`, `MatType` 20850bad9183SKris Buschelman M*/ 20860bad9183SKris Buschelman 2087d71ae5a4SJacob Faibussowitsch PETSC_EXTERN PetscErrorCode MatCreate_MPISBAIJ(Mat B) 2088d71ae5a4SJacob Faibussowitsch { 2089b5df2d14SHong Zhang Mat_MPISBAIJ *b; 209094ae4db5SBarry Smith PetscBool flg = PETSC_FALSE; 2091b5df2d14SHong Zhang 2092b5df2d14SHong Zhang PetscFunctionBegin; 20934dfa11a4SJacob Faibussowitsch PetscCall(PetscNew(&b)); 2094b0a32e0cSBarry Smith B->data = (void *)b; 2095aea10558SJacob Faibussowitsch B->ops[0] = MatOps_Values; 2096b5df2d14SHong Zhang 2097b5df2d14SHong Zhang B->ops->destroy = MatDestroy_MPISBAIJ; 2098b5df2d14SHong Zhang B->ops->view = MatView_MPISBAIJ; 2099b5df2d14SHong Zhang B->assembled = PETSC_FALSE; 2100b5df2d14SHong Zhang B->insertmode = NOT_SET_VALUES; 210126fbe8dcSKarl Rupp 21029566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)B), &b->rank)); 21039566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &b->size)); 2104b5df2d14SHong Zhang 2105b5df2d14SHong Zhang /* build local table of row and column ownerships */ 21069566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(b->size + 2, &b->rangebs)); 2107b5df2d14SHong Zhang 2108b5df2d14SHong Zhang /* build cache for off array entries formed */ 21099566063dSJacob Faibussowitsch PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B), 1, &B->stash)); 211026fbe8dcSKarl Rupp 2111b5df2d14SHong Zhang b->donotstash = PETSC_FALSE; 21120298fd71SBarry Smith b->colmap = NULL; 21130298fd71SBarry Smith b->garray = NULL; 2114b5df2d14SHong Zhang b->roworiented = PETSC_TRUE; 2115b5df2d14SHong Zhang 2116b5df2d14SHong Zhang /* stuff used in block assembly */ 2117f4259b30SLisandro Dalcin b->barray = NULL; 2118b5df2d14SHong Zhang 2119b5df2d14SHong Zhang /* stuff used for matrix vector multiply */ 2120f4259b30SLisandro Dalcin b->lvec = NULL; 2121f4259b30SLisandro Dalcin b->Mvctx = NULL; 2122f4259b30SLisandro Dalcin b->slvec0 = NULL; 2123f4259b30SLisandro Dalcin b->slvec0b = NULL; 2124f4259b30SLisandro Dalcin b->slvec1 = NULL; 2125f4259b30SLisandro Dalcin b->slvec1a = NULL; 2126f4259b30SLisandro Dalcin b->slvec1b = NULL; 2127f4259b30SLisandro Dalcin b->sMvctx = NULL; 2128b5df2d14SHong Zhang 2129b5df2d14SHong Zhang /* stuff for MatGetRow() */ 2130f4259b30SLisandro Dalcin b->rowindices = NULL; 2131f4259b30SLisandro Dalcin b->rowvalues = NULL; 2132b5df2d14SHong Zhang b->getrowactive = PETSC_FALSE; 2133b5df2d14SHong Zhang 2134b5df2d14SHong Zhang /* hash table stuff */ 2135f4259b30SLisandro Dalcin b->ht = NULL; 2136f4259b30SLisandro Dalcin b->hd = NULL; 2137b5df2d14SHong Zhang b->ht_size = 0; 2138b5df2d14SHong Zhang b->ht_flag = PETSC_FALSE; 2139b5df2d14SHong Zhang b->ht_fact = 0; 2140b5df2d14SHong Zhang b->ht_total_ct = 0; 2141b5df2d14SHong Zhang b->ht_insert_ct = 0; 2142b5df2d14SHong Zhang 21437dae84e0SHong Zhang /* stuff for MatCreateSubMatrices_MPIBAIJ_local() */ 21447a868f3eSHong Zhang b->ijonly = PETSC_FALSE; 21457a868f3eSHong Zhang 2146f4259b30SLisandro Dalcin b->in_loc = NULL; 2147f4259b30SLisandro Dalcin b->v_loc = NULL; 214859ffdab8SBarry Smith b->n_loc = 0; 214994ae4db5SBarry Smith 21509566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatStoreValues_C", MatStoreValues_MPISBAIJ)); 21519566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatRetrieveValues_C", MatRetrieveValues_MPISBAIJ)); 21529566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPISBAIJSetPreallocation_C", MatMPISBAIJSetPreallocation_MPISBAIJ)); 21539566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPISBAIJSetPreallocationCSR_C", MatMPISBAIJSetPreallocationCSR_MPISBAIJ)); 21546214f412SHong Zhang #if defined(PETSC_HAVE_ELEMENTAL) 21559566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpisbaij_elemental_C", MatConvert_MPISBAIJ_Elemental)); 21566214f412SHong Zhang #endif 2157d24d4204SJose E. Roman #if defined(PETSC_HAVE_SCALAPACK) 21589566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpisbaij_scalapack_C", MatConvert_SBAIJ_ScaLAPACK)); 2159d24d4204SJose E. Roman #endif 21609566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpisbaij_mpiaij_C", MatConvert_MPISBAIJ_Basic)); 21619566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpisbaij_mpibaij_C", MatConvert_MPISBAIJ_Basic)); 2162aa5a9175SDahai Guo 2163b94d7dedSBarry Smith B->symmetric = PETSC_BOOL3_TRUE; 2164b94d7dedSBarry Smith B->structurally_symmetric = PETSC_BOOL3_TRUE; 2165b94d7dedSBarry Smith B->symmetry_eternal = PETSC_TRUE; 2166b94d7dedSBarry Smith B->structural_symmetry_eternal = PETSC_TRUE; 2167eb1ec7c1SStefano Zampini #if defined(PETSC_USE_COMPLEX) 2168b94d7dedSBarry Smith B->hermitian = PETSC_BOOL3_FALSE; 2169eb1ec7c1SStefano Zampini #else 2170b94d7dedSBarry Smith B->hermitian = PETSC_BOOL3_TRUE; 2171eb1ec7c1SStefano Zampini #endif 217213647f61SHong Zhang 21739566063dSJacob Faibussowitsch PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATMPISBAIJ)); 2174d0609cedSBarry Smith PetscOptionsBegin(PetscObjectComm((PetscObject)B), NULL, "Options for loading MPISBAIJ matrix 1", "Mat"); 21759566063dSJacob Faibussowitsch PetscCall(PetscOptionsBool("-mat_use_hash_table", "Use hash table to save memory in constructing matrix", "MatSetOption", flg, &flg, NULL)); 217694ae4db5SBarry Smith if (flg) { 217794ae4db5SBarry Smith PetscReal fact = 1.39; 21789566063dSJacob Faibussowitsch PetscCall(MatSetOption(B, MAT_USE_HASH_TABLE, PETSC_TRUE)); 21799566063dSJacob Faibussowitsch PetscCall(PetscOptionsReal("-mat_use_hash_table", "Use hash table factor", "MatMPIBAIJSetHashTableFactor", fact, &fact, NULL)); 218094ae4db5SBarry Smith if (fact <= 1.0) fact = 1.39; 21819566063dSJacob Faibussowitsch PetscCall(MatMPIBAIJSetHashTableFactor(B, fact)); 21829566063dSJacob Faibussowitsch PetscCall(PetscInfo(B, "Hash table Factor used %5.2g\n", (double)fact)); 218394ae4db5SBarry Smith } 2184d0609cedSBarry Smith PetscOptionsEnd(); 21853ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 2186b5df2d14SHong Zhang } 2187b5df2d14SHong Zhang 2188*2920cce0SJacob Faibussowitsch // PetscClangLinter pragma disable: -fdoc-section-header-unknown 2189209238afSKris Buschelman /*MC 2190002d173eSKris Buschelman MATSBAIJ - MATSBAIJ = "sbaij" - A matrix type to be used for symmetric block sparse matrices. 2191209238afSKris Buschelman 219211a5261eSBarry Smith This matrix type is identical to `MATSEQSBAIJ` when constructed with a single process communicator, 219311a5261eSBarry Smith and `MATMPISBAIJ` otherwise. 2194209238afSKris Buschelman 219511a5261eSBarry Smith Options Database Key: 2196c5dec841SPierre Jolivet . -mat_type sbaij - sets the matrix type to `MATSBAIJ` during a call to `MatSetFromOptions()` 2197209238afSKris Buschelman 2198209238afSKris Buschelman Level: beginner 2199209238afSKris Buschelman 22001cc06b55SBarry Smith .seealso: [](ch_matrices), `Mat`, `MATSEQSBAIJ`, `MATMPISBAIJ`, `MatCreateSBAIJ()`, `MATSEQSBAIJ`, `MATMPISBAIJ` 2201209238afSKris Buschelman M*/ 2202209238afSKris Buschelman 2203b5df2d14SHong Zhang /*@C 2204b5df2d14SHong Zhang MatMPISBAIJSetPreallocation - For good matrix assembly performance 2205b5df2d14SHong Zhang the user should preallocate the matrix storage by setting the parameters 2206b5df2d14SHong Zhang d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 2207b5df2d14SHong Zhang performance can be increased by more than a factor of 50. 2208b5df2d14SHong Zhang 2209c3339decSBarry Smith Collective 2210b5df2d14SHong Zhang 2211b5df2d14SHong Zhang Input Parameters: 22121c4f3114SJed Brown + B - the matrix 2213bb7ae925SBarry Smith . bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row 2214bb7ae925SBarry Smith blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs() 2215b5df2d14SHong Zhang . d_nz - number of block nonzeros per block row in diagonal portion of local 2216b5df2d14SHong Zhang submatrix (same for all local rows) 2217b5df2d14SHong Zhang . d_nnz - array containing the number of block nonzeros in the various block rows 22186d10fdaeSSatish Balay in the upper triangular and diagonal part of the in diagonal portion of the local 22192ef1f0ffSBarry Smith (possibly different for each block row) or `NULL`. If you plan to factor the matrix you must leave room 222095742e49SBarry Smith for the diagonal entry and set a value even if it is zero. 2221b5df2d14SHong Zhang . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 2222b5df2d14SHong Zhang submatrix (same for all local rows). 2223b5df2d14SHong Zhang - o_nnz - array containing the number of nonzeros in the various block rows of the 2224c2fc9fa9SBarry Smith off-diagonal portion of the local submatrix that is right of the diagonal 22252ef1f0ffSBarry Smith (possibly different for each block row) or `NULL`. 2226b5df2d14SHong Zhang 2227b5df2d14SHong Zhang Options Database Keys: 2228a2b725a8SWilliam Gropp + -mat_no_unroll - uses code that does not unroll the loops in the 2229b5df2d14SHong Zhang block calculations (much slower) 2230a2b725a8SWilliam Gropp - -mat_block_size - size of the blocks to use 2231b5df2d14SHong Zhang 22322ef1f0ffSBarry Smith Level: intermediate 22332ef1f0ffSBarry Smith 2234b5df2d14SHong Zhang Notes: 2235b5df2d14SHong Zhang 223611a5261eSBarry Smith If `PETSC_DECIDE` or `PETSC_DETERMINE` is used for a particular argument on one processor 2237b5df2d14SHong Zhang than it must be used on all processors that share the object for that argument. 2238b5df2d14SHong Zhang 223949a6f317SBarry Smith If the *_nnz parameter is given then the *_nz parameter is ignored 224049a6f317SBarry Smith 2241b5df2d14SHong Zhang Storage Information: 2242b5df2d14SHong Zhang For a square global matrix we define each processor's diagonal portion 2243b5df2d14SHong Zhang to be its local rows and the corresponding columns (a square submatrix); 2244b5df2d14SHong Zhang each processor's off-diagonal portion encompasses the remainder of the 2245b5df2d14SHong Zhang local matrix (a rectangular submatrix). 2246b5df2d14SHong Zhang 2247b5df2d14SHong Zhang The user can specify preallocated storage for the diagonal part of 22482ef1f0ffSBarry Smith the local submatrix with either `d_nz` or `d_nnz` (not both). Set 22492ef1f0ffSBarry Smith `d_nz` = `PETSC_DEFAULT` and `d_nnz` = `NULL` for PETSc to control dynamic 2250b5df2d14SHong Zhang memory allocation. Likewise, specify preallocated storage for the 22512ef1f0ffSBarry Smith off-diagonal part of the local submatrix with `o_nz` or `o_nnz` (not both). 2252b5df2d14SHong Zhang 225311a5261eSBarry Smith You can call `MatGetInfo()` to get information on how effective the preallocation was; 2254aa95bbe8SBarry Smith for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 22552ef1f0ffSBarry Smith You can also run with the option `-info` and look for messages with the string 2256aa95bbe8SBarry Smith malloc in them to see if additional memory allocation was needed. 2257aa95bbe8SBarry Smith 2258b5df2d14SHong Zhang Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 2259b5df2d14SHong Zhang the figure below we depict these three local rows and all columns (0-11). 2260b5df2d14SHong Zhang 2261b5df2d14SHong Zhang .vb 2262b5df2d14SHong Zhang 0 1 2 3 4 5 6 7 8 9 10 11 2263a4b1a0f6SJed Brown -------------------------- 2264c2fc9fa9SBarry Smith row 3 |. . . d d d o o o o o o 2265c2fc9fa9SBarry Smith row 4 |. . . d d d o o o o o o 2266c2fc9fa9SBarry Smith row 5 |. . . d d d o o o o o o 2267a4b1a0f6SJed Brown -------------------------- 2268b5df2d14SHong Zhang .ve 2269b5df2d14SHong Zhang 2270b5df2d14SHong Zhang Thus, any entries in the d locations are stored in the d (diagonal) 2271b5df2d14SHong Zhang submatrix, and any entries in the o locations are stored in the 22726d10fdaeSSatish Balay o (off-diagonal) submatrix. Note that the d matrix is stored in 227311a5261eSBarry Smith `MATSEQSBAIJ` format and the o submatrix in `MATSEQBAIJ` format. 2274b5df2d14SHong Zhang 22752ef1f0ffSBarry Smith Now `d_nz` should indicate the number of block nonzeros per row in the upper triangular 22766d10fdaeSSatish Balay plus the diagonal part of the d matrix, 22772ef1f0ffSBarry Smith and `o_nz` should indicate the number of block nonzeros per row in the o matrix 2278c2fc9fa9SBarry Smith 2279b5df2d14SHong Zhang In general, for PDE problems in which most nonzeros are near the diagonal, 22802ef1f0ffSBarry Smith one expects `d_nz` >> `o_nz`. 2281b5df2d14SHong Zhang 22821cc06b55SBarry Smith .seealso: [](ch_matrices), `Mat`, `MATMPISBAIJ`, `MATSBAIJ`, `MatCreate()`, `MatCreateSeqSBAIJ()`, `MatSetValues()`, `MatCreateBAIJ()`, `PetscSplitOwnership()` 2283b5df2d14SHong Zhang @*/ 2284d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMPISBAIJSetPreallocation(Mat B, PetscInt bs, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[]) 2285d71ae5a4SJacob Faibussowitsch { 2286b5df2d14SHong Zhang PetscFunctionBegin; 22876ba663aaSJed Brown PetscValidHeaderSpecific(B, MAT_CLASSID, 1); 22886ba663aaSJed Brown PetscValidType(B, 1); 22896ba663aaSJed Brown PetscValidLogicalCollectiveInt(B, bs, 2); 2290cac4c232SBarry Smith PetscTryMethod(B, "MatMPISBAIJSetPreallocation_C", (Mat, PetscInt, PetscInt, const PetscInt[], PetscInt, const PetscInt[]), (B, bs, d_nz, d_nnz, o_nz, o_nnz)); 22913ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 2292b5df2d14SHong Zhang } 2293b5df2d14SHong Zhang 2294*2920cce0SJacob Faibussowitsch // PetscClangLinter pragma disable: -fdoc-section-header-unknown 2295a30f8f8cSSatish Balay /*@C 229611a5261eSBarry Smith MatCreateSBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format, `MATSBAIJ`, 2297a30f8f8cSSatish Balay (block compressed row). For good matrix assembly performance 2298a30f8f8cSSatish Balay the user should preallocate the matrix storage by setting the parameters 229920f4b53cSBarry Smith `d_nz` (or `d_nnz`) and `o_nz` (or `o_nnz`). 2300a30f8f8cSSatish Balay 2301d083f849SBarry Smith Collective 2302a30f8f8cSSatish Balay 2303a30f8f8cSSatish Balay Input Parameters: 2304a30f8f8cSSatish Balay + comm - MPI communicator 230511a5261eSBarry Smith . bs - size of block, the blocks are ALWAYS square. One can use `MatSetBlockSizes()` to set a different row and column blocksize but the row 230620f4b53cSBarry Smith blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with `MatCreateVecs()` 230720f4b53cSBarry Smith . m - number of local rows (or `PETSC_DECIDE` to have calculated if `M` is given) 2308a30f8f8cSSatish Balay This value should be the same as the local size used in creating the 2309a30f8f8cSSatish Balay y vector for the matrix-vector product y = Ax. 231020f4b53cSBarry Smith . n - number of local columns (or `PETSC_DECIDE` to have calculated if `N` is given) 2311a30f8f8cSSatish Balay This value should be the same as the local size used in creating the 2312a30f8f8cSSatish Balay x vector for the matrix-vector product y = Ax. 231320f4b53cSBarry Smith . M - number of global rows (or `PETSC_DETERMINE` to have calculated if `m` is given) 231420f4b53cSBarry Smith . N - number of global columns (or `PETSC_DETERMINE` to have calculated if `n` is given) 2315a30f8f8cSSatish Balay . d_nz - number of block nonzeros per block row in diagonal portion of local 2316a30f8f8cSSatish Balay submatrix (same for all local rows) 2317a30f8f8cSSatish Balay . d_nnz - array containing the number of block nonzeros in the various block rows 23186d10fdaeSSatish Balay in the upper triangular portion of the in diagonal portion of the local 23192ef1f0ffSBarry Smith (possibly different for each block block row) or `NULL`. 232095742e49SBarry Smith If you plan to factor the matrix you must leave room for the diagonal entry and 232195742e49SBarry Smith set its value even if it is zero. 2322a30f8f8cSSatish Balay . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 2323a30f8f8cSSatish Balay submatrix (same for all local rows). 2324a30f8f8cSSatish Balay - o_nnz - array containing the number of nonzeros in the various block rows of the 2325a30f8f8cSSatish Balay off-diagonal portion of the local submatrix (possibly different for 23262ef1f0ffSBarry Smith each block row) or `NULL`. 2327a30f8f8cSSatish Balay 2328a30f8f8cSSatish Balay Output Parameter: 2329a30f8f8cSSatish Balay . A - the matrix 2330a30f8f8cSSatish Balay 2331a30f8f8cSSatish Balay Options Database Keys: 2332a2b725a8SWilliam Gropp + -mat_no_unroll - uses code that does not unroll the loops in the 2333a30f8f8cSSatish Balay block calculations (much slower) 2334a30f8f8cSSatish Balay . -mat_block_size - size of the blocks to use 2335a2b725a8SWilliam Gropp - -mat_mpi - use the parallel matrix data structures even on one processor 2336a30f8f8cSSatish Balay (defaults to using SeqBAIJ format on one processor) 2337a30f8f8cSSatish Balay 23382ef1f0ffSBarry Smith Level: intermediate 23392ef1f0ffSBarry Smith 23402ef1f0ffSBarry Smith Notes: 234111a5261eSBarry Smith It is recommended that one use the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`, 2342f6f02116SRichard Tran Mills MatXXXXSetPreallocation() paradigm instead of this routine directly. 234311a5261eSBarry Smith [MatXXXXSetPreallocation() is, for example, `MatSeqAIJSetPreallocation()`] 2344175b88e8SBarry Smith 2345d1be2dadSMatthew Knepley The number of rows and columns must be divisible by blocksize. 23466d6d819aSHong Zhang This matrix type does not support complex Hermitian operation. 2347d1be2dadSMatthew Knepley 2348a30f8f8cSSatish Balay The user MUST specify either the local or global matrix dimensions 2349a30f8f8cSSatish Balay (possibly both). 2350a30f8f8cSSatish Balay 235111a5261eSBarry Smith If `PETSC_DECIDE` or `PETSC_DETERMINE` is used for a particular argument on one processor 2352a30f8f8cSSatish Balay than it must be used on all processors that share the object for that argument. 2353a30f8f8cSSatish Balay 235449a6f317SBarry Smith If the *_nnz parameter is given then the *_nz parameter is ignored 235549a6f317SBarry Smith 2356a30f8f8cSSatish Balay Storage Information: 2357a30f8f8cSSatish Balay For a square global matrix we define each processor's diagonal portion 2358a30f8f8cSSatish Balay to be its local rows and the corresponding columns (a square submatrix); 2359a30f8f8cSSatish Balay each processor's off-diagonal portion encompasses the remainder of the 2360a30f8f8cSSatish Balay local matrix (a rectangular submatrix). 2361a30f8f8cSSatish Balay 2362a30f8f8cSSatish Balay The user can specify preallocated storage for the diagonal part of 23632ef1f0ffSBarry Smith the local submatrix with either `d_nz` or `d_nnz` (not both). Set 23642ef1f0ffSBarry Smith `d_nz` = `PETSC_DEFAULT` and `d_nnz` = `NULL` for PETSc to control dynamic 2365a30f8f8cSSatish Balay memory allocation. Likewise, specify preallocated storage for the 23662ef1f0ffSBarry Smith off-diagonal part of the local submatrix with `o_nz` or `o_nnz` (not both). 2367a30f8f8cSSatish Balay 2368a30f8f8cSSatish Balay Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 2369a30f8f8cSSatish Balay the figure below we depict these three local rows and all columns (0-11). 2370a30f8f8cSSatish Balay 2371a30f8f8cSSatish Balay .vb 2372a30f8f8cSSatish Balay 0 1 2 3 4 5 6 7 8 9 10 11 2373a4b1a0f6SJed Brown -------------------------- 2374c2fc9fa9SBarry Smith row 3 |. . . d d d o o o o o o 2375c2fc9fa9SBarry Smith row 4 |. . . d d d o o o o o o 2376c2fc9fa9SBarry Smith row 5 |. . . d d d o o o o o o 2377a4b1a0f6SJed Brown -------------------------- 2378a30f8f8cSSatish Balay .ve 2379a30f8f8cSSatish Balay 2380a30f8f8cSSatish Balay Thus, any entries in the d locations are stored in the d (diagonal) 2381a30f8f8cSSatish Balay submatrix, and any entries in the o locations are stored in the 23826d10fdaeSSatish Balay o (off-diagonal) submatrix. Note that the d matrix is stored in 23832ef1f0ffSBarry Smith `MATSEQSBAIJ` format and the o submatrix in `MATSEQBAIJ` format. 2384a30f8f8cSSatish Balay 23852ef1f0ffSBarry Smith Now `d_nz` should indicate the number of block nonzeros per row in the upper triangular 23866d10fdaeSSatish Balay plus the diagonal part of the d matrix, 23872ef1f0ffSBarry Smith and `o_nz` should indicate the number of block nonzeros per row in the o matrix. 2388a30f8f8cSSatish Balay In general, for PDE problems in which most nonzeros are near the diagonal, 23892ef1f0ffSBarry Smith one expects `d_nz` >> `o_nz`. 2390a30f8f8cSSatish Balay 23911cc06b55SBarry Smith .seealso: [](ch_matrices), `Mat`, `MATSBAIJ`, `MatCreate()`, `MatCreateSeqSBAIJ()`, `MatSetValues()`, `MatCreateBAIJ()` 2392a30f8f8cSSatish Balay @*/ 2393d71ae5a4SJacob Faibussowitsch PetscErrorCode MatCreateSBAIJ(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[], Mat *A) 2394d71ae5a4SJacob Faibussowitsch { 23951302d50aSBarry Smith PetscMPIInt size; 2396a30f8f8cSSatish Balay 2397a30f8f8cSSatish Balay PetscFunctionBegin; 23989566063dSJacob Faibussowitsch PetscCall(MatCreate(comm, A)); 23999566063dSJacob Faibussowitsch PetscCall(MatSetSizes(*A, m, n, M, N)); 24009566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_size(comm, &size)); 2401273d9f13SBarry Smith if (size > 1) { 24029566063dSJacob Faibussowitsch PetscCall(MatSetType(*A, MATMPISBAIJ)); 24039566063dSJacob Faibussowitsch PetscCall(MatMPISBAIJSetPreallocation(*A, bs, d_nz, d_nnz, o_nz, o_nnz)); 2404273d9f13SBarry Smith } else { 24059566063dSJacob Faibussowitsch PetscCall(MatSetType(*A, MATSEQSBAIJ)); 24069566063dSJacob Faibussowitsch PetscCall(MatSeqSBAIJSetPreallocation(*A, bs, d_nz, d_nnz)); 2407273d9f13SBarry Smith } 24083ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 2409a30f8f8cSSatish Balay } 2410a30f8f8cSSatish Balay 2411d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatDuplicate_MPISBAIJ(Mat matin, MatDuplicateOption cpvalues, Mat *newmat) 2412d71ae5a4SJacob Faibussowitsch { 2413a30f8f8cSSatish Balay Mat mat; 2414a30f8f8cSSatish Balay Mat_MPISBAIJ *a, *oldmat = (Mat_MPISBAIJ *)matin->data; 2415d0f46423SBarry Smith PetscInt len = 0, nt, bs = matin->rmap->bs, mbs = oldmat->mbs; 2416387bc808SHong Zhang PetscScalar *array; 2417a30f8f8cSSatish Balay 2418a30f8f8cSSatish Balay PetscFunctionBegin; 2419f4259b30SLisandro Dalcin *newmat = NULL; 242026fbe8dcSKarl Rupp 24219566063dSJacob Faibussowitsch PetscCall(MatCreate(PetscObjectComm((PetscObject)matin), &mat)); 24229566063dSJacob Faibussowitsch PetscCall(MatSetSizes(mat, matin->rmap->n, matin->cmap->n, matin->rmap->N, matin->cmap->N)); 24239566063dSJacob Faibussowitsch PetscCall(MatSetType(mat, ((PetscObject)matin)->type_name)); 24249566063dSJacob Faibussowitsch PetscCall(PetscLayoutReference(matin->rmap, &mat->rmap)); 24259566063dSJacob Faibussowitsch PetscCall(PetscLayoutReference(matin->cmap, &mat->cmap)); 2426e1b6402fSHong Zhang 2427d5f3da31SBarry Smith mat->factortype = matin->factortype; 2428273d9f13SBarry Smith mat->preallocated = PETSC_TRUE; 242982327fa8SHong Zhang mat->assembled = PETSC_TRUE; 24307fff6886SHong Zhang mat->insertmode = NOT_SET_VALUES; 24317fff6886SHong Zhang 2432b5df2d14SHong Zhang a = (Mat_MPISBAIJ *)mat->data; 2433a30f8f8cSSatish Balay a->bs2 = oldmat->bs2; 2434a30f8f8cSSatish Balay a->mbs = oldmat->mbs; 2435a30f8f8cSSatish Balay a->nbs = oldmat->nbs; 2436a30f8f8cSSatish Balay a->Mbs = oldmat->Mbs; 2437a30f8f8cSSatish Balay a->Nbs = oldmat->Nbs; 2438a30f8f8cSSatish Balay 2439a30f8f8cSSatish Balay a->size = oldmat->size; 2440a30f8f8cSSatish Balay a->rank = oldmat->rank; 2441a30f8f8cSSatish Balay a->donotstash = oldmat->donotstash; 2442a30f8f8cSSatish Balay a->roworiented = oldmat->roworiented; 2443f4259b30SLisandro Dalcin a->rowindices = NULL; 2444f4259b30SLisandro Dalcin a->rowvalues = NULL; 2445a30f8f8cSSatish Balay a->getrowactive = PETSC_FALSE; 2446f4259b30SLisandro Dalcin a->barray = NULL; 2447899cda47SBarry Smith a->rstartbs = oldmat->rstartbs; 2448899cda47SBarry Smith a->rendbs = oldmat->rendbs; 2449899cda47SBarry Smith a->cstartbs = oldmat->cstartbs; 2450899cda47SBarry Smith a->cendbs = oldmat->cendbs; 2451a30f8f8cSSatish Balay 2452a30f8f8cSSatish Balay /* hash table stuff */ 2453f4259b30SLisandro Dalcin a->ht = NULL; 2454f4259b30SLisandro Dalcin a->hd = NULL; 2455a30f8f8cSSatish Balay a->ht_size = 0; 2456a30f8f8cSSatish Balay a->ht_flag = oldmat->ht_flag; 2457a30f8f8cSSatish Balay a->ht_fact = oldmat->ht_fact; 2458a30f8f8cSSatish Balay a->ht_total_ct = 0; 2459a30f8f8cSSatish Balay a->ht_insert_ct = 0; 2460a30f8f8cSSatish Balay 24619566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(a->rangebs, oldmat->rangebs, a->size + 2)); 2462a30f8f8cSSatish Balay if (oldmat->colmap) { 2463a30f8f8cSSatish Balay #if defined(PETSC_USE_CTABLE) 2464eec179cfSJacob Faibussowitsch PetscCall(PetscHMapIDuplicate(oldmat->colmap, &a->colmap)); 2465a30f8f8cSSatish Balay #else 24669566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(a->Nbs, &a->colmap)); 24679566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(a->colmap, oldmat->colmap, a->Nbs)); 2468a30f8f8cSSatish Balay #endif 2469f4259b30SLisandro Dalcin } else a->colmap = NULL; 2470387bc808SHong Zhang 2471a30f8f8cSSatish Balay if (oldmat->garray && (len = ((Mat_SeqBAIJ *)(oldmat->B->data))->nbs)) { 24729566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(len, &a->garray)); 24739566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(a->garray, oldmat->garray, len)); 2474f4259b30SLisandro Dalcin } else a->garray = NULL; 2475a30f8f8cSSatish Balay 24769566063dSJacob Faibussowitsch PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)matin), matin->rmap->bs, &mat->bstash)); 24779566063dSJacob Faibussowitsch PetscCall(VecDuplicate(oldmat->lvec, &a->lvec)); 24789566063dSJacob Faibussowitsch PetscCall(VecScatterCopy(oldmat->Mvctx, &a->Mvctx)); 247982327fa8SHong Zhang 24809566063dSJacob Faibussowitsch PetscCall(VecDuplicate(oldmat->slvec0, &a->slvec0)); 24819566063dSJacob Faibussowitsch PetscCall(VecDuplicate(oldmat->slvec1, &a->slvec1)); 2482387bc808SHong Zhang 24839566063dSJacob Faibussowitsch PetscCall(VecGetLocalSize(a->slvec1, &nt)); 24849566063dSJacob Faibussowitsch PetscCall(VecGetArray(a->slvec1, &array)); 24859566063dSJacob Faibussowitsch PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, bs * mbs, array, &a->slvec1a)); 24869566063dSJacob Faibussowitsch PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, nt - bs * mbs, array + bs * mbs, &a->slvec1b)); 24879566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(a->slvec1, &array)); 24889566063dSJacob Faibussowitsch PetscCall(VecGetArray(a->slvec0, &array)); 24899566063dSJacob Faibussowitsch PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, nt - bs * mbs, array + bs * mbs, &a->slvec0b)); 24909566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(a->slvec0, &array)); 2491387bc808SHong Zhang 2492387bc808SHong Zhang /* ierr = VecScatterCopy(oldmat->sMvctx,&a->sMvctx); - not written yet, replaced by the lazy trick: */ 24939566063dSJacob Faibussowitsch PetscCall(PetscObjectReference((PetscObject)oldmat->sMvctx)); 2494387bc808SHong Zhang a->sMvctx = oldmat->sMvctx; 249582327fa8SHong Zhang 24969566063dSJacob Faibussowitsch PetscCall(MatDuplicate(oldmat->A, cpvalues, &a->A)); 24979566063dSJacob Faibussowitsch PetscCall(MatDuplicate(oldmat->B, cpvalues, &a->B)); 24989566063dSJacob Faibussowitsch PetscCall(PetscFunctionListDuplicate(((PetscObject)matin)->qlist, &((PetscObject)mat)->qlist)); 2499a30f8f8cSSatish Balay *newmat = mat; 25003ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 2501a30f8f8cSSatish Balay } 2502a30f8f8cSSatish Balay 2503618cc2edSLisandro Dalcin /* Used for both MPIBAIJ and MPISBAIJ matrices */ 2504618cc2edSLisandro Dalcin #define MatLoad_MPISBAIJ_Binary MatLoad_MPIBAIJ_Binary 2505618cc2edSLisandro Dalcin 2506d71ae5a4SJacob Faibussowitsch PetscErrorCode MatLoad_MPISBAIJ(Mat mat, PetscViewer viewer) 2507d71ae5a4SJacob Faibussowitsch { 25087f489da9SVaclav Hapla PetscBool isbinary; 250995936485SShri Abhyankar 251095936485SShri Abhyankar PetscFunctionBegin; 25119566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary)); 25125f80ce2aSJacob Faibussowitsch PetscCheck(isbinary, PetscObjectComm((PetscObject)viewer), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)mat)->type_name); 25139566063dSJacob Faibussowitsch PetscCall(MatLoad_MPISBAIJ_Binary(mat, viewer)); 25143ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 251595936485SShri Abhyankar } 251695936485SShri Abhyankar 2517d71ae5a4SJacob Faibussowitsch PetscErrorCode MatGetRowMaxAbs_MPISBAIJ(Mat A, Vec v, PetscInt idx[]) 2518d71ae5a4SJacob Faibussowitsch { 251924d5174aSHong Zhang Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data; 2520f4c0e9e4SHong Zhang Mat_SeqBAIJ *b = (Mat_SeqBAIJ *)(a->B)->data; 2521ca54ac64SHong Zhang PetscReal atmp; 252287828ca2SBarry Smith PetscReal *work, *svalues, *rvalues; 25231302d50aSBarry Smith PetscInt i, bs, mbs, *bi, *bj, brow, j, ncols, krow, kcol, col, row, Mbs, bcol; 25241302d50aSBarry Smith PetscMPIInt rank, size; 25251302d50aSBarry Smith PetscInt *rowners_bs, dest, count, source; 252687828ca2SBarry Smith PetscScalar *va; 25278a1c53f2SBarry Smith MatScalar *ba; 2528f4c0e9e4SHong Zhang MPI_Status stat; 252924d5174aSHong Zhang 253024d5174aSHong Zhang PetscFunctionBegin; 25315f80ce2aSJacob Faibussowitsch PetscCheck(!idx, PETSC_COMM_SELF, PETSC_ERR_SUP, "Send email to petsc-maint@mcs.anl.gov"); 25329566063dSJacob Faibussowitsch PetscCall(MatGetRowMaxAbs(a->A, v, NULL)); 25339566063dSJacob Faibussowitsch PetscCall(VecGetArray(v, &va)); 2534f4c0e9e4SHong Zhang 25359566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 25369566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)A), &rank)); 2537f4c0e9e4SHong Zhang 2538d0f46423SBarry Smith bs = A->rmap->bs; 2539f4c0e9e4SHong Zhang mbs = a->mbs; 2540f4c0e9e4SHong Zhang Mbs = a->Mbs; 2541f4c0e9e4SHong Zhang ba = b->a; 2542f4c0e9e4SHong Zhang bi = b->i; 2543f4c0e9e4SHong Zhang bj = b->j; 2544f4c0e9e4SHong Zhang 2545f4c0e9e4SHong Zhang /* find ownerships */ 2546d0f46423SBarry Smith rowners_bs = A->rmap->range; 2547f4c0e9e4SHong Zhang 2548f4c0e9e4SHong Zhang /* each proc creates an array to be distributed */ 25499566063dSJacob Faibussowitsch PetscCall(PetscCalloc1(bs * Mbs, &work)); 2550f4c0e9e4SHong Zhang 2551f4c0e9e4SHong Zhang /* row_max for B */ 2552b8475685SHong Zhang if (rank != size - 1) { 2553f4c0e9e4SHong Zhang for (i = 0; i < mbs; i++) { 25549371c9d4SSatish Balay ncols = bi[1] - bi[0]; 25559371c9d4SSatish Balay bi++; 2556f4c0e9e4SHong Zhang brow = bs * i; 2557f4c0e9e4SHong Zhang for (j = 0; j < ncols; j++) { 2558f4c0e9e4SHong Zhang bcol = bs * (*bj); 2559f4c0e9e4SHong Zhang for (kcol = 0; kcol < bs; kcol++) { 2560ca54ac64SHong Zhang col = bcol + kcol; /* local col index */ 256104d41228SHong Zhang col += rowners_bs[rank + 1]; /* global col index */ 2562f4c0e9e4SHong Zhang for (krow = 0; krow < bs; krow++) { 25639371c9d4SSatish Balay atmp = PetscAbsScalar(*ba); 25649371c9d4SSatish Balay ba++; 2565ca54ac64SHong Zhang row = brow + krow; /* local row index */ 2566ca54ac64SHong Zhang if (PetscRealPart(va[row]) < atmp) va[row] = atmp; 2567f4c0e9e4SHong Zhang if (work[col] < atmp) work[col] = atmp; 2568f4c0e9e4SHong Zhang } 2569f4c0e9e4SHong Zhang } 2570f4c0e9e4SHong Zhang bj++; 2571f4c0e9e4SHong Zhang } 2572f4c0e9e4SHong Zhang } 2573f4c0e9e4SHong Zhang 2574f4c0e9e4SHong Zhang /* send values to its owners */ 2575f4c0e9e4SHong Zhang for (dest = rank + 1; dest < size; dest++) { 2576f4c0e9e4SHong Zhang svalues = work + rowners_bs[dest]; 2577ca54ac64SHong Zhang count = rowners_bs[dest + 1] - rowners_bs[dest]; 25789566063dSJacob Faibussowitsch PetscCallMPI(MPI_Send(svalues, count, MPIU_REAL, dest, rank, PetscObjectComm((PetscObject)A))); 2579ca54ac64SHong Zhang } 2580f4c0e9e4SHong Zhang } 2581f4c0e9e4SHong Zhang 2582f4c0e9e4SHong Zhang /* receive values */ 2583ca54ac64SHong Zhang if (rank) { 2584f4c0e9e4SHong Zhang rvalues = work; 2585ca54ac64SHong Zhang count = rowners_bs[rank + 1] - rowners_bs[rank]; 2586f4c0e9e4SHong Zhang for (source = 0; source < rank; source++) { 25879566063dSJacob Faibussowitsch PetscCallMPI(MPI_Recv(rvalues, count, MPIU_REAL, MPI_ANY_SOURCE, MPI_ANY_TAG, PetscObjectComm((PetscObject)A), &stat)); 2588f4c0e9e4SHong Zhang /* process values */ 2589f4c0e9e4SHong Zhang for (i = 0; i < count; i++) { 2590ca54ac64SHong Zhang if (PetscRealPart(va[i]) < rvalues[i]) va[i] = rvalues[i]; 2591f4c0e9e4SHong Zhang } 2592f4c0e9e4SHong Zhang } 2593ca54ac64SHong Zhang } 2594f4c0e9e4SHong Zhang 25959566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(v, &va)); 25969566063dSJacob Faibussowitsch PetscCall(PetscFree(work)); 25973ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 259824d5174aSHong Zhang } 25992798e883SHong Zhang 2600d71ae5a4SJacob Faibussowitsch PetscErrorCode MatSOR_MPISBAIJ(Mat matin, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx) 2601d71ae5a4SJacob Faibussowitsch { 26022798e883SHong Zhang Mat_MPISBAIJ *mat = (Mat_MPISBAIJ *)matin->data; 2603d0f46423SBarry Smith PetscInt mbs = mat->mbs, bs = matin->rmap->bs; 26043649974fSBarry Smith PetscScalar *x, *ptr, *from; 2605ffe4fb16SHong Zhang Vec bb1; 26063649974fSBarry Smith const PetscScalar *b; 2607ffe4fb16SHong Zhang 2608ffe4fb16SHong Zhang PetscFunctionBegin; 26095f80ce2aSJacob Faibussowitsch PetscCheck(its > 0 && lits > 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Relaxation requires global its %" PetscInt_FMT " and local its %" PetscInt_FMT " both positive", its, lits); 26105f80ce2aSJacob Faibussowitsch PetscCheck(bs <= 1, PETSC_COMM_SELF, PETSC_ERR_SUP, "SSOR for block size > 1 is not yet implemented"); 2611ffe4fb16SHong Zhang 2612a2b30743SBarry Smith if (flag == SOR_APPLY_UPPER) { 26139566063dSJacob Faibussowitsch PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx)); 26143ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 2615a2b30743SBarry Smith } 2616a2b30743SBarry Smith 2617ffe4fb16SHong Zhang if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 2618ffe4fb16SHong Zhang if (flag & SOR_ZERO_INITIAL_GUESS) { 26199566063dSJacob Faibussowitsch PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, lits, xx)); 2620ffe4fb16SHong Zhang its--; 2621ffe4fb16SHong Zhang } 2622ffe4fb16SHong Zhang 26239566063dSJacob Faibussowitsch PetscCall(VecDuplicate(bb, &bb1)); 2624ffe4fb16SHong Zhang while (its--) { 2625ffe4fb16SHong Zhang /* lower triangular part: slvec0b = - B^T*xx */ 26269566063dSJacob Faibussowitsch PetscCall((*mat->B->ops->multtranspose)(mat->B, xx, mat->slvec0b)); 2627ffe4fb16SHong Zhang 2628ffe4fb16SHong Zhang /* copy xx into slvec0a */ 26299566063dSJacob Faibussowitsch PetscCall(VecGetArray(mat->slvec0, &ptr)); 26309566063dSJacob Faibussowitsch PetscCall(VecGetArray(xx, &x)); 26319566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(ptr, x, bs * mbs)); 26329566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(mat->slvec0, &ptr)); 2633ffe4fb16SHong Zhang 26349566063dSJacob Faibussowitsch PetscCall(VecScale(mat->slvec0, -1.0)); 2635ffe4fb16SHong Zhang 2636ffe4fb16SHong Zhang /* copy bb into slvec1a */ 26379566063dSJacob Faibussowitsch PetscCall(VecGetArray(mat->slvec1, &ptr)); 26389566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(bb, &b)); 26399566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(ptr, b, bs * mbs)); 26409566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(mat->slvec1, &ptr)); 2641ffe4fb16SHong Zhang 2642ffe4fb16SHong Zhang /* set slvec1b = 0 */ 2643629a200eSBarry Smith PetscCall(PetscObjectStateIncrease((PetscObject)mat->slvec1b)); 2644629a200eSBarry Smith PetscCall(VecZeroEntries(mat->slvec1b)); 2645ffe4fb16SHong Zhang 26469566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(mat->sMvctx, mat->slvec0, mat->slvec1, ADD_VALUES, SCATTER_FORWARD)); 26479566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(xx, &x)); 26489566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(bb, &b)); 26499566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(mat->sMvctx, mat->slvec0, mat->slvec1, ADD_VALUES, SCATTER_FORWARD)); 2650ffe4fb16SHong Zhang 2651ffe4fb16SHong Zhang /* upper triangular part: bb1 = bb1 - B*x */ 26529566063dSJacob Faibussowitsch PetscCall((*mat->B->ops->multadd)(mat->B, mat->slvec1b, mat->slvec1a, bb1)); 2653ffe4fb16SHong Zhang 2654ffe4fb16SHong Zhang /* local diagonal sweep */ 26559566063dSJacob Faibussowitsch PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_SYMMETRIC_SWEEP, fshift, lits, lits, xx)); 2656ffe4fb16SHong Zhang } 26579566063dSJacob Faibussowitsch PetscCall(VecDestroy(&bb1)); 2658fa22f6d0SBarry Smith } else if ((flag & SOR_LOCAL_FORWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) { 26599566063dSJacob Faibussowitsch PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx)); 2660fa22f6d0SBarry Smith } else if ((flag & SOR_LOCAL_BACKWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) { 26619566063dSJacob Faibussowitsch PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx)); 2662fa22f6d0SBarry Smith } else if (flag & SOR_EISENSTAT) { 2663fa22f6d0SBarry Smith Vec xx1; 2664ace3abfcSBarry Smith PetscBool hasop; 266520f1ed55SBarry Smith const PetscScalar *diag; 2666887ee2caSBarry Smith PetscScalar *sl, scale = (omega - 2.0) / omega; 266720f1ed55SBarry Smith PetscInt i, n; 2668fa22f6d0SBarry Smith 2669fa22f6d0SBarry Smith if (!mat->xx1) { 26709566063dSJacob Faibussowitsch PetscCall(VecDuplicate(bb, &mat->xx1)); 26719566063dSJacob Faibussowitsch PetscCall(VecDuplicate(bb, &mat->bb1)); 2672fa22f6d0SBarry Smith } 2673fa22f6d0SBarry Smith xx1 = mat->xx1; 2674fa22f6d0SBarry Smith bb1 = mat->bb1; 2675fa22f6d0SBarry Smith 26769566063dSJacob Faibussowitsch PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, (MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP), fshift, lits, 1, xx)); 2677fa22f6d0SBarry Smith 2678fa22f6d0SBarry Smith if (!mat->diag) { 2679effcda25SBarry Smith /* this is wrong for same matrix with new nonzero values */ 26809566063dSJacob Faibussowitsch PetscCall(MatCreateVecs(matin, &mat->diag, NULL)); 26819566063dSJacob Faibussowitsch PetscCall(MatGetDiagonal(matin, mat->diag)); 2682fa22f6d0SBarry Smith } 26839566063dSJacob Faibussowitsch PetscCall(MatHasOperation(matin, MATOP_MULT_DIAGONAL_BLOCK, &hasop)); 2684fa22f6d0SBarry Smith 2685fa22f6d0SBarry Smith if (hasop) { 26869566063dSJacob Faibussowitsch PetscCall(MatMultDiagonalBlock(matin, xx, bb1)); 26879566063dSJacob Faibussowitsch PetscCall(VecAYPX(mat->slvec1a, scale, bb)); 268820f1ed55SBarry Smith } else { 268920f1ed55SBarry Smith /* 269020f1ed55SBarry Smith These two lines are replaced by code that may be a bit faster for a good compiler 26919566063dSJacob Faibussowitsch PetscCall(VecPointwiseMult(mat->slvec1a,mat->diag,xx)); 26929566063dSJacob Faibussowitsch PetscCall(VecAYPX(mat->slvec1a,scale,bb)); 269320f1ed55SBarry Smith */ 26949566063dSJacob Faibussowitsch PetscCall(VecGetArray(mat->slvec1a, &sl)); 26959566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(mat->diag, &diag)); 26969566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(bb, &b)); 26979566063dSJacob Faibussowitsch PetscCall(VecGetArray(xx, &x)); 26989566063dSJacob Faibussowitsch PetscCall(VecGetLocalSize(xx, &n)); 2699887ee2caSBarry Smith if (omega == 1.0) { 270026fbe8dcSKarl Rupp for (i = 0; i < n; i++) sl[i] = b[i] - diag[i] * x[i]; 27019566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(2.0 * n)); 2702887ee2caSBarry Smith } else { 270326fbe8dcSKarl Rupp for (i = 0; i < n; i++) sl[i] = b[i] + scale * diag[i] * x[i]; 27049566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(3.0 * n)); 2705887ee2caSBarry Smith } 27069566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(mat->slvec1a, &sl)); 27079566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(mat->diag, &diag)); 27089566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(bb, &b)); 27099566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(xx, &x)); 271020f1ed55SBarry Smith } 2711fa22f6d0SBarry Smith 2712fa22f6d0SBarry Smith /* multiply off-diagonal portion of matrix */ 2713629a200eSBarry Smith PetscCall(PetscObjectStateIncrease((PetscObject)mat->slvec1b)); 2714629a200eSBarry Smith PetscCall(VecZeroEntries(mat->slvec1b)); 27159566063dSJacob Faibussowitsch PetscCall((*mat->B->ops->multtranspose)(mat->B, xx, mat->slvec0b)); 27169566063dSJacob Faibussowitsch PetscCall(VecGetArray(mat->slvec0, &from)); 27179566063dSJacob Faibussowitsch PetscCall(VecGetArray(xx, &x)); 27189566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(from, x, bs * mbs)); 27199566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(mat->slvec0, &from)); 27209566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(xx, &x)); 27219566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(mat->sMvctx, mat->slvec0, mat->slvec1, ADD_VALUES, SCATTER_FORWARD)); 27229566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(mat->sMvctx, mat->slvec0, mat->slvec1, ADD_VALUES, SCATTER_FORWARD)); 27239566063dSJacob Faibussowitsch PetscCall((*mat->B->ops->multadd)(mat->B, mat->slvec1b, mat->slvec1a, mat->slvec1a)); 2724fa22f6d0SBarry Smith 2725fa22f6d0SBarry Smith /* local sweep */ 27269566063dSJacob Faibussowitsch PetscCall((*mat->A->ops->sor)(mat->A, mat->slvec1a, omega, (MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP), fshift, lits, 1, xx1)); 27279566063dSJacob Faibussowitsch PetscCall(VecAXPY(xx, 1.0, xx1)); 2728f23aa3ddSBarry Smith } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "MatSORType is not supported for SBAIJ matrix format"); 27293ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 2730ffe4fb16SHong Zhang } 2731ffe4fb16SHong Zhang 2732dfb205c3SBarry Smith /*@ 273311a5261eSBarry Smith MatCreateMPISBAIJWithArrays - creates a `MATMPISBAIJ` matrix using arrays that contain in standard 2734dfb205c3SBarry Smith CSR format the local rows. 2735dfb205c3SBarry Smith 2736d083f849SBarry Smith Collective 2737dfb205c3SBarry Smith 2738dfb205c3SBarry Smith Input Parameters: 2739dfb205c3SBarry Smith + comm - MPI communicator 2740dfb205c3SBarry Smith . bs - the block size, only a block size of 1 is supported 274111a5261eSBarry Smith . m - number of local rows (Cannot be `PETSC_DECIDE`) 2742dfb205c3SBarry Smith . n - This value should be the same as the local size used in creating the 274311a5261eSBarry Smith x vector for the matrix-vector product y = Ax. (or `PETSC_DECIDE` to have 27442ef1f0ffSBarry Smith calculated if `N` is given) For square matrices `n` is almost always `m`. 27452ef1f0ffSBarry Smith . M - number of global rows (or `PETSC_DETERMINE` to have calculated if `m` is given) 27462ef1f0ffSBarry Smith . N - number of global columns (or `PETSC_DETERMINE` to have calculated if `n` is given) 2747483a2f95SBarry Smith . i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of block elements in that row block row of the matrix 2748dfb205c3SBarry Smith . j - column indices 2749dfb205c3SBarry Smith - a - matrix values 2750dfb205c3SBarry Smith 2751dfb205c3SBarry Smith Output Parameter: 2752dfb205c3SBarry Smith . mat - the matrix 2753dfb205c3SBarry Smith 2754dfb205c3SBarry Smith Level: intermediate 2755dfb205c3SBarry Smith 2756dfb205c3SBarry Smith Notes: 27572ef1f0ffSBarry Smith The `i`, `j`, and `a` arrays ARE copied by this routine into the internal format used by PETSc; 27582ef1f0ffSBarry Smith thus you CANNOT change the matrix entries by changing the values of `a` after you have 27592ef1f0ffSBarry Smith called this routine. Use `MatCreateMPIAIJWithSplitArrays()` to avoid needing to copy the arrays. 2760dfb205c3SBarry Smith 27612ef1f0ffSBarry Smith The `i` and `j` indices are 0 based, and `i` indices are indices corresponding to the local `j` array. 2762dfb205c3SBarry Smith 27631cc06b55SBarry Smith .seealso: [](ch_matrices), `Mat`, `MATMPISBAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`, 2764db781477SPatrick Sanan `MPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()` 2765dfb205c3SBarry Smith @*/ 2766d71ae5a4SJacob Faibussowitsch PetscErrorCode MatCreateMPISBAIJWithArrays(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt M, PetscInt N, const PetscInt i[], const PetscInt j[], const PetscScalar a[], Mat *mat) 2767d71ae5a4SJacob Faibussowitsch { 2768dfb205c3SBarry Smith PetscFunctionBegin; 27695f80ce2aSJacob Faibussowitsch PetscCheck(!i[0], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0"); 27705f80ce2aSJacob Faibussowitsch PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative"); 27719566063dSJacob Faibussowitsch PetscCall(MatCreate(comm, mat)); 27729566063dSJacob Faibussowitsch PetscCall(MatSetSizes(*mat, m, n, M, N)); 27739566063dSJacob Faibussowitsch PetscCall(MatSetType(*mat, MATMPISBAIJ)); 27749566063dSJacob Faibussowitsch PetscCall(MatMPISBAIJSetPreallocationCSR(*mat, bs, i, j, a)); 27753ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 2776dfb205c3SBarry Smith } 2777dfb205c3SBarry Smith 2778dfb205c3SBarry Smith /*@C 277911a5261eSBarry Smith MatMPISBAIJSetPreallocationCSR - Creates a sparse parallel matrix in `MATMPISBAIJ` format using the given nonzero structure and (optional) numerical values 2780dfb205c3SBarry Smith 2781d083f849SBarry Smith Collective 2782dfb205c3SBarry Smith 2783dfb205c3SBarry Smith Input Parameters: 27841c4f3114SJed Brown + B - the matrix 2785dfb205c3SBarry Smith . bs - the block size 27862ef1f0ffSBarry Smith . i - the indices into `j` for the start of each local row (starts with zero) 2787dfb205c3SBarry Smith . j - the column indices for each local row (starts with zero) these must be sorted for each row 2788dfb205c3SBarry Smith - v - optional values in the matrix 2789dfb205c3SBarry Smith 2790664954b6SBarry Smith Level: advanced 2791664954b6SBarry Smith 2792664954b6SBarry Smith Notes: 27930cd7f59aSBarry Smith Though this routine has Preallocation() in the name it also sets the exact nonzero locations of the matrix entries 27940cd7f59aSBarry Smith and usually the numerical values as well 27950cd7f59aSBarry Smith 279650c5228eSBarry Smith Any entries below the diagonal are ignored 2797dfb205c3SBarry Smith 27981cc06b55SBarry Smith .seealso: [](ch_matrices), `Mat`, `MATMPISBAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIBAIJSetPreallocation()`, `MatCreateAIJ()`, `MPIAIJ` 2799dfb205c3SBarry Smith @*/ 2800d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMPISBAIJSetPreallocationCSR(Mat B, PetscInt bs, const PetscInt i[], const PetscInt j[], const PetscScalar v[]) 2801d71ae5a4SJacob Faibussowitsch { 2802dfb205c3SBarry Smith PetscFunctionBegin; 2803cac4c232SBarry Smith PetscTryMethod(B, "MatMPISBAIJSetPreallocationCSR_C", (Mat, PetscInt, const PetscInt[], const PetscInt[], const PetscScalar[]), (B, bs, i, j, v)); 28043ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 2805dfb205c3SBarry Smith } 2806dfb205c3SBarry Smith 2807d71ae5a4SJacob Faibussowitsch PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPISBAIJ(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat) 2808d71ae5a4SJacob Faibussowitsch { 280910c56fdeSHong Zhang PetscInt m, N, i, rstart, nnz, Ii, bs, cbs; 281010c56fdeSHong Zhang PetscInt *indx; 281110c56fdeSHong Zhang PetscScalar *values; 2812dfb205c3SBarry Smith 28134dcd73b1SHong Zhang PetscFunctionBegin; 28149566063dSJacob Faibussowitsch PetscCall(MatGetSize(inmat, &m, &N)); 281510c56fdeSHong Zhang if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */ 281610c56fdeSHong Zhang Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)inmat->data; 2817de25e9cbSPierre Jolivet PetscInt *dnz, *onz, mbs, Nbs, nbs; 281810c56fdeSHong Zhang PetscInt *bindx, rmax = a->rmax, j; 2819de25e9cbSPierre Jolivet PetscMPIInt rank, size; 28204dcd73b1SHong Zhang 28219566063dSJacob Faibussowitsch PetscCall(MatGetBlockSizes(inmat, &bs, &cbs)); 28229371c9d4SSatish Balay mbs = m / bs; 28239371c9d4SSatish Balay Nbs = N / cbs; 282448a46eb9SPierre Jolivet if (n == PETSC_DECIDE) PetscCall(PetscSplitOwnershipBlock(comm, cbs, &n, &N)); 2825da91a574SPierre Jolivet nbs = n / cbs; 28264dcd73b1SHong Zhang 28279566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(rmax, &bindx)); 2828d0609cedSBarry Smith MatPreallocateBegin(comm, mbs, nbs, dnz, onz); /* inline function, output __end and __rstart are used below */ 2829de25e9cbSPierre Jolivet 28309566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_rank(comm, &rank)); 28319566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_rank(comm, &size)); 2832de25e9cbSPierre Jolivet if (rank == size - 1) { 2833de25e9cbSPierre Jolivet /* Check sum(nbs) = Nbs */ 28345f80ce2aSJacob Faibussowitsch PetscCheck(__end == Nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Sum of local block columns %" PetscInt_FMT " != global block columns %" PetscInt_FMT, __end, Nbs); 2835de25e9cbSPierre Jolivet } 2836de25e9cbSPierre Jolivet 2837d0609cedSBarry Smith rstart = __rstart; /* block rstart of *outmat; see inline function MatPreallocateBegin */ 28389566063dSJacob Faibussowitsch PetscCall(MatSetOption(inmat, MAT_GETROW_UPPERTRIANGULAR, PETSC_TRUE)); 283910c56fdeSHong Zhang for (i = 0; i < mbs; i++) { 28409566063dSJacob Faibussowitsch PetscCall(MatGetRow_SeqSBAIJ(inmat, i * bs, &nnz, &indx, NULL)); /* non-blocked nnz and indx */ 28414dcd73b1SHong Zhang nnz = nnz / bs; 28424dcd73b1SHong Zhang for (j = 0; j < nnz; j++) bindx[j] = indx[j * bs] / bs; 28439566063dSJacob Faibussowitsch PetscCall(MatPreallocateSet(i + rstart, nnz, bindx, dnz, onz)); 28449566063dSJacob Faibussowitsch PetscCall(MatRestoreRow_SeqSBAIJ(inmat, i * bs, &nnz, &indx, NULL)); 28454dcd73b1SHong Zhang } 28469566063dSJacob Faibussowitsch PetscCall(MatSetOption(inmat, MAT_GETROW_UPPERTRIANGULAR, PETSC_FALSE)); 28479566063dSJacob Faibussowitsch PetscCall(PetscFree(bindx)); 28484dcd73b1SHong Zhang 28499566063dSJacob Faibussowitsch PetscCall(MatCreate(comm, outmat)); 28509566063dSJacob Faibussowitsch PetscCall(MatSetSizes(*outmat, m, n, PETSC_DETERMINE, PETSC_DETERMINE)); 28519566063dSJacob Faibussowitsch PetscCall(MatSetBlockSizes(*outmat, bs, cbs)); 28529566063dSJacob Faibussowitsch PetscCall(MatSetType(*outmat, MATSBAIJ)); 28539566063dSJacob Faibussowitsch PetscCall(MatSeqSBAIJSetPreallocation(*outmat, bs, 0, dnz)); 28549566063dSJacob Faibussowitsch PetscCall(MatMPISBAIJSetPreallocation(*outmat, bs, 0, dnz, 0, onz)); 2855d0609cedSBarry Smith MatPreallocateEnd(dnz, onz); 28564dcd73b1SHong Zhang } 28574dcd73b1SHong Zhang 285810c56fdeSHong Zhang /* numeric phase */ 28599566063dSJacob Faibussowitsch PetscCall(MatGetBlockSizes(inmat, &bs, &cbs)); 28609566063dSJacob Faibussowitsch PetscCall(MatGetOwnershipRange(*outmat, &rstart, NULL)); 28614dcd73b1SHong Zhang 28629566063dSJacob Faibussowitsch PetscCall(MatSetOption(inmat, MAT_GETROW_UPPERTRIANGULAR, PETSC_TRUE)); 28634dcd73b1SHong Zhang for (i = 0; i < m; i++) { 28649566063dSJacob Faibussowitsch PetscCall(MatGetRow_SeqSBAIJ(inmat, i, &nnz, &indx, &values)); 28654dcd73b1SHong Zhang Ii = i + rstart; 28669566063dSJacob Faibussowitsch PetscCall(MatSetValues(*outmat, 1, &Ii, nnz, indx, values, INSERT_VALUES)); 28679566063dSJacob Faibussowitsch PetscCall(MatRestoreRow_SeqSBAIJ(inmat, i, &nnz, &indx, &values)); 28684dcd73b1SHong Zhang } 28699566063dSJacob Faibussowitsch PetscCall(MatSetOption(inmat, MAT_GETROW_UPPERTRIANGULAR, PETSC_FALSE)); 28709566063dSJacob Faibussowitsch PetscCall(MatAssemblyBegin(*outmat, MAT_FINAL_ASSEMBLY)); 28719566063dSJacob Faibussowitsch PetscCall(MatAssemblyEnd(*outmat, MAT_FINAL_ASSEMBLY)); 28723ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 28734dcd73b1SHong Zhang } 2874