1c6db04a5SJed Brown #include <../src/mat/impls/baij/mpi/mpibaij.h> /*I "petscmat.h" I*/ 2c6db04a5SJed Brown #include <../src/mat/impls/sbaij/mpi/mpisbaij.h> 3c6db04a5SJed Brown #include <../src/mat/impls/sbaij/seq/sbaij.h> 4c6db04a5SJed Brown #include <petscblaslapack.h> 5a30f8f8cSSatish Balay 626cec326SBarry Smith PetscErrorCode MatDestroy_MPISBAIJ(Mat mat) 726cec326SBarry Smith { 826cec326SBarry Smith Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data; 926cec326SBarry Smith 1026cec326SBarry Smith PetscFunctionBegin; 1126cec326SBarry Smith #if defined(PETSC_USE_LOG) 1226cec326SBarry Smith PetscCall(PetscLogObjectState((PetscObject)mat, "Rows=%" PetscInt_FMT ",Cols=%" PetscInt_FMT, mat->rmap->N, mat->cmap->N)); 1326cec326SBarry Smith #endif 1426cec326SBarry Smith PetscCall(MatStashDestroy_Private(&mat->stash)); 1526cec326SBarry Smith PetscCall(MatStashDestroy_Private(&mat->bstash)); 1626cec326SBarry Smith PetscCall(MatDestroy(&baij->A)); 1726cec326SBarry Smith PetscCall(MatDestroy(&baij->B)); 1826cec326SBarry Smith #if defined(PETSC_USE_CTABLE) 1926cec326SBarry Smith PetscCall(PetscHMapIDestroy(&baij->colmap)); 2026cec326SBarry Smith #else 2126cec326SBarry Smith PetscCall(PetscFree(baij->colmap)); 2226cec326SBarry Smith #endif 2326cec326SBarry Smith PetscCall(PetscFree(baij->garray)); 2426cec326SBarry Smith PetscCall(VecDestroy(&baij->lvec)); 2526cec326SBarry Smith PetscCall(VecScatterDestroy(&baij->Mvctx)); 2626cec326SBarry Smith PetscCall(VecDestroy(&baij->slvec0)); 2726cec326SBarry Smith PetscCall(VecDestroy(&baij->slvec0b)); 2826cec326SBarry Smith PetscCall(VecDestroy(&baij->slvec1)); 2926cec326SBarry Smith PetscCall(VecDestroy(&baij->slvec1a)); 3026cec326SBarry Smith PetscCall(VecDestroy(&baij->slvec1b)); 3126cec326SBarry Smith PetscCall(VecScatterDestroy(&baij->sMvctx)); 3226cec326SBarry Smith PetscCall(PetscFree2(baij->rowvalues, baij->rowindices)); 3326cec326SBarry Smith PetscCall(PetscFree(baij->barray)); 3426cec326SBarry Smith PetscCall(PetscFree(baij->hd)); 3526cec326SBarry Smith PetscCall(VecDestroy(&baij->diag)); 3626cec326SBarry Smith PetscCall(VecDestroy(&baij->bb1)); 3726cec326SBarry Smith PetscCall(VecDestroy(&baij->xx1)); 3826cec326SBarry Smith #if defined(PETSC_USE_REAL_MAT_SINGLE) 3926cec326SBarry Smith PetscCall(PetscFree(baij->setvaluescopy)); 4026cec326SBarry Smith #endif 4126cec326SBarry Smith PetscCall(PetscFree(baij->in_loc)); 4226cec326SBarry Smith PetscCall(PetscFree(baij->v_loc)); 4326cec326SBarry Smith PetscCall(PetscFree(baij->rangebs)); 4426cec326SBarry Smith PetscCall(PetscFree(mat->data)); 4526cec326SBarry Smith 4626cec326SBarry Smith PetscCall(PetscObjectChangeTypeName((PetscObject)mat, NULL)); 4726cec326SBarry Smith PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatStoreValues_C", NULL)); 4826cec326SBarry Smith PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatRetrieveValues_C", NULL)); 4926cec326SBarry Smith PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPISBAIJSetPreallocation_C", NULL)); 5026cec326SBarry Smith PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPISBAIJSetPreallocationCSR_C", NULL)); 5126cec326SBarry Smith #if defined(PETSC_HAVE_ELEMENTAL) 5226cec326SBarry Smith PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpisbaij_elemental_C", NULL)); 5326cec326SBarry Smith #endif 5426cec326SBarry Smith #if defined(PETSC_HAVE_SCALAPACK) 5526cec326SBarry Smith PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpisbaij_scalapack_C", NULL)); 5626cec326SBarry Smith #endif 5726cec326SBarry Smith PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpisbaij_mpiaij_C", NULL)); 5826cec326SBarry Smith PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpisbaij_mpibaij_C", NULL)); 5926cec326SBarry Smith PetscFunctionReturn(PETSC_SUCCESS); 6026cec326SBarry Smith } 6126cec326SBarry Smith 6226cec326SBarry Smith /* defines MatSetValues_MPI_Hash(), MatAssemblyBegin_MPI_Hash(), MatAssemblyEnd_MPI_Hash(), MatSetUp_MPI_Hash() */ 6326cec326SBarry Smith #define TYPE SBAIJ 6426cec326SBarry Smith #define TYPE_SBAIJ 6526cec326SBarry Smith #include "../src/mat/impls/aij/mpi/mpihashmat.h" 6626cec326SBarry Smith #undef TYPE 6726cec326SBarry Smith #undef TYPE_SBAIJ 6826cec326SBarry Smith 696214f412SHong Zhang #if defined(PETSC_HAVE_ELEMENTAL) 70cc2e6a90SBarry Smith PETSC_INTERN PetscErrorCode MatConvert_MPISBAIJ_Elemental(Mat, MatType, MatReuse, Mat *); 716214f412SHong Zhang #endif 72d24d4204SJose E. Roman #if defined(PETSC_HAVE_SCALAPACK) 73d24d4204SJose E. Roman PETSC_INTERN PetscErrorCode MatConvert_SBAIJ_ScaLAPACK(Mat, MatType, MatReuse, Mat *); 74d24d4204SJose E. Roman #endif 75b147fbf3SStefano Zampini 76b147fbf3SStefano Zampini /* This could be moved to matimpl.h */ 77d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatPreallocateWithMats_Private(Mat B, PetscInt nm, Mat X[], PetscBool symm[], PetscBool fill) 78d71ae5a4SJacob Faibussowitsch { 79b147fbf3SStefano Zampini Mat preallocator; 80b147fbf3SStefano Zampini PetscInt r, rstart, rend; 81b147fbf3SStefano Zampini PetscInt bs, i, m, n, M, N; 82b147fbf3SStefano Zampini PetscBool cong = PETSC_TRUE; 83b147fbf3SStefano Zampini 84b147fbf3SStefano Zampini PetscFunctionBegin; 85b147fbf3SStefano Zampini PetscValidHeaderSpecific(B, MAT_CLASSID, 1); 86b147fbf3SStefano Zampini PetscValidLogicalCollectiveInt(B, nm, 2); 87b147fbf3SStefano Zampini for (i = 0; i < nm; i++) { 88b147fbf3SStefano Zampini PetscValidHeaderSpecific(X[i], MAT_CLASSID, 3); 899566063dSJacob Faibussowitsch PetscCall(PetscLayoutCompare(B->rmap, X[i]->rmap, &cong)); 905f80ce2aSJacob Faibussowitsch PetscCheck(cong, PetscObjectComm((PetscObject)B), PETSC_ERR_SUP, "Not for different layouts"); 91b147fbf3SStefano Zampini } 92b147fbf3SStefano Zampini PetscValidLogicalCollectiveBool(B, fill, 5); 939566063dSJacob Faibussowitsch PetscCall(MatGetBlockSize(B, &bs)); 949566063dSJacob Faibussowitsch PetscCall(MatGetSize(B, &M, &N)); 959566063dSJacob Faibussowitsch PetscCall(MatGetLocalSize(B, &m, &n)); 969566063dSJacob Faibussowitsch PetscCall(MatCreate(PetscObjectComm((PetscObject)B), &preallocator)); 979566063dSJacob Faibussowitsch PetscCall(MatSetType(preallocator, MATPREALLOCATOR)); 989566063dSJacob Faibussowitsch PetscCall(MatSetBlockSize(preallocator, bs)); 999566063dSJacob Faibussowitsch PetscCall(MatSetSizes(preallocator, m, n, M, N)); 1009566063dSJacob Faibussowitsch PetscCall(MatSetUp(preallocator)); 1019566063dSJacob Faibussowitsch PetscCall(MatGetOwnershipRange(preallocator, &rstart, &rend)); 102b147fbf3SStefano Zampini for (r = rstart; r < rend; ++r) { 103b147fbf3SStefano Zampini PetscInt ncols; 104b147fbf3SStefano Zampini const PetscInt *row; 105b147fbf3SStefano Zampini const PetscScalar *vals; 106b147fbf3SStefano Zampini 107b147fbf3SStefano Zampini for (i = 0; i < nm; i++) { 1089566063dSJacob Faibussowitsch PetscCall(MatGetRow(X[i], r, &ncols, &row, &vals)); 1099566063dSJacob Faibussowitsch PetscCall(MatSetValues(preallocator, 1, &r, ncols, row, vals, INSERT_VALUES)); 11048a46eb9SPierre Jolivet if (symm && symm[i]) PetscCall(MatSetValues(preallocator, ncols, row, 1, &r, vals, INSERT_VALUES)); 1119566063dSJacob Faibussowitsch PetscCall(MatRestoreRow(X[i], r, &ncols, &row, &vals)); 112b147fbf3SStefano Zampini } 113b147fbf3SStefano Zampini } 1149566063dSJacob Faibussowitsch PetscCall(MatAssemblyBegin(preallocator, MAT_FINAL_ASSEMBLY)); 1159566063dSJacob Faibussowitsch PetscCall(MatAssemblyEnd(preallocator, MAT_FINAL_ASSEMBLY)); 1169566063dSJacob Faibussowitsch PetscCall(MatPreallocatorPreallocate(preallocator, fill, B)); 1179566063dSJacob Faibussowitsch PetscCall(MatDestroy(&preallocator)); 1183ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 119b147fbf3SStefano Zampini } 120b147fbf3SStefano Zampini 121d71ae5a4SJacob Faibussowitsch PETSC_INTERN PetscErrorCode MatConvert_MPISBAIJ_Basic(Mat A, MatType newtype, MatReuse reuse, Mat *newmat) 122d71ae5a4SJacob Faibussowitsch { 123b147fbf3SStefano Zampini Mat B; 124b147fbf3SStefano Zampini PetscInt r; 125b147fbf3SStefano Zampini 126b147fbf3SStefano Zampini PetscFunctionBegin; 127b147fbf3SStefano Zampini if (reuse != MAT_REUSE_MATRIX) { 12828d58a37SPierre Jolivet PetscBool symm = PETSC_TRUE, isdense; 129b147fbf3SStefano Zampini PetscInt bs; 130b147fbf3SStefano Zampini 1319566063dSJacob Faibussowitsch PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 1329566063dSJacob Faibussowitsch PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N)); 1339566063dSJacob Faibussowitsch PetscCall(MatSetType(B, newtype)); 1349566063dSJacob Faibussowitsch PetscCall(MatGetBlockSize(A, &bs)); 1359566063dSJacob Faibussowitsch PetscCall(MatSetBlockSize(B, bs)); 1369566063dSJacob Faibussowitsch PetscCall(PetscLayoutSetUp(B->rmap)); 1379566063dSJacob Faibussowitsch PetscCall(PetscLayoutSetUp(B->cmap)); 1389566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompareAny((PetscObject)B, &isdense, MATSEQDENSE, MATMPIDENSE, MATSEQDENSECUDA, "")); 13928d58a37SPierre Jolivet if (!isdense) { 1409566063dSJacob Faibussowitsch PetscCall(MatGetRowUpperTriangular(A)); 1419566063dSJacob Faibussowitsch PetscCall(MatPreallocateWithMats_Private(B, 1, &A, &symm, PETSC_TRUE)); 1429566063dSJacob Faibussowitsch PetscCall(MatRestoreRowUpperTriangular(A)); 14328d58a37SPierre Jolivet } else { 1449566063dSJacob Faibussowitsch PetscCall(MatSetUp(B)); 14528d58a37SPierre Jolivet } 14628d58a37SPierre Jolivet } else { 14728d58a37SPierre Jolivet B = *newmat; 1489566063dSJacob Faibussowitsch PetscCall(MatZeroEntries(B)); 14928d58a37SPierre Jolivet } 150b147fbf3SStefano Zampini 1519566063dSJacob Faibussowitsch PetscCall(MatGetRowUpperTriangular(A)); 152b147fbf3SStefano Zampini for (r = A->rmap->rstart; r < A->rmap->rend; r++) { 153b147fbf3SStefano Zampini PetscInt ncols; 154b147fbf3SStefano Zampini const PetscInt *row; 155b147fbf3SStefano Zampini const PetscScalar *vals; 156b147fbf3SStefano Zampini 1579566063dSJacob Faibussowitsch PetscCall(MatGetRow(A, r, &ncols, &row, &vals)); 1589566063dSJacob Faibussowitsch PetscCall(MatSetValues(B, 1, &r, ncols, row, vals, INSERT_VALUES)); 159eb1ec7c1SStefano Zampini #if defined(PETSC_USE_COMPLEX) 160b94d7dedSBarry Smith if (A->hermitian == PETSC_BOOL3_TRUE) { 161eb1ec7c1SStefano Zampini PetscInt i; 16248a46eb9SPierre Jolivet for (i = 0; i < ncols; i++) PetscCall(MatSetValue(B, row[i], r, PetscConj(vals[i]), INSERT_VALUES)); 163eb1ec7c1SStefano Zampini } else { 1649566063dSJacob Faibussowitsch PetscCall(MatSetValues(B, ncols, row, 1, &r, vals, INSERT_VALUES)); 165eb1ec7c1SStefano Zampini } 166eb1ec7c1SStefano Zampini #else 1679566063dSJacob Faibussowitsch PetscCall(MatSetValues(B, ncols, row, 1, &r, vals, INSERT_VALUES)); 168eb1ec7c1SStefano Zampini #endif 1699566063dSJacob Faibussowitsch PetscCall(MatRestoreRow(A, r, &ncols, &row, &vals)); 170b147fbf3SStefano Zampini } 1719566063dSJacob Faibussowitsch PetscCall(MatRestoreRowUpperTriangular(A)); 1729566063dSJacob Faibussowitsch PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 1739566063dSJacob Faibussowitsch PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 174b147fbf3SStefano Zampini 175b147fbf3SStefano Zampini if (reuse == MAT_INPLACE_MATRIX) { 1769566063dSJacob Faibussowitsch PetscCall(MatHeaderReplace(A, &B)); 177b147fbf3SStefano Zampini } else { 178b147fbf3SStefano Zampini *newmat = B; 179b147fbf3SStefano Zampini } 1803ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 181b147fbf3SStefano Zampini } 182b147fbf3SStefano Zampini 183d71ae5a4SJacob Faibussowitsch PetscErrorCode MatStoreValues_MPISBAIJ(Mat mat) 184d71ae5a4SJacob Faibussowitsch { 185f3566a2aSHong Zhang Mat_MPISBAIJ *aij = (Mat_MPISBAIJ *)mat->data; 186a30f8f8cSSatish Balay 187a30f8f8cSSatish Balay PetscFunctionBegin; 1889566063dSJacob Faibussowitsch PetscCall(MatStoreValues(aij->A)); 1899566063dSJacob Faibussowitsch PetscCall(MatStoreValues(aij->B)); 1903ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 191a30f8f8cSSatish Balay } 192a30f8f8cSSatish Balay 193d71ae5a4SJacob Faibussowitsch PetscErrorCode MatRetrieveValues_MPISBAIJ(Mat mat) 194d71ae5a4SJacob Faibussowitsch { 195f3566a2aSHong Zhang Mat_MPISBAIJ *aij = (Mat_MPISBAIJ *)mat->data; 196a30f8f8cSSatish Balay 197a30f8f8cSSatish Balay PetscFunctionBegin; 1989566063dSJacob Faibussowitsch PetscCall(MatRetrieveValues(aij->A)); 1999566063dSJacob Faibussowitsch PetscCall(MatRetrieveValues(aij->B)); 2003ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 201a30f8f8cSSatish Balay } 202a30f8f8cSSatish Balay 203d40312a9SBarry Smith #define MatSetValues_SeqSBAIJ_A_Private(row, col, value, addv, orow, ocol) \ 204a30f8f8cSSatish Balay { \ 205a30f8f8cSSatish Balay brow = row / bs; \ 2069371c9d4SSatish Balay rp = aj + ai[brow]; \ 2079371c9d4SSatish Balay ap = aa + bs2 * ai[brow]; \ 2089371c9d4SSatish Balay rmax = aimax[brow]; \ 2099371c9d4SSatish Balay nrow = ailen[brow]; \ 210a30f8f8cSSatish Balay bcol = col / bs; \ 2119371c9d4SSatish Balay ridx = row % bs; \ 2129371c9d4SSatish Balay cidx = col % bs; \ 2139371c9d4SSatish Balay low = 0; \ 2149371c9d4SSatish Balay high = nrow; \ 215a30f8f8cSSatish Balay while (high - low > 3) { \ 216a30f8f8cSSatish Balay t = (low + high) / 2; \ 217a30f8f8cSSatish Balay if (rp[t] > bcol) high = t; \ 218a30f8f8cSSatish Balay else low = t; \ 219a30f8f8cSSatish Balay } \ 220a30f8f8cSSatish Balay for (_i = low; _i < high; _i++) { \ 221a30f8f8cSSatish Balay if (rp[_i] > bcol) break; \ 222a30f8f8cSSatish Balay if (rp[_i] == bcol) { \ 223a30f8f8cSSatish Balay bap = ap + bs2 * _i + bs * cidx + ridx; \ 224a30f8f8cSSatish Balay if (addv == ADD_VALUES) *bap += value; \ 225a30f8f8cSSatish Balay else *bap = value; \ 226a30f8f8cSSatish Balay goto a_noinsert; \ 227a30f8f8cSSatish Balay } \ 228a30f8f8cSSatish Balay } \ 229a30f8f8cSSatish Balay if (a->nonew == 1) goto a_noinsert; \ 2305f80ce2aSJacob Faibussowitsch PetscCheck(a->nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \ 231fef13f97SBarry Smith MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, brow, bcol, rmax, aa, ai, aj, rp, ap, aimax, a->nonew, MatScalar); \ 232a30f8f8cSSatish Balay N = nrow++ - 1; \ 233a30f8f8cSSatish Balay /* shift up all the later entries in this row */ \ 2349566063dSJacob Faibussowitsch PetscCall(PetscArraymove(rp + _i + 1, rp + _i, N - _i + 1)); \ 2359566063dSJacob Faibussowitsch PetscCall(PetscArraymove(ap + bs2 * (_i + 1), ap + bs2 * _i, bs2 * (N - _i + 1))); \ 2369566063dSJacob Faibussowitsch PetscCall(PetscArrayzero(ap + bs2 * _i, bs2)); \ 237a30f8f8cSSatish Balay rp[_i] = bcol; \ 238a30f8f8cSSatish Balay ap[bs2 * _i + bs * cidx + ridx] = value; \ 239e56f5c9eSBarry Smith A->nonzerostate++; \ 240a30f8f8cSSatish Balay a_noinsert:; \ 241a30f8f8cSSatish Balay ailen[brow] = nrow; \ 242a30f8f8cSSatish Balay } 243e5e170daSBarry Smith 244d40312a9SBarry Smith #define MatSetValues_SeqSBAIJ_B_Private(row, col, value, addv, orow, ocol) \ 245a30f8f8cSSatish Balay { \ 246a30f8f8cSSatish Balay brow = row / bs; \ 2479371c9d4SSatish Balay rp = bj + bi[brow]; \ 2489371c9d4SSatish Balay ap = ba + bs2 * bi[brow]; \ 2499371c9d4SSatish Balay rmax = bimax[brow]; \ 2509371c9d4SSatish Balay nrow = bilen[brow]; \ 251a30f8f8cSSatish Balay bcol = col / bs; \ 2529371c9d4SSatish Balay ridx = row % bs; \ 2539371c9d4SSatish Balay cidx = col % bs; \ 2549371c9d4SSatish Balay low = 0; \ 2559371c9d4SSatish Balay high = nrow; \ 256a30f8f8cSSatish Balay while (high - low > 3) { \ 257a30f8f8cSSatish Balay t = (low + high) / 2; \ 258a30f8f8cSSatish Balay if (rp[t] > bcol) high = t; \ 259a30f8f8cSSatish Balay else low = t; \ 260a30f8f8cSSatish Balay } \ 261a30f8f8cSSatish Balay for (_i = low; _i < high; _i++) { \ 262a30f8f8cSSatish Balay if (rp[_i] > bcol) break; \ 263a30f8f8cSSatish Balay if (rp[_i] == bcol) { \ 264a30f8f8cSSatish Balay bap = ap + bs2 * _i + bs * cidx + ridx; \ 265a30f8f8cSSatish Balay if (addv == ADD_VALUES) *bap += value; \ 266a30f8f8cSSatish Balay else *bap = value; \ 267a30f8f8cSSatish Balay goto b_noinsert; \ 268a30f8f8cSSatish Balay } \ 269a30f8f8cSSatish Balay } \ 270a30f8f8cSSatish Balay if (b->nonew == 1) goto b_noinsert; \ 2715f80ce2aSJacob Faibussowitsch PetscCheck(b->nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \ 272fef13f97SBarry Smith MatSeqXAIJReallocateAIJ(B, b->mbs, bs2, nrow, brow, bcol, rmax, ba, bi, bj, rp, ap, bimax, b->nonew, MatScalar); \ 273a30f8f8cSSatish Balay N = nrow++ - 1; \ 274a30f8f8cSSatish Balay /* shift up all the later entries in this row */ \ 2759566063dSJacob Faibussowitsch PetscCall(PetscArraymove(rp + _i + 1, rp + _i, N - _i + 1)); \ 2769566063dSJacob Faibussowitsch PetscCall(PetscArraymove(ap + bs2 * (_i + 1), ap + bs2 * _i, bs2 * (N - _i + 1))); \ 2779566063dSJacob Faibussowitsch PetscCall(PetscArrayzero(ap + bs2 * _i, bs2)); \ 278a30f8f8cSSatish Balay rp[_i] = bcol; \ 279a30f8f8cSSatish Balay ap[bs2 * _i + bs * cidx + ridx] = value; \ 280e56f5c9eSBarry Smith B->nonzerostate++; \ 281a30f8f8cSSatish Balay b_noinsert:; \ 282a30f8f8cSSatish Balay bilen[brow] = nrow; \ 283a30f8f8cSSatish Balay } 284a30f8f8cSSatish Balay 285a30f8f8cSSatish Balay /* Only add/insert a(i,j) with i<=j (blocks). 286da81f932SPierre Jolivet Any a(i,j) with i>j input by user is ignored or generates an error 287a30f8f8cSSatish Balay */ 288d71ae5a4SJacob Faibussowitsch PetscErrorCode MatSetValues_MPISBAIJ(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv) 289d71ae5a4SJacob Faibussowitsch { 290a30f8f8cSSatish Balay Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data; 291a30f8f8cSSatish Balay MatScalar value; 292ace3abfcSBarry Smith PetscBool roworiented = baij->roworiented; 2931302d50aSBarry Smith PetscInt i, j, row, col; 294d0f46423SBarry Smith PetscInt rstart_orig = mat->rmap->rstart; 295d0f46423SBarry Smith PetscInt rend_orig = mat->rmap->rend, cstart_orig = mat->cmap->rstart; 296d0f46423SBarry Smith PetscInt cend_orig = mat->cmap->rend, bs = mat->rmap->bs; 297a30f8f8cSSatish Balay 298a30f8f8cSSatish Balay /* Some Variables required in the macro */ 299a30f8f8cSSatish Balay Mat A = baij->A; 300a30f8f8cSSatish Balay Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)(A)->data; 3011302d50aSBarry Smith PetscInt *aimax = a->imax, *ai = a->i, *ailen = a->ilen, *aj = a->j; 302a30f8f8cSSatish Balay MatScalar *aa = a->a; 303a30f8f8cSSatish Balay 304a30f8f8cSSatish Balay Mat B = baij->B; 305a30f8f8cSSatish Balay Mat_SeqBAIJ *b = (Mat_SeqBAIJ *)(B)->data; 3061302d50aSBarry Smith PetscInt *bimax = b->imax, *bi = b->i, *bilen = b->ilen, *bj = b->j; 307a30f8f8cSSatish Balay MatScalar *ba = b->a; 308a30f8f8cSSatish Balay 3091302d50aSBarry Smith PetscInt *rp, ii, nrow, _i, rmax, N, brow, bcol; 3101302d50aSBarry Smith PetscInt low, high, t, ridx, cidx, bs2 = a->bs2; 311a30f8f8cSSatish Balay MatScalar *ap, *bap; 312a30f8f8cSSatish Balay 313a30f8f8cSSatish Balay /* for stash */ 3140298fd71SBarry Smith PetscInt n_loc, *in_loc = NULL; 3150298fd71SBarry Smith MatScalar *v_loc = NULL; 316a30f8f8cSSatish Balay 317a30f8f8cSSatish Balay PetscFunctionBegin; 318a30f8f8cSSatish Balay if (!baij->donotstash) { 31959ffdab8SBarry Smith if (n > baij->n_loc) { 3209566063dSJacob Faibussowitsch PetscCall(PetscFree(baij->in_loc)); 3219566063dSJacob Faibussowitsch PetscCall(PetscFree(baij->v_loc)); 3229566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(n, &baij->in_loc)); 3239566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(n, &baij->v_loc)); 32426fbe8dcSKarl Rupp 32559ffdab8SBarry Smith baij->n_loc = n; 32659ffdab8SBarry Smith } 32759ffdab8SBarry Smith in_loc = baij->in_loc; 32859ffdab8SBarry Smith v_loc = baij->v_loc; 329a30f8f8cSSatish Balay } 330a30f8f8cSSatish Balay 331a30f8f8cSSatish Balay for (i = 0; i < m; i++) { 332a30f8f8cSSatish Balay if (im[i] < 0) continue; 3335f80ce2aSJacob Faibussowitsch PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1); 334a30f8f8cSSatish Balay if (im[i] >= rstart_orig && im[i] < rend_orig) { /* this processor entry */ 335a30f8f8cSSatish Balay row = im[i] - rstart_orig; /* local row index */ 336a30f8f8cSSatish Balay for (j = 0; j < n; j++) { 33701b2bd88SHong Zhang if (im[i] / bs > in[j] / bs) { 33801b2bd88SHong Zhang if (a->ignore_ltriangular) { 33901b2bd88SHong Zhang continue; /* ignore lower triangular blocks */ 34026fbe8dcSKarl Rupp } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_USER, "Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)"); 34101b2bd88SHong Zhang } 342a30f8f8cSSatish Balay if (in[j] >= cstart_orig && in[j] < cend_orig) { /* diag entry (A) */ 343a30f8f8cSSatish Balay col = in[j] - cstart_orig; /* local col index */ 3449371c9d4SSatish Balay brow = row / bs; 3459371c9d4SSatish Balay bcol = col / bs; 346a30f8f8cSSatish Balay if (brow > bcol) continue; /* ignore lower triangular blocks of A */ 347db4deed7SKarl Rupp if (roworiented) value = v[i * n + j]; 348db4deed7SKarl Rupp else value = v[i + j * m]; 349d40312a9SBarry Smith MatSetValues_SeqSBAIJ_A_Private(row, col, value, addv, im[i], in[j]); 3509566063dSJacob Faibussowitsch /* PetscCall(MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv)); */ 351f7d195e4SLawrence Mitchell } else if (in[j] < 0) { 352f7d195e4SLawrence Mitchell continue; 353f7d195e4SLawrence Mitchell } else { 354f7d195e4SLawrence Mitchell PetscCheck(in[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], mat->cmap->N - 1); 355f7d195e4SLawrence Mitchell /* off-diag entry (B) */ 356a30f8f8cSSatish Balay if (mat->was_assembled) { 35748a46eb9SPierre Jolivet if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat)); 358a30f8f8cSSatish Balay #if defined(PETSC_USE_CTABLE) 359eec179cfSJacob Faibussowitsch PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] / bs + 1, 0, &col)); 36071730473SSatish Balay col = col - 1; 361a30f8f8cSSatish Balay #else 36271730473SSatish Balay col = baij->colmap[in[j] / bs] - 1; 363a30f8f8cSSatish Balay #endif 364a30f8f8cSSatish Balay if (col < 0 && !((Mat_SeqSBAIJ *)(baij->A->data))->nonew) { 3659566063dSJacob Faibussowitsch PetscCall(MatDisAssemble_MPISBAIJ(mat)); 366a30f8f8cSSatish Balay col = in[j]; 367a30f8f8cSSatish Balay /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */ 368a30f8f8cSSatish Balay B = baij->B; 369a30f8f8cSSatish Balay b = (Mat_SeqBAIJ *)(B)->data; 3709371c9d4SSatish Balay bimax = b->imax; 3719371c9d4SSatish Balay bi = b->i; 3729371c9d4SSatish Balay bilen = b->ilen; 3739371c9d4SSatish Balay bj = b->j; 374a30f8f8cSSatish Balay ba = b->a; 37571730473SSatish Balay } else col += in[j] % bs; 376a30f8f8cSSatish Balay } else col = in[j]; 377db4deed7SKarl Rupp if (roworiented) value = v[i * n + j]; 378db4deed7SKarl Rupp else value = v[i + j * m]; 379d40312a9SBarry Smith MatSetValues_SeqSBAIJ_B_Private(row, col, value, addv, im[i], in[j]); 3809566063dSJacob Faibussowitsch /* PetscCall(MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv)); */ 381a30f8f8cSSatish Balay } 382a30f8f8cSSatish Balay } 383a30f8f8cSSatish Balay } else { /* off processor entry */ 3845f80ce2aSJacob Faibussowitsch PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set", im[i]); 385a30f8f8cSSatish Balay if (!baij->donotstash) { 3865080c13bSMatthew G Knepley mat->assembled = PETSC_FALSE; 387a30f8f8cSSatish Balay n_loc = 0; 388a30f8f8cSSatish Balay for (j = 0; j < n; j++) { 389f65c83cfSHong Zhang if (im[i] / bs > in[j] / bs) continue; /* ignore lower triangular blocks */ 390a30f8f8cSSatish Balay in_loc[n_loc] = in[j]; 391a30f8f8cSSatish Balay if (roworiented) { 392a30f8f8cSSatish Balay v_loc[n_loc] = v[i * n + j]; 393a30f8f8cSSatish Balay } else { 394a30f8f8cSSatish Balay v_loc[n_loc] = v[j * m + i]; 395a30f8f8cSSatish Balay } 396a30f8f8cSSatish Balay n_loc++; 397a30f8f8cSSatish Balay } 3989566063dSJacob Faibussowitsch PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n_loc, in_loc, v_loc, PETSC_FALSE)); 399a30f8f8cSSatish Balay } 400a30f8f8cSSatish Balay } 401a30f8f8cSSatish Balay } 4023ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 403a30f8f8cSSatish Balay } 404a30f8f8cSSatish Balay 405d71ae5a4SJacob Faibussowitsch static inline PetscErrorCode MatSetValuesBlocked_SeqSBAIJ_Inlined(Mat A, PetscInt row, PetscInt col, const PetscScalar v[], InsertMode is, PetscInt orow, PetscInt ocol) 406d71ae5a4SJacob Faibussowitsch { 40736bd2089SBarry Smith Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)A->data; 40836bd2089SBarry Smith PetscInt *rp, low, high, t, ii, jj, nrow, i, rmax, N; 40936bd2089SBarry Smith PetscInt *imax = a->imax, *ai = a->i, *ailen = a->ilen; 41036bd2089SBarry Smith PetscInt *aj = a->j, nonew = a->nonew, bs2 = a->bs2, bs = A->rmap->bs; 41136bd2089SBarry Smith PetscBool roworiented = a->roworiented; 41236bd2089SBarry Smith const PetscScalar *value = v; 41336bd2089SBarry Smith MatScalar *ap, *aa = a->a, *bap; 41436bd2089SBarry Smith 41536bd2089SBarry Smith PetscFunctionBegin; 41636bd2089SBarry Smith if (col < row) { 4173ba16761SJacob Faibussowitsch if (a->ignore_ltriangular) PetscFunctionReturn(PETSC_SUCCESS); /* ignore lower triangular block */ 41836bd2089SBarry Smith else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_USER, "Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)"); 41936bd2089SBarry Smith } 42036bd2089SBarry Smith rp = aj + ai[row]; 42136bd2089SBarry Smith ap = aa + bs2 * ai[row]; 42236bd2089SBarry Smith rmax = imax[row]; 42336bd2089SBarry Smith nrow = ailen[row]; 42436bd2089SBarry Smith value = v; 42536bd2089SBarry Smith low = 0; 42636bd2089SBarry Smith high = nrow; 42736bd2089SBarry Smith 42836bd2089SBarry Smith while (high - low > 7) { 42936bd2089SBarry Smith t = (low + high) / 2; 43036bd2089SBarry Smith if (rp[t] > col) high = t; 43136bd2089SBarry Smith else low = t; 43236bd2089SBarry Smith } 43336bd2089SBarry Smith for (i = low; i < high; i++) { 43436bd2089SBarry Smith if (rp[i] > col) break; 43536bd2089SBarry Smith if (rp[i] == col) { 43636bd2089SBarry Smith bap = ap + bs2 * i; 43736bd2089SBarry Smith if (roworiented) { 43836bd2089SBarry Smith if (is == ADD_VALUES) { 43936bd2089SBarry Smith for (ii = 0; ii < bs; ii++) { 440ad540459SPierre Jolivet for (jj = ii; jj < bs2; jj += bs) bap[jj] += *value++; 44136bd2089SBarry Smith } 44236bd2089SBarry Smith } else { 44336bd2089SBarry Smith for (ii = 0; ii < bs; ii++) { 444ad540459SPierre Jolivet for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++; 44536bd2089SBarry Smith } 44636bd2089SBarry Smith } 44736bd2089SBarry Smith } else { 44836bd2089SBarry Smith if (is == ADD_VALUES) { 44936bd2089SBarry Smith for (ii = 0; ii < bs; ii++) { 450ad540459SPierre Jolivet for (jj = 0; jj < bs; jj++) *bap++ += *value++; 45136bd2089SBarry Smith } 45236bd2089SBarry Smith } else { 45336bd2089SBarry Smith for (ii = 0; ii < bs; ii++) { 454ad540459SPierre Jolivet for (jj = 0; jj < bs; jj++) *bap++ = *value++; 45536bd2089SBarry Smith } 45636bd2089SBarry Smith } 45736bd2089SBarry Smith } 45836bd2089SBarry Smith goto noinsert2; 45936bd2089SBarry Smith } 46036bd2089SBarry Smith } 46136bd2089SBarry Smith if (nonew == 1) goto noinsert2; 4625f80ce2aSJacob Faibussowitsch PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new block index nonzero block (%" PetscInt_FMT ", %" PetscInt_FMT ") in the matrix", orow, ocol); 46336bd2089SBarry Smith MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, row, col, rmax, aa, ai, aj, rp, ap, imax, nonew, MatScalar); 4649371c9d4SSatish Balay N = nrow++ - 1; 4659371c9d4SSatish Balay high++; 46636bd2089SBarry Smith /* shift up all the later entries in this row */ 4679566063dSJacob Faibussowitsch PetscCall(PetscArraymove(rp + i + 1, rp + i, N - i + 1)); 4689566063dSJacob Faibussowitsch PetscCall(PetscArraymove(ap + bs2 * (i + 1), ap + bs2 * i, bs2 * (N - i + 1))); 46936bd2089SBarry Smith rp[i] = col; 47036bd2089SBarry Smith bap = ap + bs2 * i; 47136bd2089SBarry Smith if (roworiented) { 47236bd2089SBarry Smith for (ii = 0; ii < bs; ii++) { 473ad540459SPierre Jolivet for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++; 47436bd2089SBarry Smith } 47536bd2089SBarry Smith } else { 47636bd2089SBarry Smith for (ii = 0; ii < bs; ii++) { 477ad540459SPierre Jolivet for (jj = 0; jj < bs; jj++) *bap++ = *value++; 47836bd2089SBarry Smith } 47936bd2089SBarry Smith } 48036bd2089SBarry Smith noinsert2:; 48136bd2089SBarry Smith ailen[row] = nrow; 4823ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 48336bd2089SBarry Smith } 48436bd2089SBarry Smith 48536bd2089SBarry Smith /* 48636bd2089SBarry Smith This routine is exactly duplicated in mpibaij.c 48736bd2089SBarry Smith */ 488d71ae5a4SJacob Faibussowitsch static inline PetscErrorCode MatSetValuesBlocked_SeqBAIJ_Inlined(Mat A, PetscInt row, PetscInt col, const PetscScalar v[], InsertMode is, PetscInt orow, PetscInt ocol) 489d71ae5a4SJacob Faibussowitsch { 49036bd2089SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 49136bd2089SBarry Smith PetscInt *rp, low, high, t, ii, jj, nrow, i, rmax, N; 49236bd2089SBarry Smith PetscInt *imax = a->imax, *ai = a->i, *ailen = a->ilen; 49336bd2089SBarry Smith PetscInt *aj = a->j, nonew = a->nonew, bs2 = a->bs2, bs = A->rmap->bs; 49436bd2089SBarry Smith PetscBool roworiented = a->roworiented; 49536bd2089SBarry Smith const PetscScalar *value = v; 49636bd2089SBarry Smith MatScalar *ap, *aa = a->a, *bap; 49736bd2089SBarry Smith 49836bd2089SBarry Smith PetscFunctionBegin; 49936bd2089SBarry Smith rp = aj + ai[row]; 50036bd2089SBarry Smith ap = aa + bs2 * ai[row]; 50136bd2089SBarry Smith rmax = imax[row]; 50236bd2089SBarry Smith nrow = ailen[row]; 50336bd2089SBarry Smith low = 0; 50436bd2089SBarry Smith high = nrow; 50536bd2089SBarry Smith value = v; 50636bd2089SBarry Smith while (high - low > 7) { 50736bd2089SBarry Smith t = (low + high) / 2; 50836bd2089SBarry Smith if (rp[t] > col) high = t; 50936bd2089SBarry Smith else low = t; 51036bd2089SBarry Smith } 51136bd2089SBarry Smith for (i = low; i < high; i++) { 51236bd2089SBarry Smith if (rp[i] > col) break; 51336bd2089SBarry Smith if (rp[i] == col) { 51436bd2089SBarry Smith bap = ap + bs2 * i; 51536bd2089SBarry Smith if (roworiented) { 51636bd2089SBarry Smith if (is == ADD_VALUES) { 51736bd2089SBarry Smith for (ii = 0; ii < bs; ii++) { 518ad540459SPierre Jolivet for (jj = ii; jj < bs2; jj += bs) bap[jj] += *value++; 51936bd2089SBarry Smith } 52036bd2089SBarry Smith } else { 52136bd2089SBarry Smith for (ii = 0; ii < bs; ii++) { 522ad540459SPierre Jolivet for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++; 52336bd2089SBarry Smith } 52436bd2089SBarry Smith } 52536bd2089SBarry Smith } else { 52636bd2089SBarry Smith if (is == ADD_VALUES) { 52736bd2089SBarry Smith for (ii = 0; ii < bs; ii++, value += bs) { 528ad540459SPierre Jolivet for (jj = 0; jj < bs; jj++) bap[jj] += value[jj]; 52936bd2089SBarry Smith bap += bs; 53036bd2089SBarry Smith } 53136bd2089SBarry Smith } else { 53236bd2089SBarry Smith for (ii = 0; ii < bs; ii++, value += bs) { 533ad540459SPierre Jolivet for (jj = 0; jj < bs; jj++) bap[jj] = value[jj]; 53436bd2089SBarry Smith bap += bs; 53536bd2089SBarry Smith } 53636bd2089SBarry Smith } 53736bd2089SBarry Smith } 53836bd2089SBarry Smith goto noinsert2; 53936bd2089SBarry Smith } 54036bd2089SBarry Smith } 54136bd2089SBarry Smith if (nonew == 1) goto noinsert2; 5425f80ce2aSJacob Faibussowitsch PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new global block indexed nonzero block (%" PetscInt_FMT ", %" PetscInt_FMT ") in the matrix", orow, ocol); 54336bd2089SBarry Smith MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, row, col, rmax, aa, ai, aj, rp, ap, imax, nonew, MatScalar); 5449371c9d4SSatish Balay N = nrow++ - 1; 5459371c9d4SSatish Balay high++; 54636bd2089SBarry Smith /* shift up all the later entries in this row */ 5479566063dSJacob Faibussowitsch PetscCall(PetscArraymove(rp + i + 1, rp + i, N - i + 1)); 5489566063dSJacob Faibussowitsch PetscCall(PetscArraymove(ap + bs2 * (i + 1), ap + bs2 * i, bs2 * (N - i + 1))); 54936bd2089SBarry Smith rp[i] = col; 55036bd2089SBarry Smith bap = ap + bs2 * i; 55136bd2089SBarry Smith if (roworiented) { 55236bd2089SBarry Smith for (ii = 0; ii < bs; ii++) { 553ad540459SPierre Jolivet for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++; 55436bd2089SBarry Smith } 55536bd2089SBarry Smith } else { 55636bd2089SBarry Smith for (ii = 0; ii < bs; ii++) { 557ad540459SPierre Jolivet for (jj = 0; jj < bs; jj++) *bap++ = *value++; 55836bd2089SBarry Smith } 55936bd2089SBarry Smith } 56036bd2089SBarry Smith noinsert2:; 56136bd2089SBarry Smith ailen[row] = nrow; 5623ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 56336bd2089SBarry Smith } 56436bd2089SBarry Smith 56536bd2089SBarry Smith /* 56636bd2089SBarry Smith This routine could be optimized by removing the need for the block copy below and passing stride information 56736bd2089SBarry Smith to the above inline routines; similarly in MatSetValuesBlocked_MPIBAIJ() 56836bd2089SBarry Smith */ 569d71ae5a4SJacob Faibussowitsch PetscErrorCode MatSetValuesBlocked_MPISBAIJ(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const MatScalar v[], InsertMode addv) 570d71ae5a4SJacob Faibussowitsch { 5710880e062SHong Zhang Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data; 572f15d580aSBarry Smith const MatScalar *value; 573f15d580aSBarry Smith MatScalar *barray = baij->barray; 574ace3abfcSBarry Smith PetscBool roworiented = baij->roworiented, ignore_ltriangular = ((Mat_SeqSBAIJ *)baij->A->data)->ignore_ltriangular; 575899cda47SBarry Smith PetscInt i, j, ii, jj, row, col, rstart = baij->rstartbs; 576476417e5SBarry Smith PetscInt rend = baij->rendbs, cstart = baij->cstartbs, stepval; 577476417e5SBarry Smith PetscInt cend = baij->cendbs, bs = mat->rmap->bs, bs2 = baij->bs2; 5780880e062SHong Zhang 579a30f8f8cSSatish Balay PetscFunctionBegin; 5800880e062SHong Zhang if (!barray) { 5819566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(bs2, &barray)); 5820880e062SHong Zhang baij->barray = barray; 5830880e062SHong Zhang } 5840880e062SHong Zhang 5850880e062SHong Zhang if (roworiented) { 5860880e062SHong Zhang stepval = (n - 1) * bs; 5870880e062SHong Zhang } else { 5880880e062SHong Zhang stepval = (m - 1) * bs; 5890880e062SHong Zhang } 5900880e062SHong Zhang for (i = 0; i < m; i++) { 5910880e062SHong Zhang if (im[i] < 0) continue; 5926bdcaf15SBarry Smith PetscCheck(im[i] < baij->Mbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Block indexed row too large %" PetscInt_FMT " max %" PetscInt_FMT, im[i], baij->Mbs - 1); 5930880e062SHong Zhang if (im[i] >= rstart && im[i] < rend) { 5940880e062SHong Zhang row = im[i] - rstart; 5950880e062SHong Zhang for (j = 0; j < n; j++) { 596f3f98c53SJed Brown if (im[i] > in[j]) { 597f3f98c53SJed Brown if (ignore_ltriangular) continue; /* ignore lower triangular blocks */ 598e32f2f54SBarry Smith else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_USER, "Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)"); 599f3f98c53SJed Brown } 6000880e062SHong Zhang /* If NumCol = 1 then a copy is not required */ 6010880e062SHong Zhang if ((roworiented) && (n == 1)) { 602f15d580aSBarry Smith barray = (MatScalar *)v + i * bs2; 6030880e062SHong Zhang } else if ((!roworiented) && (m == 1)) { 604f15d580aSBarry Smith barray = (MatScalar *)v + j * bs2; 6050880e062SHong Zhang } else { /* Here a copy is required */ 6060880e062SHong Zhang if (roworiented) { 6070880e062SHong Zhang value = v + i * (stepval + bs) * bs + j * bs; 6080880e062SHong Zhang } else { 6090880e062SHong Zhang value = v + j * (stepval + bs) * bs + i * bs; 6100880e062SHong Zhang } 6110880e062SHong Zhang for (ii = 0; ii < bs; ii++, value += stepval) { 612ad540459SPierre Jolivet for (jj = 0; jj < bs; jj++) *barray++ = *value++; 6130880e062SHong Zhang } 6140880e062SHong Zhang barray -= bs2; 6150880e062SHong Zhang } 6160880e062SHong Zhang 6170880e062SHong Zhang if (in[j] >= cstart && in[j] < cend) { 6180880e062SHong Zhang col = in[j] - cstart; 6199566063dSJacob Faibussowitsch PetscCall(MatSetValuesBlocked_SeqSBAIJ_Inlined(baij->A, row, col, barray, addv, im[i], in[j])); 620f7d195e4SLawrence Mitchell } else if (in[j] < 0) { 621f7d195e4SLawrence Mitchell continue; 622f7d195e4SLawrence Mitchell } else { 623f7d195e4SLawrence Mitchell PetscCheck(in[j] < baij->Nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Block indexed column too large %" PetscInt_FMT " max %" PetscInt_FMT, in[j], baij->Nbs - 1); 6240880e062SHong Zhang if (mat->was_assembled) { 62548a46eb9SPierre Jolivet if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat)); 6260880e062SHong Zhang 6272515c552SBarry Smith #if defined(PETSC_USE_DEBUG) 6280880e062SHong Zhang #if defined(PETSC_USE_CTABLE) 6299371c9d4SSatish Balay { 6309371c9d4SSatish Balay PetscInt data; 631eec179cfSJacob Faibussowitsch PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] + 1, 0, &data)); 63208401ef6SPierre Jolivet PetscCheck((data - 1) % bs == 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Incorrect colmap"); 6330880e062SHong Zhang } 6340880e062SHong Zhang #else 63508401ef6SPierre Jolivet PetscCheck((baij->colmap[in[j]] - 1) % bs == 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Incorrect colmap"); 6360880e062SHong Zhang #endif 6370880e062SHong Zhang #endif 6380880e062SHong Zhang #if defined(PETSC_USE_CTABLE) 639eec179cfSJacob Faibussowitsch PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] + 1, 0, &col)); 6400880e062SHong Zhang col = (col - 1) / bs; 6410880e062SHong Zhang #else 6420880e062SHong Zhang col = (baij->colmap[in[j]] - 1) / bs; 6430880e062SHong Zhang #endif 6440880e062SHong Zhang if (col < 0 && !((Mat_SeqBAIJ *)(baij->A->data))->nonew) { 6459566063dSJacob Faibussowitsch PetscCall(MatDisAssemble_MPISBAIJ(mat)); 6460880e062SHong Zhang col = in[j]; 6470880e062SHong Zhang } 64826fbe8dcSKarl Rupp } else col = in[j]; 6499566063dSJacob Faibussowitsch PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B, row, col, barray, addv, im[i], in[j])); 6500880e062SHong Zhang } 6510880e062SHong Zhang } 6520880e062SHong Zhang } else { 6535f80ce2aSJacob Faibussowitsch PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process block indexed row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set", im[i]); 6540880e062SHong Zhang if (!baij->donotstash) { 6550880e062SHong Zhang if (roworiented) { 6569566063dSJacob Faibussowitsch PetscCall(MatStashValuesRowBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i)); 6570880e062SHong Zhang } else { 6589566063dSJacob Faibussowitsch PetscCall(MatStashValuesColBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i)); 6590880e062SHong Zhang } 6600880e062SHong Zhang } 6610880e062SHong Zhang } 6620880e062SHong Zhang } 6633ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 664a30f8f8cSSatish Balay } 665a30f8f8cSSatish Balay 666d71ae5a4SJacob Faibussowitsch PetscErrorCode MatGetValues_MPISBAIJ(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], PetscScalar v[]) 667d71ae5a4SJacob Faibussowitsch { 668f3566a2aSHong Zhang Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data; 669d0f46423SBarry Smith PetscInt bs = mat->rmap->bs, i, j, bsrstart = mat->rmap->rstart, bsrend = mat->rmap->rend; 670d0f46423SBarry Smith PetscInt bscstart = mat->cmap->rstart, bscend = mat->cmap->rend, row, col, data; 671a30f8f8cSSatish Balay 672a30f8f8cSSatish Balay PetscFunctionBegin; 673a30f8f8cSSatish Balay for (i = 0; i < m; i++) { 67454c59aa7SJacob Faibussowitsch if (idxm[i] < 0) continue; /* negative row */ 67554c59aa7SJacob Faibussowitsch PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, idxm[i], mat->rmap->N - 1); 676a30f8f8cSSatish Balay if (idxm[i] >= bsrstart && idxm[i] < bsrend) { 677a30f8f8cSSatish Balay row = idxm[i] - bsrstart; 678a30f8f8cSSatish Balay for (j = 0; j < n; j++) { 67954c59aa7SJacob Faibussowitsch if (idxn[j] < 0) continue; /* negative column */ 68054c59aa7SJacob Faibussowitsch PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, idxn[j], mat->cmap->N - 1); 681a30f8f8cSSatish Balay if (idxn[j] >= bscstart && idxn[j] < bscend) { 682a30f8f8cSSatish Balay col = idxn[j] - bscstart; 6839566063dSJacob Faibussowitsch PetscCall(MatGetValues_SeqSBAIJ(baij->A, 1, &row, 1, &col, v + i * n + j)); 684a30f8f8cSSatish Balay } else { 68548a46eb9SPierre Jolivet if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat)); 686a30f8f8cSSatish Balay #if defined(PETSC_USE_CTABLE) 687eec179cfSJacob Faibussowitsch PetscCall(PetscHMapIGetWithDefault(baij->colmap, idxn[j] / bs + 1, 0, &data)); 688a30f8f8cSSatish Balay data--; 689a30f8f8cSSatish Balay #else 690a30f8f8cSSatish Balay data = baij->colmap[idxn[j] / bs] - 1; 691a30f8f8cSSatish Balay #endif 692a30f8f8cSSatish Balay if ((data < 0) || (baij->garray[data / bs] != idxn[j] / bs)) *(v + i * n + j) = 0.0; 693a30f8f8cSSatish Balay else { 694a30f8f8cSSatish Balay col = data + idxn[j] % bs; 6959566063dSJacob Faibussowitsch PetscCall(MatGetValues_SeqBAIJ(baij->B, 1, &row, 1, &col, v + i * n + j)); 696a30f8f8cSSatish Balay } 697a30f8f8cSSatish Balay } 698a30f8f8cSSatish Balay } 699f23aa3ddSBarry Smith } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local values currently supported"); 700a30f8f8cSSatish Balay } 7013ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 702a30f8f8cSSatish Balay } 703a30f8f8cSSatish Balay 704d71ae5a4SJacob Faibussowitsch PetscErrorCode MatNorm_MPISBAIJ(Mat mat, NormType type, PetscReal *norm) 705d71ae5a4SJacob Faibussowitsch { 706a30f8f8cSSatish Balay Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data; 707a30f8f8cSSatish Balay PetscReal sum[2], *lnorm2; 708a30f8f8cSSatish Balay 709a30f8f8cSSatish Balay PetscFunctionBegin; 710a30f8f8cSSatish Balay if (baij->size == 1) { 7119566063dSJacob Faibussowitsch PetscCall(MatNorm(baij->A, type, norm)); 712a30f8f8cSSatish Balay } else { 713a30f8f8cSSatish Balay if (type == NORM_FROBENIUS) { 7149566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(2, &lnorm2)); 7159566063dSJacob Faibussowitsch PetscCall(MatNorm(baij->A, type, lnorm2)); 7169371c9d4SSatish Balay *lnorm2 = (*lnorm2) * (*lnorm2); 7179371c9d4SSatish Balay lnorm2++; /* squar power of norm(A) */ 7189566063dSJacob Faibussowitsch PetscCall(MatNorm(baij->B, type, lnorm2)); 7199371c9d4SSatish Balay *lnorm2 = (*lnorm2) * (*lnorm2); 7209371c9d4SSatish Balay lnorm2--; /* squar power of norm(B) */ 7211c2dc1cbSBarry Smith PetscCall(MPIU_Allreduce(lnorm2, sum, 2, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat))); 7228f1a2a5eSBarry Smith *norm = PetscSqrtReal(sum[0] + 2 * sum[1]); 7239566063dSJacob Faibussowitsch PetscCall(PetscFree(lnorm2)); 7240b8dc8d2SHong Zhang } else if (type == NORM_INFINITY || type == NORM_1) { /* max row/column sum */ 7250b8dc8d2SHong Zhang Mat_SeqSBAIJ *amat = (Mat_SeqSBAIJ *)baij->A->data; 7260b8dc8d2SHong Zhang Mat_SeqBAIJ *bmat = (Mat_SeqBAIJ *)baij->B->data; 7270b8dc8d2SHong Zhang PetscReal *rsum, *rsum2, vabs; 728899cda47SBarry Smith PetscInt *jj, *garray = baij->garray, rstart = baij->rstartbs, nz; 729d0f46423SBarry Smith PetscInt brow, bcol, col, bs = baij->A->rmap->bs, row, grow, gcol, mbs = amat->mbs; 7300b8dc8d2SHong Zhang MatScalar *v; 7310b8dc8d2SHong Zhang 7329566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(mat->cmap->N, &rsum, mat->cmap->N, &rsum2)); 7339566063dSJacob Faibussowitsch PetscCall(PetscArrayzero(rsum, mat->cmap->N)); 7340b8dc8d2SHong Zhang /* Amat */ 7359371c9d4SSatish Balay v = amat->a; 7369371c9d4SSatish Balay jj = amat->j; 7370b8dc8d2SHong Zhang for (brow = 0; brow < mbs; brow++) { 7380b8dc8d2SHong Zhang grow = bs * (rstart + brow); 7390b8dc8d2SHong Zhang nz = amat->i[brow + 1] - amat->i[brow]; 7400b8dc8d2SHong Zhang for (bcol = 0; bcol < nz; bcol++) { 7419371c9d4SSatish Balay gcol = bs * (rstart + *jj); 7429371c9d4SSatish Balay jj++; 7430b8dc8d2SHong Zhang for (col = 0; col < bs; col++) { 7440b8dc8d2SHong Zhang for (row = 0; row < bs; row++) { 7459371c9d4SSatish Balay vabs = PetscAbsScalar(*v); 7469371c9d4SSatish Balay v++; 7470b8dc8d2SHong Zhang rsum[gcol + col] += vabs; 7480b8dc8d2SHong Zhang /* non-diagonal block */ 7490b8dc8d2SHong Zhang if (bcol > 0 && vabs > 0.0) rsum[grow + row] += vabs; 7500b8dc8d2SHong Zhang } 7510b8dc8d2SHong Zhang } 7520b8dc8d2SHong Zhang } 7539566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(nz * bs * bs)); 7540b8dc8d2SHong Zhang } 7550b8dc8d2SHong Zhang /* Bmat */ 7569371c9d4SSatish Balay v = bmat->a; 7579371c9d4SSatish Balay jj = bmat->j; 7580b8dc8d2SHong Zhang for (brow = 0; brow < mbs; brow++) { 7590b8dc8d2SHong Zhang grow = bs * (rstart + brow); 7600b8dc8d2SHong Zhang nz = bmat->i[brow + 1] - bmat->i[brow]; 7610b8dc8d2SHong Zhang for (bcol = 0; bcol < nz; bcol++) { 7629371c9d4SSatish Balay gcol = bs * garray[*jj]; 7639371c9d4SSatish Balay jj++; 7640b8dc8d2SHong Zhang for (col = 0; col < bs; col++) { 7650b8dc8d2SHong Zhang for (row = 0; row < bs; row++) { 7669371c9d4SSatish Balay vabs = PetscAbsScalar(*v); 7679371c9d4SSatish Balay v++; 7680b8dc8d2SHong Zhang rsum[gcol + col] += vabs; 7690b8dc8d2SHong Zhang rsum[grow + row] += vabs; 7700b8dc8d2SHong Zhang } 7710b8dc8d2SHong Zhang } 7720b8dc8d2SHong Zhang } 7739566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(nz * bs * bs)); 7740b8dc8d2SHong Zhang } 7751c2dc1cbSBarry Smith PetscCall(MPIU_Allreduce(rsum, rsum2, mat->cmap->N, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat))); 7760b8dc8d2SHong Zhang *norm = 0.0; 777d0f46423SBarry Smith for (col = 0; col < mat->cmap->N; col++) { 7780b8dc8d2SHong Zhang if (rsum2[col] > *norm) *norm = rsum2[col]; 7790b8dc8d2SHong Zhang } 7809566063dSJacob Faibussowitsch PetscCall(PetscFree2(rsum, rsum2)); 781f23aa3ddSBarry Smith } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for this norm yet"); 782a30f8f8cSSatish Balay } 7833ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 784a30f8f8cSSatish Balay } 785a30f8f8cSSatish Balay 786d71ae5a4SJacob Faibussowitsch PetscErrorCode MatAssemblyBegin_MPISBAIJ(Mat mat, MatAssemblyType mode) 787d71ae5a4SJacob Faibussowitsch { 788a30f8f8cSSatish Balay Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data; 7891302d50aSBarry Smith PetscInt nstash, reallocs; 790a30f8f8cSSatish Balay 791a30f8f8cSSatish Balay PetscFunctionBegin; 7923ba16761SJacob Faibussowitsch if (baij->donotstash || mat->nooffprocentries) PetscFunctionReturn(PETSC_SUCCESS); 793a30f8f8cSSatish Balay 7949566063dSJacob Faibussowitsch PetscCall(MatStashScatterBegin_Private(mat, &mat->stash, mat->rmap->range)); 7959566063dSJacob Faibussowitsch PetscCall(MatStashScatterBegin_Private(mat, &mat->bstash, baij->rangebs)); 7969566063dSJacob Faibussowitsch PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs)); 7979566063dSJacob Faibussowitsch PetscCall(PetscInfo(mat, "Stash has %" PetscInt_FMT " entries,uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs)); 7989566063dSJacob Faibussowitsch PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs)); 7999566063dSJacob Faibussowitsch PetscCall(PetscInfo(mat, "Block-Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs)); 8003ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 801a30f8f8cSSatish Balay } 802a30f8f8cSSatish Balay 803d71ae5a4SJacob Faibussowitsch PetscErrorCode MatAssemblyEnd_MPISBAIJ(Mat mat, MatAssemblyType mode) 804d71ae5a4SJacob Faibussowitsch { 805a30f8f8cSSatish Balay Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data; 806a30f8f8cSSatish Balay Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)baij->A->data; 80713f74950SBarry Smith PetscInt i, j, rstart, ncols, flg, bs2 = baij->bs2; 808e44c0bd4SBarry Smith PetscInt *row, *col; 809ace3abfcSBarry Smith PetscBool other_disassembled; 81013f74950SBarry Smith PetscMPIInt n; 811ace3abfcSBarry Smith PetscBool r1, r2, r3; 812a30f8f8cSSatish Balay MatScalar *val; 813a30f8f8cSSatish Balay 81491c97fd4SSatish Balay /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */ 815a30f8f8cSSatish Balay PetscFunctionBegin; 8164cb17eb5SBarry Smith if (!baij->donotstash && !mat->nooffprocentries) { 817a30f8f8cSSatish Balay while (1) { 8189566063dSJacob Faibussowitsch PetscCall(MatStashScatterGetMesg_Private(&mat->stash, &n, &row, &col, &val, &flg)); 819a30f8f8cSSatish Balay if (!flg) break; 820a30f8f8cSSatish Balay 821a30f8f8cSSatish Balay for (i = 0; i < n;) { 822a30f8f8cSSatish Balay /* Now identify the consecutive vals belonging to the same row */ 82326fbe8dcSKarl Rupp for (j = i, rstart = row[j]; j < n; j++) { 82426fbe8dcSKarl Rupp if (row[j] != rstart) break; 82526fbe8dcSKarl Rupp } 826a30f8f8cSSatish Balay if (j < n) ncols = j - i; 827a30f8f8cSSatish Balay else ncols = n - i; 828a30f8f8cSSatish Balay /* Now assemble all these values with a single function call */ 8299566063dSJacob Faibussowitsch PetscCall(MatSetValues_MPISBAIJ(mat, 1, row + i, ncols, col + i, val + i, mat->insertmode)); 830a30f8f8cSSatish Balay i = j; 831a30f8f8cSSatish Balay } 832a30f8f8cSSatish Balay } 8339566063dSJacob Faibussowitsch PetscCall(MatStashScatterEnd_Private(&mat->stash)); 834a30f8f8cSSatish Balay /* Now process the block-stash. Since the values are stashed column-oriented, 835a30f8f8cSSatish Balay set the roworiented flag to column oriented, and after MatSetValues() 836a30f8f8cSSatish Balay restore the original flags */ 837a30f8f8cSSatish Balay r1 = baij->roworiented; 838a30f8f8cSSatish Balay r2 = a->roworiented; 83991c97fd4SSatish Balay r3 = ((Mat_SeqBAIJ *)baij->B->data)->roworiented; 84026fbe8dcSKarl Rupp 841a30f8f8cSSatish Balay baij->roworiented = PETSC_FALSE; 842a30f8f8cSSatish Balay a->roworiented = PETSC_FALSE; 84326fbe8dcSKarl Rupp 84491c97fd4SSatish Balay ((Mat_SeqBAIJ *)baij->B->data)->roworiented = PETSC_FALSE; /* b->roworinted */ 845a30f8f8cSSatish Balay while (1) { 8469566063dSJacob Faibussowitsch PetscCall(MatStashScatterGetMesg_Private(&mat->bstash, &n, &row, &col, &val, &flg)); 847a30f8f8cSSatish Balay if (!flg) break; 848a30f8f8cSSatish Balay 849a30f8f8cSSatish Balay for (i = 0; i < n;) { 850a30f8f8cSSatish Balay /* Now identify the consecutive vals belonging to the same row */ 85126fbe8dcSKarl Rupp for (j = i, rstart = row[j]; j < n; j++) { 85226fbe8dcSKarl Rupp if (row[j] != rstart) break; 85326fbe8dcSKarl Rupp } 854a30f8f8cSSatish Balay if (j < n) ncols = j - i; 855a30f8f8cSSatish Balay else ncols = n - i; 8569566063dSJacob Faibussowitsch PetscCall(MatSetValuesBlocked_MPISBAIJ(mat, 1, row + i, ncols, col + i, val + i * bs2, mat->insertmode)); 857a30f8f8cSSatish Balay i = j; 858a30f8f8cSSatish Balay } 859a30f8f8cSSatish Balay } 8609566063dSJacob Faibussowitsch PetscCall(MatStashScatterEnd_Private(&mat->bstash)); 86126fbe8dcSKarl Rupp 862a30f8f8cSSatish Balay baij->roworiented = r1; 863a30f8f8cSSatish Balay a->roworiented = r2; 86426fbe8dcSKarl Rupp 86591c97fd4SSatish Balay ((Mat_SeqBAIJ *)baij->B->data)->roworiented = r3; /* b->roworinted */ 866a30f8f8cSSatish Balay } 867a30f8f8cSSatish Balay 8689566063dSJacob Faibussowitsch PetscCall(MatAssemblyBegin(baij->A, mode)); 8699566063dSJacob Faibussowitsch PetscCall(MatAssemblyEnd(baij->A, mode)); 870a30f8f8cSSatish Balay 871a30f8f8cSSatish Balay /* determine if any processor has disassembled, if so we must 8726aad120cSJose E. Roman also disassemble ourselves, in order that we may reassemble. */ 873a30f8f8cSSatish Balay /* 874a30f8f8cSSatish Balay if nonzero structure of submatrix B cannot change then we know that 875a30f8f8cSSatish Balay no processor disassembled thus we can skip this stuff 876a30f8f8cSSatish Balay */ 877a30f8f8cSSatish Balay if (!((Mat_SeqBAIJ *)baij->B->data)->nonew) { 8785f9db2b2SJunchao Zhang PetscCall(MPIU_Allreduce(&mat->was_assembled, &other_disassembled, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat))); 87948a46eb9SPierre Jolivet if (mat->was_assembled && !other_disassembled) PetscCall(MatDisAssemble_MPISBAIJ(mat)); 880a30f8f8cSSatish Balay } 881a30f8f8cSSatish Balay 8829371c9d4SSatish Balay if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { PetscCall(MatSetUpMultiply_MPISBAIJ(mat)); /* setup Mvctx and sMvctx */ } 8839566063dSJacob Faibussowitsch PetscCall(MatAssemblyBegin(baij->B, mode)); 8849566063dSJacob Faibussowitsch PetscCall(MatAssemblyEnd(baij->B, mode)); 885a30f8f8cSSatish Balay 8869566063dSJacob Faibussowitsch PetscCall(PetscFree2(baij->rowvalues, baij->rowindices)); 88726fbe8dcSKarl Rupp 888f4259b30SLisandro Dalcin baij->rowvalues = NULL; 8894f9cfa9eSBarry Smith 8904f9cfa9eSBarry Smith /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */ 8914f9cfa9eSBarry Smith if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqBAIJ *)(baij->A->data))->nonew) { 892e56f5c9eSBarry Smith PetscObjectState state = baij->A->nonzerostate + baij->B->nonzerostate; 8931c2dc1cbSBarry Smith PetscCall(MPIU_Allreduce(&state, &mat->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)mat))); 894e56f5c9eSBarry Smith } 8953ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 896a30f8f8cSSatish Balay } 897a30f8f8cSSatish Balay 898dd6ea824SBarry Smith extern PetscErrorCode MatSetValues_MPIBAIJ(Mat, PetscInt, const PetscInt[], PetscInt, const PetscInt[], const PetscScalar[], InsertMode); 8999804daf3SBarry Smith #include <petscdraw.h> 900d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat, PetscViewer viewer) 901d71ae5a4SJacob Faibussowitsch { 902a30f8f8cSSatish Balay Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data; 903d0f46423SBarry Smith PetscInt bs = mat->rmap->bs; 9047da1fb6eSBarry Smith PetscMPIInt rank = baij->rank; 905ace3abfcSBarry Smith PetscBool iascii, isdraw; 906b0a32e0cSBarry Smith PetscViewer sviewer; 907f3ef73ceSBarry Smith PetscViewerFormat format; 908a30f8f8cSSatish Balay 909a30f8f8cSSatish Balay PetscFunctionBegin; 9109566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii)); 9119566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw)); 91232077d6dSBarry Smith if (iascii) { 9139566063dSJacob Faibussowitsch PetscCall(PetscViewerGetFormat(viewer, &format)); 914456192e2SBarry Smith if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 915a30f8f8cSSatish Balay MatInfo info; 9169566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank)); 9179566063dSJacob Faibussowitsch PetscCall(MatGetInfo(mat, MAT_LOCAL, &info)); 9189566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPushSynchronized(viewer)); 9199371c9d4SSatish Balay PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " bs %" PetscInt_FMT " mem %g\n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated, 9209371c9d4SSatish Balay mat->rmap->bs, (double)info.memory)); 9219566063dSJacob Faibussowitsch PetscCall(MatGetInfo(baij->A, MAT_LOCAL, &info)); 9229566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] on-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used)); 9239566063dSJacob Faibussowitsch PetscCall(MatGetInfo(baij->B, MAT_LOCAL, &info)); 9249566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] off-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used)); 9259566063dSJacob Faibussowitsch PetscCall(PetscViewerFlush(viewer)); 9269566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPopSynchronized(viewer)); 9279566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, "Information on VecScatter used in matrix-vector product: \n")); 9289566063dSJacob Faibussowitsch PetscCall(VecScatterView(baij->Mvctx, viewer)); 9293ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 930fb9695e5SSatish Balay } else if (format == PETSC_VIEWER_ASCII_INFO) { 9319566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " block size is %" PetscInt_FMT "\n", bs)); 9323ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 933c1490034SHong Zhang } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 9343ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 935a30f8f8cSSatish Balay } 936a30f8f8cSSatish Balay } 937a30f8f8cSSatish Balay 938a30f8f8cSSatish Balay if (isdraw) { 939b0a32e0cSBarry Smith PetscDraw draw; 940ace3abfcSBarry Smith PetscBool isnull; 9419566063dSJacob Faibussowitsch PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw)); 9429566063dSJacob Faibussowitsch PetscCall(PetscDrawIsNull(draw, &isnull)); 9433ba16761SJacob Faibussowitsch if (isnull) PetscFunctionReturn(PETSC_SUCCESS); 944a30f8f8cSSatish Balay } 945a30f8f8cSSatish Balay 9467da1fb6eSBarry Smith { 947a30f8f8cSSatish Balay /* assemble the entire matrix onto first processor. */ 948a30f8f8cSSatish Balay Mat A; 94965d70643SHong Zhang Mat_SeqSBAIJ *Aloc; 95065d70643SHong Zhang Mat_SeqBAIJ *Bloc; 951d0f46423SBarry Smith PetscInt M = mat->rmap->N, N = mat->cmap->N, *ai, *aj, col, i, j, k, *rvals, mbs = baij->mbs; 952a30f8f8cSSatish Balay MatScalar *a; 9533e219373SBarry Smith const char *matname; 954a30f8f8cSSatish Balay 955f204ca49SKris Buschelman /* Should this be the same type as mat? */ 9569566063dSJacob Faibussowitsch PetscCall(MatCreate(PetscObjectComm((PetscObject)mat), &A)); 957dd400576SPatrick Sanan if (rank == 0) { 9589566063dSJacob Faibussowitsch PetscCall(MatSetSizes(A, M, N, M, N)); 959a30f8f8cSSatish Balay } else { 9609566063dSJacob Faibussowitsch PetscCall(MatSetSizes(A, 0, 0, M, N)); 961a30f8f8cSSatish Balay } 9629566063dSJacob Faibussowitsch PetscCall(MatSetType(A, MATMPISBAIJ)); 9639566063dSJacob Faibussowitsch PetscCall(MatMPISBAIJSetPreallocation(A, mat->rmap->bs, 0, NULL, 0, NULL)); 9649566063dSJacob Faibussowitsch PetscCall(MatSetOption(A, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_FALSE)); 965a30f8f8cSSatish Balay 966a30f8f8cSSatish Balay /* copy over the A part */ 96765d70643SHong Zhang Aloc = (Mat_SeqSBAIJ *)baij->A->data; 9689371c9d4SSatish Balay ai = Aloc->i; 9699371c9d4SSatish Balay aj = Aloc->j; 9709371c9d4SSatish Balay a = Aloc->a; 9719566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(bs, &rvals)); 972a30f8f8cSSatish Balay 973a30f8f8cSSatish Balay for (i = 0; i < mbs; i++) { 974e9f7bc9eSHong Zhang rvals[0] = bs * (baij->rstartbs + i); 97526fbe8dcSKarl Rupp for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1; 976a30f8f8cSSatish Balay for (j = ai[i]; j < ai[i + 1]; j++) { 977e9f7bc9eSHong Zhang col = (baij->cstartbs + aj[j]) * bs; 978a30f8f8cSSatish Balay for (k = 0; k < bs; k++) { 9799566063dSJacob Faibussowitsch PetscCall(MatSetValues_MPISBAIJ(A, bs, rvals, 1, &col, a, INSERT_VALUES)); 98026fbe8dcSKarl Rupp col++; 98126fbe8dcSKarl Rupp a += bs; 982a30f8f8cSSatish Balay } 983a30f8f8cSSatish Balay } 984a30f8f8cSSatish Balay } 985a30f8f8cSSatish Balay /* copy over the B part */ 98665d70643SHong Zhang Bloc = (Mat_SeqBAIJ *)baij->B->data; 9879371c9d4SSatish Balay ai = Bloc->i; 9889371c9d4SSatish Balay aj = Bloc->j; 9899371c9d4SSatish Balay a = Bloc->a; 990a30f8f8cSSatish Balay for (i = 0; i < mbs; i++) { 991e9f7bc9eSHong Zhang rvals[0] = bs * (baij->rstartbs + i); 99226fbe8dcSKarl Rupp for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1; 993a30f8f8cSSatish Balay for (j = ai[i]; j < ai[i + 1]; j++) { 994a30f8f8cSSatish Balay col = baij->garray[aj[j]] * bs; 995a30f8f8cSSatish Balay for (k = 0; k < bs; k++) { 9969566063dSJacob Faibussowitsch PetscCall(MatSetValues_MPIBAIJ(A, bs, rvals, 1, &col, a, INSERT_VALUES)); 99726fbe8dcSKarl Rupp col++; 99826fbe8dcSKarl Rupp a += bs; 999a30f8f8cSSatish Balay } 1000a30f8f8cSSatish Balay } 1001a30f8f8cSSatish Balay } 10029566063dSJacob Faibussowitsch PetscCall(PetscFree(rvals)); 10039566063dSJacob Faibussowitsch PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY)); 10049566063dSJacob Faibussowitsch PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY)); 1005a30f8f8cSSatish Balay /* 1006a30f8f8cSSatish Balay Everyone has to call to draw the matrix since the graphics waits are 1007b0a32e0cSBarry Smith synchronized across all processors that share the PetscDraw object 1008a30f8f8cSSatish Balay */ 10099566063dSJacob Faibussowitsch PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer)); 101023a3927dSBarry Smith if (((PetscObject)mat)->name) PetscCall(PetscObjectGetName((PetscObject)mat, &matname)); 1011dd400576SPatrick Sanan if (rank == 0) { 101223a3927dSBarry Smith if (((PetscObject)mat)->name) PetscCall(PetscObjectSetName((PetscObject)((Mat_MPISBAIJ *)(A->data))->A, matname)); 10139566063dSJacob Faibussowitsch PetscCall(MatView_SeqSBAIJ(((Mat_MPISBAIJ *)(A->data))->A, sviewer)); 1014a30f8f8cSSatish Balay } 10159566063dSJacob Faibussowitsch PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer)); 10169566063dSJacob Faibussowitsch PetscCall(PetscViewerFlush(viewer)); 10179566063dSJacob Faibussowitsch PetscCall(MatDestroy(&A)); 1018a30f8f8cSSatish Balay } 10193ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1020a30f8f8cSSatish Balay } 1021a30f8f8cSSatish Balay 1022618cc2edSLisandro Dalcin /* Used for both MPIBAIJ and MPISBAIJ matrices */ 1023618cc2edSLisandro Dalcin #define MatView_MPISBAIJ_Binary MatView_MPIBAIJ_Binary 1024d1654148SHong Zhang 1025d71ae5a4SJacob Faibussowitsch PetscErrorCode MatView_MPISBAIJ(Mat mat, PetscViewer viewer) 1026d71ae5a4SJacob Faibussowitsch { 1027ace3abfcSBarry Smith PetscBool iascii, isdraw, issocket, isbinary; 1028a30f8f8cSSatish Balay 1029a30f8f8cSSatish Balay PetscFunctionBegin; 10309566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii)); 10319566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw)); 10329566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSOCKET, &issocket)); 10339566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary)); 1034d1654148SHong Zhang if (iascii || isdraw || issocket) { 10359566063dSJacob Faibussowitsch PetscCall(MatView_MPISBAIJ_ASCIIorDraworSocket(mat, viewer)); 10361baa6e33SBarry Smith } else if (isbinary) PetscCall(MatView_MPISBAIJ_Binary(mat, viewer)); 10373ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1038a30f8f8cSSatish Balay } 1039a30f8f8cSSatish Balay 1040d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMult_MPISBAIJ_Hermitian(Mat A, Vec xx, Vec yy) 1041d71ae5a4SJacob Faibussowitsch { 1042547795f9SHong Zhang Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data; 1043eb1ec7c1SStefano Zampini PetscInt mbs = a->mbs, bs = A->rmap->bs; 10446de40e93SBarry Smith PetscScalar *from; 10456de40e93SBarry Smith const PetscScalar *x; 1046547795f9SHong Zhang 1047547795f9SHong Zhang PetscFunctionBegin; 1048547795f9SHong Zhang /* diagonal part */ 10499566063dSJacob Faibussowitsch PetscCall((*a->A->ops->mult)(a->A, xx, a->slvec1a)); 10509566063dSJacob Faibussowitsch PetscCall(VecSet(a->slvec1b, 0.0)); 1051547795f9SHong Zhang 1052547795f9SHong Zhang /* subdiagonal part */ 10535f80ce2aSJacob Faibussowitsch PetscCheck(a->B->ops->multhermitiantranspose, PetscObjectComm((PetscObject)a->B), PETSC_ERR_SUP, "Not for type %s", ((PetscObject)a->B)->type_name); 10549566063dSJacob Faibussowitsch PetscCall((*a->B->ops->multhermitiantranspose)(a->B, xx, a->slvec0b)); 1055547795f9SHong Zhang 1056547795f9SHong Zhang /* copy x into the vec slvec0 */ 10579566063dSJacob Faibussowitsch PetscCall(VecGetArray(a->slvec0, &from)); 10589566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx, &x)); 1059547795f9SHong Zhang 10609566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(from, x, bs * mbs)); 10619566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(a->slvec0, &from)); 10629566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx, &x)); 1063547795f9SHong Zhang 10649566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD)); 10659566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD)); 1066547795f9SHong Zhang /* supperdiagonal part */ 10679566063dSJacob Faibussowitsch PetscCall((*a->B->ops->multadd)(a->B, a->slvec1b, a->slvec1a, yy)); 10683ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1069547795f9SHong Zhang } 1070547795f9SHong Zhang 1071d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMult_MPISBAIJ(Mat A, Vec xx, Vec yy) 1072d71ae5a4SJacob Faibussowitsch { 1073a9d4b620SHong Zhang Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data; 1074eb1ec7c1SStefano Zampini PetscInt mbs = a->mbs, bs = A->rmap->bs; 1075d9ca1df4SBarry Smith PetscScalar *from; 1076d9ca1df4SBarry Smith const PetscScalar *x; 1077a9d4b620SHong Zhang 1078a9d4b620SHong Zhang PetscFunctionBegin; 1079a9d4b620SHong Zhang /* diagonal part */ 10809566063dSJacob Faibussowitsch PetscCall((*a->A->ops->mult)(a->A, xx, a->slvec1a)); 10819566063dSJacob Faibussowitsch PetscCall(VecSet(a->slvec1b, 0.0)); 1082a9d4b620SHong Zhang 1083a9d4b620SHong Zhang /* subdiagonal part */ 10849566063dSJacob Faibussowitsch PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->slvec0b)); 1085fc165ae2SBarry Smith 1086a9d4b620SHong Zhang /* copy x into the vec slvec0 */ 10879566063dSJacob Faibussowitsch PetscCall(VecGetArray(a->slvec0, &from)); 10889566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx, &x)); 1089a9d4b620SHong Zhang 10909566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(from, x, bs * mbs)); 10919566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(a->slvec0, &from)); 10929566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx, &x)); 1093fc165ae2SBarry Smith 10949566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD)); 10959566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD)); 1096a9d4b620SHong Zhang /* supperdiagonal part */ 10979566063dSJacob Faibussowitsch PetscCall((*a->B->ops->multadd)(a->B, a->slvec1b, a->slvec1a, yy)); 10983ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1099a9d4b620SHong Zhang } 1100a9d4b620SHong Zhang 1101d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMultAdd_MPISBAIJ_Hermitian(Mat A, Vec xx, Vec yy, Vec zz) 1102d71ae5a4SJacob Faibussowitsch { 1103eb1ec7c1SStefano Zampini Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data; 1104eb1ec7c1SStefano Zampini PetscInt mbs = a->mbs, bs = A->rmap->bs; 1105eb1ec7c1SStefano Zampini PetscScalar *from, zero = 0.0; 1106eb1ec7c1SStefano Zampini const PetscScalar *x; 1107eb1ec7c1SStefano Zampini 1108eb1ec7c1SStefano Zampini PetscFunctionBegin; 1109eb1ec7c1SStefano Zampini /* diagonal part */ 11109566063dSJacob Faibussowitsch PetscCall((*a->A->ops->multadd)(a->A, xx, yy, a->slvec1a)); 11119566063dSJacob Faibussowitsch PetscCall(VecSet(a->slvec1b, zero)); 1112eb1ec7c1SStefano Zampini 1113eb1ec7c1SStefano Zampini /* subdiagonal part */ 11145f80ce2aSJacob Faibussowitsch PetscCheck(a->B->ops->multhermitiantranspose, PetscObjectComm((PetscObject)a->B), PETSC_ERR_SUP, "Not for type %s", ((PetscObject)a->B)->type_name); 11159566063dSJacob Faibussowitsch PetscCall((*a->B->ops->multhermitiantranspose)(a->B, xx, a->slvec0b)); 1116eb1ec7c1SStefano Zampini 1117eb1ec7c1SStefano Zampini /* copy x into the vec slvec0 */ 11189566063dSJacob Faibussowitsch PetscCall(VecGetArray(a->slvec0, &from)); 11199566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx, &x)); 11209566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(from, x, bs * mbs)); 11219566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(a->slvec0, &from)); 1122eb1ec7c1SStefano Zampini 11239566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD)); 11249566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx, &x)); 11259566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD)); 1126eb1ec7c1SStefano Zampini 1127eb1ec7c1SStefano Zampini /* supperdiagonal part */ 11289566063dSJacob Faibussowitsch PetscCall((*a->B->ops->multadd)(a->B, a->slvec1b, a->slvec1a, zz)); 11293ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1130eb1ec7c1SStefano Zampini } 1131eb1ec7c1SStefano Zampini 1132d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMultAdd_MPISBAIJ(Mat A, Vec xx, Vec yy, Vec zz) 1133d71ae5a4SJacob Faibussowitsch { 1134de8b6608SHong Zhang Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data; 1135d0f46423SBarry Smith PetscInt mbs = a->mbs, bs = A->rmap->bs; 1136d9ca1df4SBarry Smith PetscScalar *from, zero = 0.0; 1137d9ca1df4SBarry Smith const PetscScalar *x; 1138a9d4b620SHong Zhang 1139a9d4b620SHong Zhang PetscFunctionBegin; 1140a9d4b620SHong Zhang /* diagonal part */ 11419566063dSJacob Faibussowitsch PetscCall((*a->A->ops->multadd)(a->A, xx, yy, a->slvec1a)); 11429566063dSJacob Faibussowitsch PetscCall(VecSet(a->slvec1b, zero)); 1143a9d4b620SHong Zhang 1144a9d4b620SHong Zhang /* subdiagonal part */ 11459566063dSJacob Faibussowitsch PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->slvec0b)); 1146a9d4b620SHong Zhang 1147a9d4b620SHong Zhang /* copy x into the vec slvec0 */ 11489566063dSJacob Faibussowitsch PetscCall(VecGetArray(a->slvec0, &from)); 11499566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx, &x)); 11509566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(from, x, bs * mbs)); 11519566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(a->slvec0, &from)); 1152a9d4b620SHong Zhang 11539566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD)); 11549566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx, &x)); 11559566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD)); 1156a9d4b620SHong Zhang 1157a9d4b620SHong Zhang /* supperdiagonal part */ 11589566063dSJacob Faibussowitsch PetscCall((*a->B->ops->multadd)(a->B, a->slvec1b, a->slvec1a, zz)); 11593ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1160a9d4b620SHong Zhang } 1161a9d4b620SHong Zhang 1162a30f8f8cSSatish Balay /* 1163a30f8f8cSSatish Balay This only works correctly for square matrices where the subblock A->A is the 1164a30f8f8cSSatish Balay diagonal block 1165a30f8f8cSSatish Balay */ 1166d71ae5a4SJacob Faibussowitsch PetscErrorCode MatGetDiagonal_MPISBAIJ(Mat A, Vec v) 1167d71ae5a4SJacob Faibussowitsch { 1168a30f8f8cSSatish Balay Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data; 1169a30f8f8cSSatish Balay 1170a30f8f8cSSatish Balay PetscFunctionBegin; 117108401ef6SPierre Jolivet /* PetscCheck(a->rmap->N == a->cmap->N,PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); */ 11729566063dSJacob Faibussowitsch PetscCall(MatGetDiagonal(a->A, v)); 11733ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1174a30f8f8cSSatish Balay } 1175a30f8f8cSSatish Balay 1176d71ae5a4SJacob Faibussowitsch PetscErrorCode MatScale_MPISBAIJ(Mat A, PetscScalar aa) 1177d71ae5a4SJacob Faibussowitsch { 1178a30f8f8cSSatish Balay Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data; 1179a30f8f8cSSatish Balay 1180a30f8f8cSSatish Balay PetscFunctionBegin; 11819566063dSJacob Faibussowitsch PetscCall(MatScale(a->A, aa)); 11829566063dSJacob Faibussowitsch PetscCall(MatScale(a->B, aa)); 11833ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1184a30f8f8cSSatish Balay } 1185a30f8f8cSSatish Balay 1186d71ae5a4SJacob Faibussowitsch PetscErrorCode MatGetRow_MPISBAIJ(Mat matin, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v) 1187d71ae5a4SJacob Faibussowitsch { 1188d0d4cfc2SHong Zhang Mat_MPISBAIJ *mat = (Mat_MPISBAIJ *)matin->data; 1189d0d4cfc2SHong Zhang PetscScalar *vworkA, *vworkB, **pvA, **pvB, *v_p; 1190d0f46423SBarry Smith PetscInt bs = matin->rmap->bs, bs2 = mat->bs2, i, *cworkA, *cworkB, **pcA, **pcB; 1191d0f46423SBarry Smith PetscInt nztot, nzA, nzB, lrow, brstart = matin->rmap->rstart, brend = matin->rmap->rend; 1192899cda47SBarry Smith PetscInt *cmap, *idx_p, cstart = mat->rstartbs; 1193d0d4cfc2SHong Zhang 1194a30f8f8cSSatish Balay PetscFunctionBegin; 11955f80ce2aSJacob Faibussowitsch PetscCheck(!mat->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Already active"); 1196d0d4cfc2SHong Zhang mat->getrowactive = PETSC_TRUE; 1197d0d4cfc2SHong Zhang 1198d0d4cfc2SHong Zhang if (!mat->rowvalues && (idx || v)) { 1199d0d4cfc2SHong Zhang /* 1200d0d4cfc2SHong Zhang allocate enough space to hold information from the longest row. 1201d0d4cfc2SHong Zhang */ 1202d0d4cfc2SHong Zhang Mat_SeqSBAIJ *Aa = (Mat_SeqSBAIJ *)mat->A->data; 1203d0d4cfc2SHong Zhang Mat_SeqBAIJ *Ba = (Mat_SeqBAIJ *)mat->B->data; 1204d0d4cfc2SHong Zhang PetscInt max = 1, mbs = mat->mbs, tmp; 1205d0d4cfc2SHong Zhang for (i = 0; i < mbs; i++) { 1206d0d4cfc2SHong Zhang tmp = Aa->i[i + 1] - Aa->i[i] + Ba->i[i + 1] - Ba->i[i]; /* row length */ 120726fbe8dcSKarl Rupp if (max < tmp) max = tmp; 1208d0d4cfc2SHong Zhang } 12099566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(max * bs2, &mat->rowvalues, max * bs2, &mat->rowindices)); 1210d0d4cfc2SHong Zhang } 1211d0d4cfc2SHong Zhang 12125f80ce2aSJacob Faibussowitsch PetscCheck(row >= brstart && row < brend, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local rows"); 1213d0d4cfc2SHong Zhang lrow = row - brstart; /* local row index */ 1214d0d4cfc2SHong Zhang 12159371c9d4SSatish Balay pvA = &vworkA; 12169371c9d4SSatish Balay pcA = &cworkA; 12179371c9d4SSatish Balay pvB = &vworkB; 12189371c9d4SSatish Balay pcB = &cworkB; 12199371c9d4SSatish Balay if (!v) { 12209371c9d4SSatish Balay pvA = NULL; 12219371c9d4SSatish Balay pvB = NULL; 12229371c9d4SSatish Balay } 12239371c9d4SSatish Balay if (!idx) { 12249371c9d4SSatish Balay pcA = NULL; 12259371c9d4SSatish Balay if (!v) pcB = NULL; 12269371c9d4SSatish Balay } 12279566063dSJacob Faibussowitsch PetscCall((*mat->A->ops->getrow)(mat->A, lrow, &nzA, pcA, pvA)); 12289566063dSJacob Faibussowitsch PetscCall((*mat->B->ops->getrow)(mat->B, lrow, &nzB, pcB, pvB)); 1229d0d4cfc2SHong Zhang nztot = nzA + nzB; 1230d0d4cfc2SHong Zhang 1231d0d4cfc2SHong Zhang cmap = mat->garray; 1232d0d4cfc2SHong Zhang if (v || idx) { 1233d0d4cfc2SHong Zhang if (nztot) { 1234d0d4cfc2SHong Zhang /* Sort by increasing column numbers, assuming A and B already sorted */ 1235d0d4cfc2SHong Zhang PetscInt imark = -1; 1236d0d4cfc2SHong Zhang if (v) { 1237d0d4cfc2SHong Zhang *v = v_p = mat->rowvalues; 1238d0d4cfc2SHong Zhang for (i = 0; i < nzB; i++) { 1239d0d4cfc2SHong Zhang if (cmap[cworkB[i] / bs] < cstart) v_p[i] = vworkB[i]; 1240d0d4cfc2SHong Zhang else break; 1241d0d4cfc2SHong Zhang } 1242d0d4cfc2SHong Zhang imark = i; 1243d0d4cfc2SHong Zhang for (i = 0; i < nzA; i++) v_p[imark + i] = vworkA[i]; 1244d0d4cfc2SHong Zhang for (i = imark; i < nzB; i++) v_p[nzA + i] = vworkB[i]; 1245d0d4cfc2SHong Zhang } 1246d0d4cfc2SHong Zhang if (idx) { 1247d0d4cfc2SHong Zhang *idx = idx_p = mat->rowindices; 1248d0d4cfc2SHong Zhang if (imark > -1) { 1249ad540459SPierre Jolivet for (i = 0; i < imark; i++) idx_p[i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs; 1250d0d4cfc2SHong Zhang } else { 1251d0d4cfc2SHong Zhang for (i = 0; i < nzB; i++) { 125226fbe8dcSKarl Rupp if (cmap[cworkB[i] / bs] < cstart) idx_p[i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs; 1253d0d4cfc2SHong Zhang else break; 1254d0d4cfc2SHong Zhang } 1255d0d4cfc2SHong Zhang imark = i; 1256d0d4cfc2SHong Zhang } 1257d0d4cfc2SHong Zhang for (i = 0; i < nzA; i++) idx_p[imark + i] = cstart * bs + cworkA[i]; 1258d0d4cfc2SHong Zhang for (i = imark; i < nzB; i++) idx_p[nzA + i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs; 1259d0d4cfc2SHong Zhang } 1260d0d4cfc2SHong Zhang } else { 1261f4259b30SLisandro Dalcin if (idx) *idx = NULL; 1262f4259b30SLisandro Dalcin if (v) *v = NULL; 1263d0d4cfc2SHong Zhang } 1264d0d4cfc2SHong Zhang } 1265d0d4cfc2SHong Zhang *nz = nztot; 12669566063dSJacob Faibussowitsch PetscCall((*mat->A->ops->restorerow)(mat->A, lrow, &nzA, pcA, pvA)); 12679566063dSJacob Faibussowitsch PetscCall((*mat->B->ops->restorerow)(mat->B, lrow, &nzB, pcB, pvB)); 12683ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1269a30f8f8cSSatish Balay } 1270a30f8f8cSSatish Balay 1271d71ae5a4SJacob Faibussowitsch PetscErrorCode MatRestoreRow_MPISBAIJ(Mat mat, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v) 1272d71ae5a4SJacob Faibussowitsch { 1273a30f8f8cSSatish Balay Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data; 1274a30f8f8cSSatish Balay 1275a30f8f8cSSatish Balay PetscFunctionBegin; 12765f80ce2aSJacob Faibussowitsch PetscCheck(baij->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "MatGetRow() must be called first"); 1277a30f8f8cSSatish Balay baij->getrowactive = PETSC_FALSE; 12783ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1279a30f8f8cSSatish Balay } 1280a30f8f8cSSatish Balay 1281d71ae5a4SJacob Faibussowitsch PetscErrorCode MatGetRowUpperTriangular_MPISBAIJ(Mat A) 1282d71ae5a4SJacob Faibussowitsch { 1283d0d4cfc2SHong Zhang Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data; 1284d0d4cfc2SHong Zhang Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ *)a->A->data; 1285d0d4cfc2SHong Zhang 1286d0d4cfc2SHong Zhang PetscFunctionBegin; 1287d0d4cfc2SHong Zhang aA->getrow_utriangular = PETSC_TRUE; 12883ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1289d0d4cfc2SHong Zhang } 1290d71ae5a4SJacob Faibussowitsch PetscErrorCode MatRestoreRowUpperTriangular_MPISBAIJ(Mat A) 1291d71ae5a4SJacob Faibussowitsch { 1292d0d4cfc2SHong Zhang Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data; 1293d0d4cfc2SHong Zhang Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ *)a->A->data; 1294d0d4cfc2SHong Zhang 1295d0d4cfc2SHong Zhang PetscFunctionBegin; 1296d0d4cfc2SHong Zhang aA->getrow_utriangular = PETSC_FALSE; 12973ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1298d0d4cfc2SHong Zhang } 1299d0d4cfc2SHong Zhang 1300d71ae5a4SJacob Faibussowitsch PetscErrorCode MatConjugate_MPISBAIJ(Mat mat) 1301d71ae5a4SJacob Faibussowitsch { 13025f80ce2aSJacob Faibussowitsch PetscFunctionBegin; 13035f80ce2aSJacob Faibussowitsch if (PetscDefined(USE_COMPLEX)) { 13042726fb6dSPierre Jolivet Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)mat->data; 13052726fb6dSPierre Jolivet 13069566063dSJacob Faibussowitsch PetscCall(MatConjugate(a->A)); 13079566063dSJacob Faibussowitsch PetscCall(MatConjugate(a->B)); 13085f80ce2aSJacob Faibussowitsch } 13093ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 13102726fb6dSPierre Jolivet } 13112726fb6dSPierre Jolivet 1312d71ae5a4SJacob Faibussowitsch PetscErrorCode MatRealPart_MPISBAIJ(Mat A) 1313d71ae5a4SJacob Faibussowitsch { 131499cafbc1SBarry Smith Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data; 131599cafbc1SBarry Smith 131699cafbc1SBarry Smith PetscFunctionBegin; 13179566063dSJacob Faibussowitsch PetscCall(MatRealPart(a->A)); 13189566063dSJacob Faibussowitsch PetscCall(MatRealPart(a->B)); 13193ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 132099cafbc1SBarry Smith } 132199cafbc1SBarry Smith 1322d71ae5a4SJacob Faibussowitsch PetscErrorCode MatImaginaryPart_MPISBAIJ(Mat A) 1323d71ae5a4SJacob Faibussowitsch { 132499cafbc1SBarry Smith Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data; 132599cafbc1SBarry Smith 132699cafbc1SBarry Smith PetscFunctionBegin; 13279566063dSJacob Faibussowitsch PetscCall(MatImaginaryPart(a->A)); 13289566063dSJacob Faibussowitsch PetscCall(MatImaginaryPart(a->B)); 13293ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 133099cafbc1SBarry Smith } 133199cafbc1SBarry Smith 13327dae84e0SHong Zhang /* Check if isrow is a subset of iscol_local, called by MatCreateSubMatrix_MPISBAIJ() 133336032a97SHong Zhang Input: isrow - distributed(parallel), 133436032a97SHong Zhang iscol_local - locally owned (seq) 133536032a97SHong Zhang */ 1336d71ae5a4SJacob Faibussowitsch PetscErrorCode ISEqual_private(IS isrow, IS iscol_local, PetscBool *flg) 1337d71ae5a4SJacob Faibussowitsch { 13388f46ffcaSHong Zhang PetscInt sz1, sz2, *a1, *a2, i, j, k, nmatch; 13398f46ffcaSHong Zhang const PetscInt *ptr1, *ptr2; 134036032a97SHong Zhang 134136032a97SHong Zhang PetscFunctionBegin; 13429566063dSJacob Faibussowitsch PetscCall(ISGetLocalSize(isrow, &sz1)); 13439566063dSJacob Faibussowitsch PetscCall(ISGetLocalSize(iscol_local, &sz2)); 13441098a8e8SHong Zhang if (sz1 > sz2) { 13451098a8e8SHong Zhang *flg = PETSC_FALSE; 13463ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 13471098a8e8SHong Zhang } 13488f46ffcaSHong Zhang 13499566063dSJacob Faibussowitsch PetscCall(ISGetIndices(isrow, &ptr1)); 13509566063dSJacob Faibussowitsch PetscCall(ISGetIndices(iscol_local, &ptr2)); 13518f46ffcaSHong Zhang 13529566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(sz1, &a1)); 13539566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(sz2, &a2)); 13549566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(a1, ptr1, sz1)); 13559566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(a2, ptr2, sz2)); 13569566063dSJacob Faibussowitsch PetscCall(PetscSortInt(sz1, a1)); 13579566063dSJacob Faibussowitsch PetscCall(PetscSortInt(sz2, a2)); 13588f46ffcaSHong Zhang 13598f46ffcaSHong Zhang nmatch = 0; 13608f46ffcaSHong Zhang k = 0; 13618f46ffcaSHong Zhang for (i = 0; i < sz1; i++) { 13628f46ffcaSHong Zhang for (j = k; j < sz2; j++) { 13638f46ffcaSHong Zhang if (a1[i] == a2[j]) { 13649371c9d4SSatish Balay k = j; 13659371c9d4SSatish Balay nmatch++; 13668f46ffcaSHong Zhang break; 13678f46ffcaSHong Zhang } 13688f46ffcaSHong Zhang } 13698f46ffcaSHong Zhang } 13709566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(isrow, &ptr1)); 13719566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(iscol_local, &ptr2)); 13729566063dSJacob Faibussowitsch PetscCall(PetscFree(a1)); 13739566063dSJacob Faibussowitsch PetscCall(PetscFree(a2)); 13741098a8e8SHong Zhang if (nmatch < sz1) { 13751098a8e8SHong Zhang *flg = PETSC_FALSE; 13761098a8e8SHong Zhang } else { 13771098a8e8SHong Zhang *flg = PETSC_TRUE; 13781098a8e8SHong Zhang } 13793ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 13808f46ffcaSHong Zhang } 138136032a97SHong Zhang 1382d71ae5a4SJacob Faibussowitsch PetscErrorCode MatCreateSubMatrix_MPISBAIJ(Mat mat, IS isrow, IS iscol, MatReuse call, Mat *newmat) 1383d71ae5a4SJacob Faibussowitsch { 138436032a97SHong Zhang IS iscol_local; 138536032a97SHong Zhang PetscInt csize; 138636032a97SHong Zhang PetscBool isequal; 138736032a97SHong Zhang 138836032a97SHong Zhang PetscFunctionBegin; 13899566063dSJacob Faibussowitsch PetscCall(ISGetLocalSize(iscol, &csize)); 139036032a97SHong Zhang if (call == MAT_REUSE_MATRIX) { 13919566063dSJacob Faibussowitsch PetscCall(PetscObjectQuery((PetscObject)*newmat, "ISAllGather", (PetscObject *)&iscol_local)); 13925f80ce2aSJacob Faibussowitsch PetscCheck(iscol_local, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse"); 139336032a97SHong Zhang } else { 1394068661f9SToby Isaac PetscBool issorted; 1395068661f9SToby Isaac 13969566063dSJacob Faibussowitsch PetscCall(ISAllGather(iscol, &iscol_local)); 13979566063dSJacob Faibussowitsch PetscCall(ISEqual_private(isrow, iscol_local, &isequal)); 13989566063dSJacob Faibussowitsch PetscCall(ISSorted(iscol_local, &issorted)); 13995f80ce2aSJacob Faibussowitsch PetscCheck(isequal && issorted, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "For symmetric format, iscol must equal isrow and be sorted"); 14008f46ffcaSHong Zhang } 14018f46ffcaSHong Zhang 14027dae84e0SHong Zhang /* now call MatCreateSubMatrix_MPIBAIJ() */ 14039566063dSJacob Faibussowitsch PetscCall(MatCreateSubMatrix_MPIBAIJ_Private(mat, isrow, iscol_local, csize, call, newmat)); 14048f46ffcaSHong Zhang if (call == MAT_INITIAL_MATRIX) { 14059566063dSJacob Faibussowitsch PetscCall(PetscObjectCompose((PetscObject)*newmat, "ISAllGather", (PetscObject)iscol_local)); 14069566063dSJacob Faibussowitsch PetscCall(ISDestroy(&iscol_local)); 14078f46ffcaSHong Zhang } 14083ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 14098f46ffcaSHong Zhang } 14108f46ffcaSHong Zhang 1411d71ae5a4SJacob Faibussowitsch PetscErrorCode MatZeroEntries_MPISBAIJ(Mat A) 1412d71ae5a4SJacob Faibussowitsch { 1413a30f8f8cSSatish Balay Mat_MPISBAIJ *l = (Mat_MPISBAIJ *)A->data; 1414a30f8f8cSSatish Balay 1415a30f8f8cSSatish Balay PetscFunctionBegin; 14169566063dSJacob Faibussowitsch PetscCall(MatZeroEntries(l->A)); 14179566063dSJacob Faibussowitsch PetscCall(MatZeroEntries(l->B)); 14183ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1419a30f8f8cSSatish Balay } 1420a30f8f8cSSatish Balay 1421d71ae5a4SJacob Faibussowitsch PetscErrorCode MatGetInfo_MPISBAIJ(Mat matin, MatInfoType flag, MatInfo *info) 1422d71ae5a4SJacob Faibussowitsch { 1423a30f8f8cSSatish Balay Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)matin->data; 1424a30f8f8cSSatish Balay Mat A = a->A, B = a->B; 14253966268fSBarry Smith PetscLogDouble isend[5], irecv[5]; 1426a30f8f8cSSatish Balay 1427a30f8f8cSSatish Balay PetscFunctionBegin; 1428d0f46423SBarry Smith info->block_size = (PetscReal)matin->rmap->bs; 142926fbe8dcSKarl Rupp 14309566063dSJacob Faibussowitsch PetscCall(MatGetInfo(A, MAT_LOCAL, info)); 143126fbe8dcSKarl Rupp 14329371c9d4SSatish Balay isend[0] = info->nz_used; 14339371c9d4SSatish Balay isend[1] = info->nz_allocated; 14349371c9d4SSatish Balay isend[2] = info->nz_unneeded; 14359371c9d4SSatish Balay isend[3] = info->memory; 14369371c9d4SSatish Balay isend[4] = info->mallocs; 143726fbe8dcSKarl Rupp 14389566063dSJacob Faibussowitsch PetscCall(MatGetInfo(B, MAT_LOCAL, info)); 143926fbe8dcSKarl Rupp 14409371c9d4SSatish Balay isend[0] += info->nz_used; 14419371c9d4SSatish Balay isend[1] += info->nz_allocated; 14429371c9d4SSatish Balay isend[2] += info->nz_unneeded; 14439371c9d4SSatish Balay isend[3] += info->memory; 14449371c9d4SSatish Balay isend[4] += info->mallocs; 1445a30f8f8cSSatish Balay if (flag == MAT_LOCAL) { 1446a30f8f8cSSatish Balay info->nz_used = isend[0]; 1447a30f8f8cSSatish Balay info->nz_allocated = isend[1]; 1448a30f8f8cSSatish Balay info->nz_unneeded = isend[2]; 1449a30f8f8cSSatish Balay info->memory = isend[3]; 1450a30f8f8cSSatish Balay info->mallocs = isend[4]; 1451a30f8f8cSSatish Balay } else if (flag == MAT_GLOBAL_MAX) { 14521c2dc1cbSBarry Smith PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)matin))); 145326fbe8dcSKarl Rupp 1454a30f8f8cSSatish Balay info->nz_used = irecv[0]; 1455a30f8f8cSSatish Balay info->nz_allocated = irecv[1]; 1456a30f8f8cSSatish Balay info->nz_unneeded = irecv[2]; 1457a30f8f8cSSatish Balay info->memory = irecv[3]; 1458a30f8f8cSSatish Balay info->mallocs = irecv[4]; 1459a30f8f8cSSatish Balay } else if (flag == MAT_GLOBAL_SUM) { 14601c2dc1cbSBarry Smith PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)matin))); 146126fbe8dcSKarl Rupp 1462a30f8f8cSSatish Balay info->nz_used = irecv[0]; 1463a30f8f8cSSatish Balay info->nz_allocated = irecv[1]; 1464a30f8f8cSSatish Balay info->nz_unneeded = irecv[2]; 1465a30f8f8cSSatish Balay info->memory = irecv[3]; 1466a30f8f8cSSatish Balay info->mallocs = irecv[4]; 146798921bdaSJacob Faibussowitsch } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Unknown MatInfoType argument %d", (int)flag); 1468a30f8f8cSSatish Balay info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1469a30f8f8cSSatish Balay info->fill_ratio_needed = 0; 1470a30f8f8cSSatish Balay info->factor_mallocs = 0; 14713ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1472a30f8f8cSSatish Balay } 1473a30f8f8cSSatish Balay 1474d71ae5a4SJacob Faibussowitsch PetscErrorCode MatSetOption_MPISBAIJ(Mat A, MatOption op, PetscBool flg) 1475d71ae5a4SJacob Faibussowitsch { 1476a30f8f8cSSatish Balay Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data; 1477d0d4cfc2SHong Zhang Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ *)a->A->data; 1478a30f8f8cSSatish Balay 1479a30f8f8cSSatish Balay PetscFunctionBegin; 1480e98b92d7SKris Buschelman switch (op) { 1481512a5fc5SBarry Smith case MAT_NEW_NONZERO_LOCATIONS: 1482e98b92d7SKris Buschelman case MAT_NEW_NONZERO_ALLOCATION_ERR: 148328b2fa4aSMatthew Knepley case MAT_UNUSED_NONZERO_LOCATION_ERR: 1484a9817697SBarry Smith case MAT_KEEP_NONZERO_PATTERN: 1485c10200c1SHong Zhang case MAT_SUBMAT_SINGLEIS: 1486e98b92d7SKris Buschelman case MAT_NEW_NONZERO_LOCATION_ERR: 148743674050SBarry Smith MatCheckPreallocated(A, 1); 14889566063dSJacob Faibussowitsch PetscCall(MatSetOption(a->A, op, flg)); 14899566063dSJacob Faibussowitsch PetscCall(MatSetOption(a->B, op, flg)); 1490e98b92d7SKris Buschelman break; 1491e98b92d7SKris Buschelman case MAT_ROW_ORIENTED: 149243674050SBarry Smith MatCheckPreallocated(A, 1); 14934e0d8c25SBarry Smith a->roworiented = flg; 149426fbe8dcSKarl Rupp 14959566063dSJacob Faibussowitsch PetscCall(MatSetOption(a->A, op, flg)); 14969566063dSJacob Faibussowitsch PetscCall(MatSetOption(a->B, op, flg)); 1497e98b92d7SKris Buschelman break; 14988c78258cSHong Zhang case MAT_FORCE_DIAGONAL_ENTRIES: 1499d71ae5a4SJacob Faibussowitsch case MAT_SORTED_FULL: 1500d71ae5a4SJacob Faibussowitsch PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op])); 1501d71ae5a4SJacob Faibussowitsch break; 1502d71ae5a4SJacob Faibussowitsch case MAT_IGNORE_OFF_PROC_ENTRIES: 1503d71ae5a4SJacob Faibussowitsch a->donotstash = flg; 1504d71ae5a4SJacob Faibussowitsch break; 1505d71ae5a4SJacob Faibussowitsch case MAT_USE_HASH_TABLE: 1506d71ae5a4SJacob Faibussowitsch a->ht_flag = flg; 1507d71ae5a4SJacob Faibussowitsch break; 1508d71ae5a4SJacob Faibussowitsch case MAT_HERMITIAN: 1509d71ae5a4SJacob Faibussowitsch MatCheckPreallocated(A, 1); 1510d71ae5a4SJacob Faibussowitsch PetscCall(MatSetOption(a->A, op, flg)); 15110f2140c7SStefano Zampini #if defined(PETSC_USE_COMPLEX) 1512eb1ec7c1SStefano Zampini if (flg) { /* need different mat-vec ops */ 1513547795f9SHong Zhang A->ops->mult = MatMult_MPISBAIJ_Hermitian; 1514eb1ec7c1SStefano Zampini A->ops->multadd = MatMultAdd_MPISBAIJ_Hermitian; 1515eb1ec7c1SStefano Zampini A->ops->multtranspose = NULL; 1516eb1ec7c1SStefano Zampini A->ops->multtransposeadd = NULL; 1517b94d7dedSBarry Smith A->symmetric = PETSC_BOOL3_FALSE; 1518eb1ec7c1SStefano Zampini } 15190f2140c7SStefano Zampini #endif 1520eeffb40dSHong Zhang break; 1521ffa07934SHong Zhang case MAT_SPD: 1522d71ae5a4SJacob Faibussowitsch case MAT_SYMMETRIC: 1523d71ae5a4SJacob Faibussowitsch MatCheckPreallocated(A, 1); 1524d71ae5a4SJacob Faibussowitsch PetscCall(MatSetOption(a->A, op, flg)); 1525eb1ec7c1SStefano Zampini #if defined(PETSC_USE_COMPLEX) 1526eb1ec7c1SStefano Zampini if (flg) { /* restore to use default mat-vec ops */ 1527eb1ec7c1SStefano Zampini A->ops->mult = MatMult_MPISBAIJ; 1528eb1ec7c1SStefano Zampini A->ops->multadd = MatMultAdd_MPISBAIJ; 1529eb1ec7c1SStefano Zampini A->ops->multtranspose = MatMult_MPISBAIJ; 1530eb1ec7c1SStefano Zampini A->ops->multtransposeadd = MatMultAdd_MPISBAIJ; 1531eb1ec7c1SStefano Zampini } 1532eb1ec7c1SStefano Zampini #endif 1533eeffb40dSHong Zhang break; 153477e54ba9SKris Buschelman case MAT_STRUCTURALLY_SYMMETRIC: 153543674050SBarry Smith MatCheckPreallocated(A, 1); 15369566063dSJacob Faibussowitsch PetscCall(MatSetOption(a->A, op, flg)); 1537eeffb40dSHong Zhang break; 15389a4540c5SBarry Smith case MAT_SYMMETRY_ETERNAL: 1539b94d7dedSBarry Smith case MAT_STRUCTURAL_SYMMETRY_ETERNAL: 15405f80ce2aSJacob Faibussowitsch PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_SUP, "Matrix must be symmetric"); 15419566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op])); 154277e54ba9SKris Buschelman break; 1543d71ae5a4SJacob Faibussowitsch case MAT_SPD_ETERNAL: 1544d71ae5a4SJacob Faibussowitsch break; 1545d71ae5a4SJacob Faibussowitsch case MAT_IGNORE_LOWER_TRIANGULAR: 1546d71ae5a4SJacob Faibussowitsch aA->ignore_ltriangular = flg; 1547d71ae5a4SJacob Faibussowitsch break; 1548d71ae5a4SJacob Faibussowitsch case MAT_ERROR_LOWER_TRIANGULAR: 1549d71ae5a4SJacob Faibussowitsch aA->ignore_ltriangular = flg; 1550d71ae5a4SJacob Faibussowitsch break; 1551d71ae5a4SJacob Faibussowitsch case MAT_GETROW_UPPERTRIANGULAR: 1552d71ae5a4SJacob Faibussowitsch aA->getrow_utriangular = flg; 1553d71ae5a4SJacob Faibussowitsch break; 1554d71ae5a4SJacob Faibussowitsch default: 1555d71ae5a4SJacob Faibussowitsch SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "unknown option %d", op); 1556a30f8f8cSSatish Balay } 15573ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1558a30f8f8cSSatish Balay } 1559a30f8f8cSSatish Balay 1560d71ae5a4SJacob Faibussowitsch PetscErrorCode MatTranspose_MPISBAIJ(Mat A, MatReuse reuse, Mat *B) 1561d71ae5a4SJacob Faibussowitsch { 1562a30f8f8cSSatish Balay PetscFunctionBegin; 15637fb60732SBarry Smith if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *B)); 1564cf37664fSBarry Smith if (reuse == MAT_INITIAL_MATRIX) { 15659566063dSJacob Faibussowitsch PetscCall(MatDuplicate(A, MAT_COPY_VALUES, B)); 1566cf37664fSBarry Smith } else if (reuse == MAT_REUSE_MATRIX) { 15679566063dSJacob Faibussowitsch PetscCall(MatCopy(A, *B, SAME_NONZERO_PATTERN)); 1568fc4dec0aSBarry Smith } 15693ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1570a30f8f8cSSatish Balay } 1571a30f8f8cSSatish Balay 1572d71ae5a4SJacob Faibussowitsch PetscErrorCode MatDiagonalScale_MPISBAIJ(Mat mat, Vec ll, Vec rr) 1573d71ae5a4SJacob Faibussowitsch { 1574a30f8f8cSSatish Balay Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data; 1575a30f8f8cSSatish Balay Mat a = baij->A, b = baij->B; 15765e90f9d9SHong Zhang PetscInt nv, m, n; 1577ace3abfcSBarry Smith PetscBool flg; 1578a30f8f8cSSatish Balay 1579a30f8f8cSSatish Balay PetscFunctionBegin; 1580a30f8f8cSSatish Balay if (ll != rr) { 15819566063dSJacob Faibussowitsch PetscCall(VecEqual(ll, rr, &flg)); 15825f80ce2aSJacob Faibussowitsch PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "For symmetric format, left and right scaling vectors must be same"); 1583a30f8f8cSSatish Balay } 15843ba16761SJacob Faibussowitsch if (!ll) PetscFunctionReturn(PETSC_SUCCESS); 1585b3bf805bSHong Zhang 15869566063dSJacob Faibussowitsch PetscCall(MatGetLocalSize(mat, &m, &n)); 15875f80ce2aSJacob Faibussowitsch PetscCheck(m == n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "For symmetric format, local size %" PetscInt_FMT " %" PetscInt_FMT " must be same", m, n); 1588b3bf805bSHong Zhang 15899566063dSJacob Faibussowitsch PetscCall(VecGetLocalSize(rr, &nv)); 15905f80ce2aSJacob Faibussowitsch PetscCheck(nv == n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Left and right vector non-conforming local size"); 15915e90f9d9SHong Zhang 15929566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(baij->Mvctx, rr, baij->lvec, INSERT_VALUES, SCATTER_FORWARD)); 15935e90f9d9SHong Zhang 15945e90f9d9SHong Zhang /* left diagonalscale the off-diagonal part */ 1595dbbe0bcdSBarry Smith PetscUseTypeMethod(b, diagonalscale, ll, NULL); 15965e90f9d9SHong Zhang 15975e90f9d9SHong Zhang /* scale the diagonal part */ 1598dbbe0bcdSBarry Smith PetscUseTypeMethod(a, diagonalscale, ll, rr); 1599a30f8f8cSSatish Balay 16005e90f9d9SHong Zhang /* right diagonalscale the off-diagonal part */ 16019566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(baij->Mvctx, rr, baij->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1602dbbe0bcdSBarry Smith PetscUseTypeMethod(b, diagonalscale, NULL, baij->lvec); 16033ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1604a30f8f8cSSatish Balay } 1605a30f8f8cSSatish Balay 1606d71ae5a4SJacob Faibussowitsch PetscErrorCode MatSetUnfactored_MPISBAIJ(Mat A) 1607d71ae5a4SJacob Faibussowitsch { 1608f3566a2aSHong Zhang Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data; 1609a30f8f8cSSatish Balay 1610a30f8f8cSSatish Balay PetscFunctionBegin; 16119566063dSJacob Faibussowitsch PetscCall(MatSetUnfactored(a->A)); 16123ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1613a30f8f8cSSatish Balay } 1614a30f8f8cSSatish Balay 16156849ba73SBarry Smith static PetscErrorCode MatDuplicate_MPISBAIJ(Mat, MatDuplicateOption, Mat *); 1616a30f8f8cSSatish Balay 1617d71ae5a4SJacob Faibussowitsch PetscErrorCode MatEqual_MPISBAIJ(Mat A, Mat B, PetscBool *flag) 1618d71ae5a4SJacob Faibussowitsch { 1619a30f8f8cSSatish Balay Mat_MPISBAIJ *matB = (Mat_MPISBAIJ *)B->data, *matA = (Mat_MPISBAIJ *)A->data; 1620a30f8f8cSSatish Balay Mat a, b, c, d; 1621ace3abfcSBarry Smith PetscBool flg; 1622a30f8f8cSSatish Balay 1623a30f8f8cSSatish Balay PetscFunctionBegin; 16249371c9d4SSatish Balay a = matA->A; 16259371c9d4SSatish Balay b = matA->B; 16269371c9d4SSatish Balay c = matB->A; 16279371c9d4SSatish Balay d = matB->B; 1628a30f8f8cSSatish Balay 16299566063dSJacob Faibussowitsch PetscCall(MatEqual(a, c, &flg)); 163048a46eb9SPierre Jolivet if (flg) PetscCall(MatEqual(b, d, &flg)); 16311c2dc1cbSBarry Smith PetscCall(MPIU_Allreduce(&flg, flag, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A))); 16323ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1633a30f8f8cSSatish Balay } 1634a30f8f8cSSatish Balay 1635d71ae5a4SJacob Faibussowitsch PetscErrorCode MatCopy_MPISBAIJ(Mat A, Mat B, MatStructure str) 1636d71ae5a4SJacob Faibussowitsch { 16374c7a3774SStefano Zampini PetscBool isbaij; 16383c896bc6SHong Zhang 16393c896bc6SHong Zhang PetscFunctionBegin; 16409566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompareAny((PetscObject)B, &isbaij, MATSEQSBAIJ, MATMPISBAIJ, "")); 16415f80ce2aSJacob Faibussowitsch PetscCheck(isbaij, PetscObjectComm((PetscObject)B), PETSC_ERR_SUP, "Not for matrix type %s", ((PetscObject)B)->type_name); 16423c896bc6SHong Zhang /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 16433c896bc6SHong Zhang if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 16449566063dSJacob Faibussowitsch PetscCall(MatGetRowUpperTriangular(A)); 16459566063dSJacob Faibussowitsch PetscCall(MatCopy_Basic(A, B, str)); 16469566063dSJacob Faibussowitsch PetscCall(MatRestoreRowUpperTriangular(A)); 16473c896bc6SHong Zhang } else { 16484c7a3774SStefano Zampini Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data; 16494c7a3774SStefano Zampini Mat_MPISBAIJ *b = (Mat_MPISBAIJ *)B->data; 16504c7a3774SStefano Zampini 16519566063dSJacob Faibussowitsch PetscCall(MatCopy(a->A, b->A, str)); 16529566063dSJacob Faibussowitsch PetscCall(MatCopy(a->B, b->B, str)); 16533c896bc6SHong Zhang } 16549566063dSJacob Faibussowitsch PetscCall(PetscObjectStateIncrease((PetscObject)B)); 16553ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 16563c896bc6SHong Zhang } 16573c896bc6SHong Zhang 1658d71ae5a4SJacob Faibussowitsch PetscErrorCode MatAXPY_MPISBAIJ(Mat Y, PetscScalar a, Mat X, MatStructure str) 1659d71ae5a4SJacob Faibussowitsch { 16604fe895cdSHong Zhang Mat_MPISBAIJ *xx = (Mat_MPISBAIJ *)X->data, *yy = (Mat_MPISBAIJ *)Y->data; 16614fe895cdSHong Zhang PetscBLASInt bnz, one = 1; 16624fe895cdSHong Zhang Mat_SeqSBAIJ *xa, *ya; 16634fe895cdSHong Zhang Mat_SeqBAIJ *xb, *yb; 16644fe895cdSHong Zhang 16654fe895cdSHong Zhang PetscFunctionBegin; 16664fe895cdSHong Zhang if (str == SAME_NONZERO_PATTERN) { 16674fe895cdSHong Zhang PetscScalar alpha = a; 16684fe895cdSHong Zhang xa = (Mat_SeqSBAIJ *)xx->A->data; 16694fe895cdSHong Zhang ya = (Mat_SeqSBAIJ *)yy->A->data; 16709566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast(xa->nz, &bnz)); 1671792fecdfSBarry Smith PetscCallBLAS("BLASaxpy", BLASaxpy_(&bnz, &alpha, xa->a, &one, ya->a, &one)); 16724fe895cdSHong Zhang xb = (Mat_SeqBAIJ *)xx->B->data; 16734fe895cdSHong Zhang yb = (Mat_SeqBAIJ *)yy->B->data; 16749566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast(xb->nz, &bnz)); 1675792fecdfSBarry Smith PetscCallBLAS("BLASaxpy", BLASaxpy_(&bnz, &alpha, xb->a, &one, yb->a, &one)); 16769566063dSJacob Faibussowitsch PetscCall(PetscObjectStateIncrease((PetscObject)Y)); 1677ab784542SHong Zhang } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 16789566063dSJacob Faibussowitsch PetscCall(MatSetOption(X, MAT_GETROW_UPPERTRIANGULAR, PETSC_TRUE)); 16799566063dSJacob Faibussowitsch PetscCall(MatAXPY_Basic(Y, a, X, str)); 16809566063dSJacob Faibussowitsch PetscCall(MatSetOption(X, MAT_GETROW_UPPERTRIANGULAR, PETSC_FALSE)); 16814fe895cdSHong Zhang } else { 16824de5dceeSHong Zhang Mat B; 16834de5dceeSHong Zhang PetscInt *nnz_d, *nnz_o, bs = Y->rmap->bs; 16845f80ce2aSJacob Faibussowitsch PetscCheck(bs == X->rmap->bs, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrices must have same block size"); 16859566063dSJacob Faibussowitsch PetscCall(MatGetRowUpperTriangular(X)); 16869566063dSJacob Faibussowitsch PetscCall(MatGetRowUpperTriangular(Y)); 16879566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(yy->A->rmap->N, &nnz_d)); 16889566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(yy->B->rmap->N, &nnz_o)); 16899566063dSJacob Faibussowitsch PetscCall(MatCreate(PetscObjectComm((PetscObject)Y), &B)); 16909566063dSJacob Faibussowitsch PetscCall(PetscObjectSetName((PetscObject)B, ((PetscObject)Y)->name)); 16919566063dSJacob Faibussowitsch PetscCall(MatSetSizes(B, Y->rmap->n, Y->cmap->n, Y->rmap->N, Y->cmap->N)); 16929566063dSJacob Faibussowitsch PetscCall(MatSetBlockSizesFromMats(B, Y, Y)); 16939566063dSJacob Faibussowitsch PetscCall(MatSetType(B, MATMPISBAIJ)); 16949566063dSJacob Faibussowitsch PetscCall(MatAXPYGetPreallocation_SeqSBAIJ(yy->A, xx->A, nnz_d)); 16959566063dSJacob Faibussowitsch PetscCall(MatAXPYGetPreallocation_MPIBAIJ(yy->B, yy->garray, xx->B, xx->garray, nnz_o)); 16969566063dSJacob Faibussowitsch PetscCall(MatMPISBAIJSetPreallocation(B, bs, 0, nnz_d, 0, nnz_o)); 16979566063dSJacob Faibussowitsch PetscCall(MatAXPY_BasicWithPreallocation(B, Y, a, X, str)); 16989566063dSJacob Faibussowitsch PetscCall(MatHeaderMerge(Y, &B)); 16999566063dSJacob Faibussowitsch PetscCall(PetscFree(nnz_d)); 17009566063dSJacob Faibussowitsch PetscCall(PetscFree(nnz_o)); 17019566063dSJacob Faibussowitsch PetscCall(MatRestoreRowUpperTriangular(X)); 17029566063dSJacob Faibussowitsch PetscCall(MatRestoreRowUpperTriangular(Y)); 17034fe895cdSHong Zhang } 17043ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 17054fe895cdSHong Zhang } 17064fe895cdSHong Zhang 1707d71ae5a4SJacob Faibussowitsch PetscErrorCode MatCreateSubMatrices_MPISBAIJ(Mat A, PetscInt n, const IS irow[], const IS icol[], MatReuse scall, Mat *B[]) 1708d71ae5a4SJacob Faibussowitsch { 17091302d50aSBarry Smith PetscInt i; 1710afebec48SHong Zhang PetscBool flg; 1711a5e6ed63SBarry Smith 17126849ba73SBarry Smith PetscFunctionBegin; 17139566063dSJacob Faibussowitsch PetscCall(MatCreateSubMatrices_MPIBAIJ(A, n, irow, icol, scall, B)); /* B[] are sbaij matrices */ 1714a5e6ed63SBarry Smith for (i = 0; i < n; i++) { 17159566063dSJacob Faibussowitsch PetscCall(ISEqual(irow[i], icol[i], &flg)); 171648a46eb9SPierre Jolivet if (!flg) PetscCall(MatSeqSBAIJZeroOps_Private(*B[i])); 17174dcd73b1SHong Zhang } 17183ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1719a5e6ed63SBarry Smith } 1720a5e6ed63SBarry Smith 1721d71ae5a4SJacob Faibussowitsch PetscErrorCode MatShift_MPISBAIJ(Mat Y, PetscScalar a) 1722d71ae5a4SJacob Faibussowitsch { 17237d68702bSBarry Smith Mat_MPISBAIJ *maij = (Mat_MPISBAIJ *)Y->data; 17246f33a894SBarry Smith Mat_SeqSBAIJ *aij = (Mat_SeqSBAIJ *)maij->A->data; 17257d68702bSBarry Smith 17267d68702bSBarry Smith PetscFunctionBegin; 17276f33a894SBarry Smith if (!Y->preallocated) { 17289566063dSJacob Faibussowitsch PetscCall(MatMPISBAIJSetPreallocation(Y, Y->rmap->bs, 1, NULL, 0, NULL)); 17296f33a894SBarry Smith } else if (!aij->nz) { 1730b83222d8SBarry Smith PetscInt nonew = aij->nonew; 17319566063dSJacob Faibussowitsch PetscCall(MatSeqSBAIJSetPreallocation(maij->A, Y->rmap->bs, 1, NULL)); 1732b83222d8SBarry Smith aij->nonew = nonew; 17337d68702bSBarry Smith } 17349566063dSJacob Faibussowitsch PetscCall(MatShift_Basic(Y, a)); 17353ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 17367d68702bSBarry Smith } 17377d68702bSBarry Smith 1738d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMissingDiagonal_MPISBAIJ(Mat A, PetscBool *missing, PetscInt *d) 1739d71ae5a4SJacob Faibussowitsch { 17403b49f96aSBarry Smith Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data; 17413b49f96aSBarry Smith 17423b49f96aSBarry Smith PetscFunctionBegin; 17435f80ce2aSJacob Faibussowitsch PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only works for square matrices"); 17449566063dSJacob Faibussowitsch PetscCall(MatMissingDiagonal(a->A, missing, d)); 17453b49f96aSBarry Smith if (d) { 17463b49f96aSBarry Smith PetscInt rstart; 17479566063dSJacob Faibussowitsch PetscCall(MatGetOwnershipRange(A, &rstart, NULL)); 17483b49f96aSBarry Smith *d += rstart / A->rmap->bs; 17493b49f96aSBarry Smith } 17503ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 17513b49f96aSBarry Smith } 17523b49f96aSBarry Smith 1753d71ae5a4SJacob Faibussowitsch PetscErrorCode MatGetDiagonalBlock_MPISBAIJ(Mat A, Mat *a) 1754d71ae5a4SJacob Faibussowitsch { 1755a5b7ff6bSBarry Smith PetscFunctionBegin; 1756a5b7ff6bSBarry Smith *a = ((Mat_MPISBAIJ *)A->data)->A; 17573ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1758a5b7ff6bSBarry Smith } 17593b49f96aSBarry Smith 17603964eb88SJed Brown static struct _MatOps MatOps_Values = {MatSetValues_MPISBAIJ, 1761a30f8f8cSSatish Balay MatGetRow_MPISBAIJ, 1762a30f8f8cSSatish Balay MatRestoreRow_MPISBAIJ, 1763a9d4b620SHong Zhang MatMult_MPISBAIJ, 176497304618SKris Buschelman /* 4*/ MatMultAdd_MPISBAIJ, 1765431c96f7SBarry Smith MatMult_MPISBAIJ, /* transpose versions are same as non-transpose */ 1766431c96f7SBarry Smith MatMultAdd_MPISBAIJ, 1767f4259b30SLisandro Dalcin NULL, 1768f4259b30SLisandro Dalcin NULL, 1769f4259b30SLisandro Dalcin NULL, 1770f4259b30SLisandro Dalcin /* 10*/ NULL, 1771f4259b30SLisandro Dalcin NULL, 1772f4259b30SLisandro Dalcin NULL, 177341f059aeSBarry Smith MatSOR_MPISBAIJ, 1774a30f8f8cSSatish Balay MatTranspose_MPISBAIJ, 177597304618SKris Buschelman /* 15*/ MatGetInfo_MPISBAIJ, 1776a30f8f8cSSatish Balay MatEqual_MPISBAIJ, 1777a30f8f8cSSatish Balay MatGetDiagonal_MPISBAIJ, 1778a30f8f8cSSatish Balay MatDiagonalScale_MPISBAIJ, 1779a30f8f8cSSatish Balay MatNorm_MPISBAIJ, 178097304618SKris Buschelman /* 20*/ MatAssemblyBegin_MPISBAIJ, 1781a30f8f8cSSatish Balay MatAssemblyEnd_MPISBAIJ, 1782a30f8f8cSSatish Balay MatSetOption_MPISBAIJ, 1783a30f8f8cSSatish Balay MatZeroEntries_MPISBAIJ, 1784f4259b30SLisandro Dalcin /* 24*/ NULL, 1785f4259b30SLisandro Dalcin NULL, 1786f4259b30SLisandro Dalcin NULL, 1787f4259b30SLisandro Dalcin NULL, 1788f4259b30SLisandro Dalcin NULL, 178926cec326SBarry Smith /* 29*/ MatSetUp_MPI_Hash, 1790f4259b30SLisandro Dalcin NULL, 1791f4259b30SLisandro Dalcin NULL, 1792a5b7ff6bSBarry Smith MatGetDiagonalBlock_MPISBAIJ, 1793f4259b30SLisandro Dalcin NULL, 1794d519adbfSMatthew Knepley /* 34*/ MatDuplicate_MPISBAIJ, 1795f4259b30SLisandro Dalcin NULL, 1796f4259b30SLisandro Dalcin NULL, 1797f4259b30SLisandro Dalcin NULL, 1798f4259b30SLisandro Dalcin NULL, 1799d519adbfSMatthew Knepley /* 39*/ MatAXPY_MPISBAIJ, 18007dae84e0SHong Zhang MatCreateSubMatrices_MPISBAIJ, 1801d94109b8SHong Zhang MatIncreaseOverlap_MPISBAIJ, 1802a30f8f8cSSatish Balay MatGetValues_MPISBAIJ, 18033c896bc6SHong Zhang MatCopy_MPISBAIJ, 1804f4259b30SLisandro Dalcin /* 44*/ NULL, 1805a30f8f8cSSatish Balay MatScale_MPISBAIJ, 18067d68702bSBarry Smith MatShift_MPISBAIJ, 1807f4259b30SLisandro Dalcin NULL, 1808f4259b30SLisandro Dalcin NULL, 1809f4259b30SLisandro Dalcin /* 49*/ NULL, 1810f4259b30SLisandro Dalcin NULL, 1811f4259b30SLisandro Dalcin NULL, 1812f4259b30SLisandro Dalcin NULL, 1813f4259b30SLisandro Dalcin NULL, 1814f4259b30SLisandro Dalcin /* 54*/ NULL, 1815f4259b30SLisandro Dalcin NULL, 1816a30f8f8cSSatish Balay MatSetUnfactored_MPISBAIJ, 1817f4259b30SLisandro Dalcin NULL, 1818a30f8f8cSSatish Balay MatSetValuesBlocked_MPISBAIJ, 18197dae84e0SHong Zhang /* 59*/ MatCreateSubMatrix_MPISBAIJ, 1820f4259b30SLisandro Dalcin NULL, 1821f4259b30SLisandro Dalcin NULL, 1822f4259b30SLisandro Dalcin NULL, 1823f4259b30SLisandro Dalcin NULL, 1824f4259b30SLisandro Dalcin /* 64*/ NULL, 1825f4259b30SLisandro Dalcin NULL, 1826f4259b30SLisandro Dalcin NULL, 1827f4259b30SLisandro Dalcin NULL, 1828f4259b30SLisandro Dalcin NULL, 1829d519adbfSMatthew Knepley /* 69*/ MatGetRowMaxAbs_MPISBAIJ, 1830f4259b30SLisandro Dalcin NULL, 183128d58a37SPierre Jolivet MatConvert_MPISBAIJ_Basic, 1832f4259b30SLisandro Dalcin NULL, 1833f4259b30SLisandro Dalcin NULL, 1834f4259b30SLisandro Dalcin /* 74*/ NULL, 1835f4259b30SLisandro Dalcin NULL, 1836f4259b30SLisandro Dalcin NULL, 1837f4259b30SLisandro Dalcin NULL, 1838f4259b30SLisandro Dalcin NULL, 1839f4259b30SLisandro Dalcin /* 79*/ NULL, 1840f4259b30SLisandro Dalcin NULL, 1841f4259b30SLisandro Dalcin NULL, 1842f4259b30SLisandro Dalcin NULL, 18435bba2384SShri Abhyankar MatLoad_MPISBAIJ, 1844f4259b30SLisandro Dalcin /* 84*/ NULL, 1845f4259b30SLisandro Dalcin NULL, 1846f4259b30SLisandro Dalcin NULL, 1847f4259b30SLisandro Dalcin NULL, 1848f4259b30SLisandro Dalcin NULL, 1849f4259b30SLisandro Dalcin /* 89*/ NULL, 1850f4259b30SLisandro Dalcin NULL, 1851f4259b30SLisandro Dalcin NULL, 1852f4259b30SLisandro Dalcin NULL, 1853f4259b30SLisandro Dalcin NULL, 1854f4259b30SLisandro Dalcin /* 94*/ NULL, 1855f4259b30SLisandro Dalcin NULL, 1856f4259b30SLisandro Dalcin NULL, 1857f4259b30SLisandro Dalcin NULL, 1858f4259b30SLisandro Dalcin NULL, 1859f4259b30SLisandro Dalcin /* 99*/ NULL, 1860f4259b30SLisandro Dalcin NULL, 1861f4259b30SLisandro Dalcin NULL, 18622726fb6dSPierre Jolivet MatConjugate_MPISBAIJ, 1863f4259b30SLisandro Dalcin NULL, 1864f4259b30SLisandro Dalcin /*104*/ NULL, 186599cafbc1SBarry Smith MatRealPart_MPISBAIJ, 1866d0d4cfc2SHong Zhang MatImaginaryPart_MPISBAIJ, 1867d0d4cfc2SHong Zhang MatGetRowUpperTriangular_MPISBAIJ, 186895936485SShri Abhyankar MatRestoreRowUpperTriangular_MPISBAIJ, 1869f4259b30SLisandro Dalcin /*109*/ NULL, 1870f4259b30SLisandro Dalcin NULL, 1871f4259b30SLisandro Dalcin NULL, 1872f4259b30SLisandro Dalcin NULL, 18733b49f96aSBarry Smith MatMissingDiagonal_MPISBAIJ, 1874f4259b30SLisandro Dalcin /*114*/ NULL, 1875f4259b30SLisandro Dalcin NULL, 1876f4259b30SLisandro Dalcin NULL, 1877f4259b30SLisandro Dalcin NULL, 1878f4259b30SLisandro Dalcin NULL, 1879f4259b30SLisandro Dalcin /*119*/ NULL, 1880f4259b30SLisandro Dalcin NULL, 1881f4259b30SLisandro Dalcin NULL, 1882f4259b30SLisandro Dalcin NULL, 1883f4259b30SLisandro Dalcin NULL, 1884f4259b30SLisandro Dalcin /*124*/ NULL, 1885f4259b30SLisandro Dalcin NULL, 1886f4259b30SLisandro Dalcin NULL, 1887f4259b30SLisandro Dalcin NULL, 1888f4259b30SLisandro Dalcin NULL, 1889f4259b30SLisandro Dalcin /*129*/ NULL, 1890f4259b30SLisandro Dalcin NULL, 1891f4259b30SLisandro Dalcin NULL, 1892f4259b30SLisandro Dalcin NULL, 1893f4259b30SLisandro Dalcin NULL, 1894f4259b30SLisandro Dalcin /*134*/ NULL, 1895f4259b30SLisandro Dalcin NULL, 1896f4259b30SLisandro Dalcin NULL, 1897f4259b30SLisandro Dalcin NULL, 1898f4259b30SLisandro Dalcin NULL, 189946533700Sstefano_zampini /*139*/ MatSetBlockSizes_Default, 1900f4259b30SLisandro Dalcin NULL, 1901f4259b30SLisandro Dalcin NULL, 1902f4259b30SLisandro Dalcin NULL, 1903f4259b30SLisandro Dalcin NULL, 1904d70f29a3SPierre Jolivet /*144*/ MatCreateMPIMatConcatenateSeqMat_MPISBAIJ, 1905d70f29a3SPierre Jolivet NULL, 1906d70f29a3SPierre Jolivet NULL, 190799a7f59eSMark Adams NULL, 190899a7f59eSMark Adams NULL, 19097fb60732SBarry Smith NULL, 1910dec0b466SHong Zhang /*150*/ NULL, 1911dec0b466SHong Zhang NULL}; 1912a30f8f8cSSatish Balay 1913d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMPISBAIJSetPreallocation_MPISBAIJ(Mat B, PetscInt bs, PetscInt d_nz, const PetscInt *d_nnz, PetscInt o_nz, const PetscInt *o_nnz) 1914d71ae5a4SJacob Faibussowitsch { 1915476417e5SBarry Smith Mat_MPISBAIJ *b = (Mat_MPISBAIJ *)B->data; 1916535b19f3SBarry Smith PetscInt i, mbs, Mbs; 19175d2a9ed1SStefano Zampini PetscMPIInt size; 1918a23d5eceSKris Buschelman 1919a23d5eceSKris Buschelman PetscFunctionBegin; 1920ad79cf63SBarry Smith if (B->hash_active) { 1921ad79cf63SBarry Smith PetscCall(PetscMemcpy(&B->ops, &b->cops, sizeof(*(B->ops)))); 1922ad79cf63SBarry Smith B->hash_active = PETSC_FALSE; 1923ad79cf63SBarry Smith } 1924ad79cf63SBarry Smith if (!B->preallocated) PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B), bs, &B->bstash)); 19259566063dSJacob Faibussowitsch PetscCall(MatSetBlockSize(B, PetscAbs(bs))); 19269566063dSJacob Faibussowitsch PetscCall(PetscLayoutSetUp(B->rmap)); 19279566063dSJacob Faibussowitsch PetscCall(PetscLayoutSetUp(B->cmap)); 19289566063dSJacob Faibussowitsch PetscCall(PetscLayoutGetBlockSize(B->rmap, &bs)); 19295f80ce2aSJacob Faibussowitsch PetscCheck(B->rmap->N <= B->cmap->N, PetscObjectComm((PetscObject)B), PETSC_ERR_SUP, "MPISBAIJ matrix cannot have more rows %" PetscInt_FMT " than columns %" PetscInt_FMT, B->rmap->N, B->cmap->N); 19305f80ce2aSJacob Faibussowitsch PetscCheck(B->rmap->n <= B->cmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "MPISBAIJ matrix cannot have more local rows %" PetscInt_FMT " than columns %" PetscInt_FMT, B->rmap->n, B->cmap->n); 1931899cda47SBarry Smith 1932d0f46423SBarry Smith mbs = B->rmap->n / bs; 1933d0f46423SBarry Smith Mbs = B->rmap->N / bs; 19345f80ce2aSJacob Faibussowitsch PetscCheck(mbs * bs == B->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "No of local rows %" PetscInt_FMT " must be divisible by blocksize %" PetscInt_FMT, B->rmap->N, bs); 1935a23d5eceSKris Buschelman 1936d0f46423SBarry Smith B->rmap->bs = bs; 1937a23d5eceSKris Buschelman b->bs2 = bs * bs; 1938a23d5eceSKris Buschelman b->mbs = mbs; 1939a23d5eceSKris Buschelman b->Mbs = Mbs; 1940de64b629SHong Zhang b->nbs = B->cmap->n / bs; 1941de64b629SHong Zhang b->Nbs = B->cmap->N / bs; 1942a23d5eceSKris Buschelman 1943ad540459SPierre Jolivet for (i = 0; i <= b->size; i++) b->rangebs[i] = B->rmap->range[i] / bs; 1944d0f46423SBarry Smith b->rstartbs = B->rmap->rstart / bs; 1945d0f46423SBarry Smith b->rendbs = B->rmap->rend / bs; 1946a23d5eceSKris Buschelman 1947d0f46423SBarry Smith b->cstartbs = B->cmap->rstart / bs; 1948d0f46423SBarry Smith b->cendbs = B->cmap->rend / bs; 1949a23d5eceSKris Buschelman 1950cb7b82ddSBarry Smith #if defined(PETSC_USE_CTABLE) 1951eec179cfSJacob Faibussowitsch PetscCall(PetscHMapIDestroy(&b->colmap)); 1952cb7b82ddSBarry Smith #else 19539566063dSJacob Faibussowitsch PetscCall(PetscFree(b->colmap)); 1954cb7b82ddSBarry Smith #endif 19559566063dSJacob Faibussowitsch PetscCall(PetscFree(b->garray)); 19569566063dSJacob Faibussowitsch PetscCall(VecDestroy(&b->lvec)); 19579566063dSJacob Faibussowitsch PetscCall(VecScatterDestroy(&b->Mvctx)); 19589566063dSJacob Faibussowitsch PetscCall(VecDestroy(&b->slvec0)); 19599566063dSJacob Faibussowitsch PetscCall(VecDestroy(&b->slvec0b)); 19609566063dSJacob Faibussowitsch PetscCall(VecDestroy(&b->slvec1)); 19619566063dSJacob Faibussowitsch PetscCall(VecDestroy(&b->slvec1a)); 19629566063dSJacob Faibussowitsch PetscCall(VecDestroy(&b->slvec1b)); 19639566063dSJacob Faibussowitsch PetscCall(VecScatterDestroy(&b->sMvctx)); 1964cb7b82ddSBarry Smith 19659566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size)); 19669566063dSJacob Faibussowitsch PetscCall(MatDestroy(&b->B)); 19679566063dSJacob Faibussowitsch PetscCall(MatCreate(PETSC_COMM_SELF, &b->B)); 19689566063dSJacob Faibussowitsch PetscCall(MatSetSizes(b->B, B->rmap->n, size > 1 ? B->cmap->N : 0, B->rmap->n, size > 1 ? B->cmap->N : 0)); 19699566063dSJacob Faibussowitsch PetscCall(MatSetType(b->B, MATSEQBAIJ)); 1970cb7b82ddSBarry Smith 1971ad79cf63SBarry Smith PetscCall(MatDestroy(&b->A)); 19729566063dSJacob Faibussowitsch PetscCall(MatCreate(PETSC_COMM_SELF, &b->A)); 19739566063dSJacob Faibussowitsch PetscCall(MatSetSizes(b->A, B->rmap->n, B->cmap->n, B->rmap->n, B->cmap->n)); 19749566063dSJacob Faibussowitsch PetscCall(MatSetType(b->A, MATSEQSBAIJ)); 1975a23d5eceSKris Buschelman 19769566063dSJacob Faibussowitsch PetscCall(MatSeqSBAIJSetPreallocation(b->A, bs, d_nz, d_nnz)); 19779566063dSJacob Faibussowitsch PetscCall(MatSeqBAIJSetPreallocation(b->B, bs, o_nz, o_nnz)); 197826fbe8dcSKarl Rupp 1979526dfc15SBarry Smith B->preallocated = PETSC_TRUE; 1980cb7b82ddSBarry Smith B->was_assembled = PETSC_FALSE; 1981cb7b82ddSBarry Smith B->assembled = PETSC_FALSE; 19823ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1983a23d5eceSKris Buschelman } 1984a23d5eceSKris Buschelman 1985d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMPISBAIJSetPreallocationCSR_MPISBAIJ(Mat B, PetscInt bs, const PetscInt ii[], const PetscInt jj[], const PetscScalar V[]) 1986d71ae5a4SJacob Faibussowitsch { 198702106b30SBarry Smith PetscInt m, rstart, cend; 1988f4259b30SLisandro Dalcin PetscInt i, j, d, nz, bd, nz_max = 0, *d_nnz = NULL, *o_nnz = NULL; 1989f4259b30SLisandro Dalcin const PetscInt *JJ = NULL; 1990f4259b30SLisandro Dalcin PetscScalar *values = NULL; 1991bb80cfbbSStefano Zampini PetscBool roworiented = ((Mat_MPISBAIJ *)B->data)->roworiented; 19923bd0feecSPierre Jolivet PetscBool nooffprocentries; 1993dfb205c3SBarry Smith 1994dfb205c3SBarry Smith PetscFunctionBegin; 19955f80ce2aSJacob Faibussowitsch PetscCheck(bs >= 1, PetscObjectComm((PetscObject)B), PETSC_ERR_ARG_OUTOFRANGE, "Invalid block size specified, must be positive but it is %" PetscInt_FMT, bs); 19969566063dSJacob Faibussowitsch PetscCall(PetscLayoutSetBlockSize(B->rmap, bs)); 19979566063dSJacob Faibussowitsch PetscCall(PetscLayoutSetBlockSize(B->cmap, bs)); 19989566063dSJacob Faibussowitsch PetscCall(PetscLayoutSetUp(B->rmap)); 19999566063dSJacob Faibussowitsch PetscCall(PetscLayoutSetUp(B->cmap)); 20009566063dSJacob Faibussowitsch PetscCall(PetscLayoutGetBlockSize(B->rmap, &bs)); 2001dfb205c3SBarry Smith m = B->rmap->n / bs; 2002dfb205c3SBarry Smith rstart = B->rmap->rstart / bs; 2003dfb205c3SBarry Smith cend = B->cmap->rend / bs; 2004dfb205c3SBarry Smith 20055f80ce2aSJacob Faibussowitsch PetscCheck(!ii[0], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %" PetscInt_FMT, ii[0]); 20069566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(m, &d_nnz, m, &o_nnz)); 2007dfb205c3SBarry Smith for (i = 0; i < m; i++) { 2008dfb205c3SBarry Smith nz = ii[i + 1] - ii[i]; 20095f80ce2aSJacob Faibussowitsch PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Local row %" PetscInt_FMT " has a negative number of columns %" PetscInt_FMT, i, nz); 20100cd7f59aSBarry Smith /* count the ones on the diagonal and above, split into diagonal and off diagonal portions. */ 2011dfb205c3SBarry Smith JJ = jj + ii[i]; 20120cd7f59aSBarry Smith bd = 0; 2013dfb205c3SBarry Smith for (j = 0; j < nz; j++) { 20140cd7f59aSBarry Smith if (*JJ >= i + rstart) break; 2015dfb205c3SBarry Smith JJ++; 20160cd7f59aSBarry Smith bd++; 2017dfb205c3SBarry Smith } 2018dfb205c3SBarry Smith d = 0; 2019dfb205c3SBarry Smith for (; j < nz; j++) { 2020dfb205c3SBarry Smith if (*JJ++ >= cend) break; 2021dfb205c3SBarry Smith d++; 2022dfb205c3SBarry Smith } 2023dfb205c3SBarry Smith d_nnz[i] = d; 20240cd7f59aSBarry Smith o_nnz[i] = nz - d - bd; 20250cd7f59aSBarry Smith nz = nz - bd; 20260cd7f59aSBarry Smith nz_max = PetscMax(nz_max, nz); 2027dfb205c3SBarry Smith } 20289566063dSJacob Faibussowitsch PetscCall(MatMPISBAIJSetPreallocation(B, bs, 0, d_nnz, 0, o_nnz)); 20299566063dSJacob Faibussowitsch PetscCall(MatSetOption(B, MAT_IGNORE_LOWER_TRIANGULAR, PETSC_TRUE)); 20309566063dSJacob Faibussowitsch PetscCall(PetscFree2(d_nnz, o_nnz)); 2031dfb205c3SBarry Smith 2032dfb205c3SBarry Smith values = (PetscScalar *)V; 203348a46eb9SPierre Jolivet if (!values) PetscCall(PetscCalloc1(bs * bs * nz_max, &values)); 2034dfb205c3SBarry Smith for (i = 0; i < m; i++) { 2035dfb205c3SBarry Smith PetscInt row = i + rstart; 2036dfb205c3SBarry Smith PetscInt ncols = ii[i + 1] - ii[i]; 2037dfb205c3SBarry Smith const PetscInt *icols = jj + ii[i]; 2038bb80cfbbSStefano Zampini if (bs == 1 || !roworiented) { /* block ordering matches the non-nested layout of MatSetValues so we can insert entire rows */ 2039dfb205c3SBarry Smith const PetscScalar *svals = values + (V ? (bs * bs * ii[i]) : 0); 20409566063dSJacob Faibussowitsch PetscCall(MatSetValuesBlocked_MPISBAIJ(B, 1, &row, ncols, icols, svals, INSERT_VALUES)); 2041bb80cfbbSStefano Zampini } else { /* block ordering does not match so we can only insert one block at a time. */ 2042bb80cfbbSStefano Zampini PetscInt j; 20430cd7f59aSBarry Smith for (j = 0; j < ncols; j++) { 20440cd7f59aSBarry Smith const PetscScalar *svals = values + (V ? (bs * bs * (ii[i] + j)) : 0); 20459566063dSJacob Faibussowitsch PetscCall(MatSetValuesBlocked_MPISBAIJ(B, 1, &row, 1, &icols[j], svals, INSERT_VALUES)); 20460cd7f59aSBarry Smith } 20470cd7f59aSBarry Smith } 2048dfb205c3SBarry Smith } 2049dfb205c3SBarry Smith 20509566063dSJacob Faibussowitsch if (!V) PetscCall(PetscFree(values)); 20513bd0feecSPierre Jolivet nooffprocentries = B->nooffprocentries; 20523bd0feecSPierre Jolivet B->nooffprocentries = PETSC_TRUE; 20539566063dSJacob Faibussowitsch PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 20549566063dSJacob Faibussowitsch PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 20553bd0feecSPierre Jolivet B->nooffprocentries = nooffprocentries; 20563bd0feecSPierre Jolivet 20579566063dSJacob Faibussowitsch PetscCall(MatSetOption(B, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE)); 20583ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 2059dfb205c3SBarry Smith } 2060dfb205c3SBarry Smith 20610bad9183SKris Buschelman /*MC 2062fafad747SKris Buschelman MATMPISBAIJ - MATMPISBAIJ = "mpisbaij" - A matrix type to be used for distributed symmetric sparse block matrices, 2063828413b8SBarry Smith based on block compressed sparse row format. Only the upper triangular portion of the "diagonal" portion of 2064828413b8SBarry Smith the matrix is stored. 2065828413b8SBarry Smith 2066828413b8SBarry Smith For complex numbers by default this matrix is symmetric, NOT Hermitian symmetric. To make it Hermitian symmetric you 206711a5261eSBarry Smith can call `MatSetOption`(`Mat`, `MAT_HERMITIAN`); 20680bad9183SKris Buschelman 20692ef1f0ffSBarry Smith Options Database Key: 207011a5261eSBarry Smith . -mat_type mpisbaij - sets the matrix type to "mpisbaij" during a call to `MatSetFromOptions()` 20710bad9183SKris Buschelman 20722ef1f0ffSBarry Smith Level: beginner 20732ef1f0ffSBarry Smith 207411a5261eSBarry Smith Note: 2075476417e5SBarry Smith The number of rows in the matrix must be less than or equal to the number of columns. Similarly the number of rows in the 2076476417e5SBarry Smith diagonal portion of the matrix of each process has to less than or equal the number of columns. 2077476417e5SBarry Smith 2078*1cc06b55SBarry Smith .seealso: [](ch_matrices), `Mat`, `MATSBAIJ`, `MATBAIJ`, `MatCreateBAIJ()`, `MATSEQSBAIJ`, `MatType` 20790bad9183SKris Buschelman M*/ 20800bad9183SKris Buschelman 2081d71ae5a4SJacob Faibussowitsch PETSC_EXTERN PetscErrorCode MatCreate_MPISBAIJ(Mat B) 2082d71ae5a4SJacob Faibussowitsch { 2083b5df2d14SHong Zhang Mat_MPISBAIJ *b; 208494ae4db5SBarry Smith PetscBool flg = PETSC_FALSE; 2085b5df2d14SHong Zhang 2086b5df2d14SHong Zhang PetscFunctionBegin; 20874dfa11a4SJacob Faibussowitsch PetscCall(PetscNew(&b)); 2088b0a32e0cSBarry Smith B->data = (void *)b; 20899566063dSJacob Faibussowitsch PetscCall(PetscMemcpy(B->ops, &MatOps_Values, sizeof(struct _MatOps))); 2090b5df2d14SHong Zhang 2091b5df2d14SHong Zhang B->ops->destroy = MatDestroy_MPISBAIJ; 2092b5df2d14SHong Zhang B->ops->view = MatView_MPISBAIJ; 2093b5df2d14SHong Zhang B->assembled = PETSC_FALSE; 2094b5df2d14SHong Zhang B->insertmode = NOT_SET_VALUES; 209526fbe8dcSKarl Rupp 20969566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)B), &b->rank)); 20979566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &b->size)); 2098b5df2d14SHong Zhang 2099b5df2d14SHong Zhang /* build local table of row and column ownerships */ 21009566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(b->size + 2, &b->rangebs)); 2101b5df2d14SHong Zhang 2102b5df2d14SHong Zhang /* build cache for off array entries formed */ 21039566063dSJacob Faibussowitsch PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B), 1, &B->stash)); 210426fbe8dcSKarl Rupp 2105b5df2d14SHong Zhang b->donotstash = PETSC_FALSE; 21060298fd71SBarry Smith b->colmap = NULL; 21070298fd71SBarry Smith b->garray = NULL; 2108b5df2d14SHong Zhang b->roworiented = PETSC_TRUE; 2109b5df2d14SHong Zhang 2110b5df2d14SHong Zhang /* stuff used in block assembly */ 2111f4259b30SLisandro Dalcin b->barray = NULL; 2112b5df2d14SHong Zhang 2113b5df2d14SHong Zhang /* stuff used for matrix vector multiply */ 2114f4259b30SLisandro Dalcin b->lvec = NULL; 2115f4259b30SLisandro Dalcin b->Mvctx = NULL; 2116f4259b30SLisandro Dalcin b->slvec0 = NULL; 2117f4259b30SLisandro Dalcin b->slvec0b = NULL; 2118f4259b30SLisandro Dalcin b->slvec1 = NULL; 2119f4259b30SLisandro Dalcin b->slvec1a = NULL; 2120f4259b30SLisandro Dalcin b->slvec1b = NULL; 2121f4259b30SLisandro Dalcin b->sMvctx = NULL; 2122b5df2d14SHong Zhang 2123b5df2d14SHong Zhang /* stuff for MatGetRow() */ 2124f4259b30SLisandro Dalcin b->rowindices = NULL; 2125f4259b30SLisandro Dalcin b->rowvalues = NULL; 2126b5df2d14SHong Zhang b->getrowactive = PETSC_FALSE; 2127b5df2d14SHong Zhang 2128b5df2d14SHong Zhang /* hash table stuff */ 2129f4259b30SLisandro Dalcin b->ht = NULL; 2130f4259b30SLisandro Dalcin b->hd = NULL; 2131b5df2d14SHong Zhang b->ht_size = 0; 2132b5df2d14SHong Zhang b->ht_flag = PETSC_FALSE; 2133b5df2d14SHong Zhang b->ht_fact = 0; 2134b5df2d14SHong Zhang b->ht_total_ct = 0; 2135b5df2d14SHong Zhang b->ht_insert_ct = 0; 2136b5df2d14SHong Zhang 21377dae84e0SHong Zhang /* stuff for MatCreateSubMatrices_MPIBAIJ_local() */ 21387a868f3eSHong Zhang b->ijonly = PETSC_FALSE; 21397a868f3eSHong Zhang 2140f4259b30SLisandro Dalcin b->in_loc = NULL; 2141f4259b30SLisandro Dalcin b->v_loc = NULL; 214259ffdab8SBarry Smith b->n_loc = 0; 214394ae4db5SBarry Smith 21449566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatStoreValues_C", MatStoreValues_MPISBAIJ)); 21459566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatRetrieveValues_C", MatRetrieveValues_MPISBAIJ)); 21469566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPISBAIJSetPreallocation_C", MatMPISBAIJSetPreallocation_MPISBAIJ)); 21479566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPISBAIJSetPreallocationCSR_C", MatMPISBAIJSetPreallocationCSR_MPISBAIJ)); 21486214f412SHong Zhang #if defined(PETSC_HAVE_ELEMENTAL) 21499566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpisbaij_elemental_C", MatConvert_MPISBAIJ_Elemental)); 21506214f412SHong Zhang #endif 2151d24d4204SJose E. Roman #if defined(PETSC_HAVE_SCALAPACK) 21529566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpisbaij_scalapack_C", MatConvert_SBAIJ_ScaLAPACK)); 2153d24d4204SJose E. Roman #endif 21549566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpisbaij_mpiaij_C", MatConvert_MPISBAIJ_Basic)); 21559566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpisbaij_mpibaij_C", MatConvert_MPISBAIJ_Basic)); 2156aa5a9175SDahai Guo 2157b94d7dedSBarry Smith B->symmetric = PETSC_BOOL3_TRUE; 2158b94d7dedSBarry Smith B->structurally_symmetric = PETSC_BOOL3_TRUE; 2159b94d7dedSBarry Smith B->symmetry_eternal = PETSC_TRUE; 2160b94d7dedSBarry Smith B->structural_symmetry_eternal = PETSC_TRUE; 2161eb1ec7c1SStefano Zampini #if defined(PETSC_USE_COMPLEX) 2162b94d7dedSBarry Smith B->hermitian = PETSC_BOOL3_FALSE; 2163eb1ec7c1SStefano Zampini #else 2164b94d7dedSBarry Smith B->hermitian = PETSC_BOOL3_TRUE; 2165eb1ec7c1SStefano Zampini #endif 216613647f61SHong Zhang 21679566063dSJacob Faibussowitsch PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATMPISBAIJ)); 2168d0609cedSBarry Smith PetscOptionsBegin(PetscObjectComm((PetscObject)B), NULL, "Options for loading MPISBAIJ matrix 1", "Mat"); 21699566063dSJacob Faibussowitsch PetscCall(PetscOptionsBool("-mat_use_hash_table", "Use hash table to save memory in constructing matrix", "MatSetOption", flg, &flg, NULL)); 217094ae4db5SBarry Smith if (flg) { 217194ae4db5SBarry Smith PetscReal fact = 1.39; 21729566063dSJacob Faibussowitsch PetscCall(MatSetOption(B, MAT_USE_HASH_TABLE, PETSC_TRUE)); 21739566063dSJacob Faibussowitsch PetscCall(PetscOptionsReal("-mat_use_hash_table", "Use hash table factor", "MatMPIBAIJSetHashTableFactor", fact, &fact, NULL)); 217494ae4db5SBarry Smith if (fact <= 1.0) fact = 1.39; 21759566063dSJacob Faibussowitsch PetscCall(MatMPIBAIJSetHashTableFactor(B, fact)); 21769566063dSJacob Faibussowitsch PetscCall(PetscInfo(B, "Hash table Factor used %5.2g\n", (double)fact)); 217794ae4db5SBarry Smith } 2178d0609cedSBarry Smith PetscOptionsEnd(); 21793ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 2180b5df2d14SHong Zhang } 2181b5df2d14SHong Zhang 2182209238afSKris Buschelman /*MC 2183002d173eSKris Buschelman MATSBAIJ - MATSBAIJ = "sbaij" - A matrix type to be used for symmetric block sparse matrices. 2184209238afSKris Buschelman 218511a5261eSBarry Smith This matrix type is identical to `MATSEQSBAIJ` when constructed with a single process communicator, 218611a5261eSBarry Smith and `MATMPISBAIJ` otherwise. 2187209238afSKris Buschelman 218811a5261eSBarry Smith Options Database Key: 2189c5dec841SPierre Jolivet . -mat_type sbaij - sets the matrix type to `MATSBAIJ` during a call to `MatSetFromOptions()` 2190209238afSKris Buschelman 2191209238afSKris Buschelman Level: beginner 2192209238afSKris Buschelman 2193*1cc06b55SBarry Smith .seealso: [](ch_matrices), `Mat`, `MATSEQSBAIJ`, `MATMPISBAIJ`, `MatCreateSBAIJ()`, `MATSEQSBAIJ`, `MATMPISBAIJ` 2194209238afSKris Buschelman M*/ 2195209238afSKris Buschelman 2196b5df2d14SHong Zhang /*@C 2197b5df2d14SHong Zhang MatMPISBAIJSetPreallocation - For good matrix assembly performance 2198b5df2d14SHong Zhang the user should preallocate the matrix storage by setting the parameters 2199b5df2d14SHong Zhang d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 2200b5df2d14SHong Zhang performance can be increased by more than a factor of 50. 2201b5df2d14SHong Zhang 2202c3339decSBarry Smith Collective 2203b5df2d14SHong Zhang 2204b5df2d14SHong Zhang Input Parameters: 22051c4f3114SJed Brown + B - the matrix 2206bb7ae925SBarry Smith . bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row 2207bb7ae925SBarry Smith blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs() 2208b5df2d14SHong Zhang . d_nz - number of block nonzeros per block row in diagonal portion of local 2209b5df2d14SHong Zhang submatrix (same for all local rows) 2210b5df2d14SHong Zhang . d_nnz - array containing the number of block nonzeros in the various block rows 22116d10fdaeSSatish Balay in the upper triangular and diagonal part of the in diagonal portion of the local 22122ef1f0ffSBarry Smith (possibly different for each block row) or `NULL`. If you plan to factor the matrix you must leave room 221395742e49SBarry Smith for the diagonal entry and set a value even if it is zero. 2214b5df2d14SHong Zhang . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 2215b5df2d14SHong Zhang submatrix (same for all local rows). 2216b5df2d14SHong Zhang - o_nnz - array containing the number of nonzeros in the various block rows of the 2217c2fc9fa9SBarry Smith off-diagonal portion of the local submatrix that is right of the diagonal 22182ef1f0ffSBarry Smith (possibly different for each block row) or `NULL`. 2219b5df2d14SHong Zhang 2220b5df2d14SHong Zhang Options Database Keys: 2221a2b725a8SWilliam Gropp + -mat_no_unroll - uses code that does not unroll the loops in the 2222b5df2d14SHong Zhang block calculations (much slower) 2223a2b725a8SWilliam Gropp - -mat_block_size - size of the blocks to use 2224b5df2d14SHong Zhang 22252ef1f0ffSBarry Smith Level: intermediate 22262ef1f0ffSBarry Smith 2227b5df2d14SHong Zhang Notes: 2228b5df2d14SHong Zhang 222911a5261eSBarry Smith If `PETSC_DECIDE` or `PETSC_DETERMINE` is used for a particular argument on one processor 2230b5df2d14SHong Zhang than it must be used on all processors that share the object for that argument. 2231b5df2d14SHong Zhang 223249a6f317SBarry Smith If the *_nnz parameter is given then the *_nz parameter is ignored 223349a6f317SBarry Smith 2234b5df2d14SHong Zhang Storage Information: 2235b5df2d14SHong Zhang For a square global matrix we define each processor's diagonal portion 2236b5df2d14SHong Zhang to be its local rows and the corresponding columns (a square submatrix); 2237b5df2d14SHong Zhang each processor's off-diagonal portion encompasses the remainder of the 2238b5df2d14SHong Zhang local matrix (a rectangular submatrix). 2239b5df2d14SHong Zhang 2240b5df2d14SHong Zhang The user can specify preallocated storage for the diagonal part of 22412ef1f0ffSBarry Smith the local submatrix with either `d_nz` or `d_nnz` (not both). Set 22422ef1f0ffSBarry Smith `d_nz` = `PETSC_DEFAULT` and `d_nnz` = `NULL` for PETSc to control dynamic 2243b5df2d14SHong Zhang memory allocation. Likewise, specify preallocated storage for the 22442ef1f0ffSBarry Smith off-diagonal part of the local submatrix with `o_nz` or `o_nnz` (not both). 2245b5df2d14SHong Zhang 224611a5261eSBarry Smith You can call `MatGetInfo()` to get information on how effective the preallocation was; 2247aa95bbe8SBarry Smith for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 22482ef1f0ffSBarry Smith You can also run with the option `-info` and look for messages with the string 2249aa95bbe8SBarry Smith malloc in them to see if additional memory allocation was needed. 2250aa95bbe8SBarry Smith 2251b5df2d14SHong Zhang Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 2252b5df2d14SHong Zhang the figure below we depict these three local rows and all columns (0-11). 2253b5df2d14SHong Zhang 2254b5df2d14SHong Zhang .vb 2255b5df2d14SHong Zhang 0 1 2 3 4 5 6 7 8 9 10 11 2256a4b1a0f6SJed Brown -------------------------- 2257c2fc9fa9SBarry Smith row 3 |. . . d d d o o o o o o 2258c2fc9fa9SBarry Smith row 4 |. . . d d d o o o o o o 2259c2fc9fa9SBarry Smith row 5 |. . . d d d o o o o o o 2260a4b1a0f6SJed Brown -------------------------- 2261b5df2d14SHong Zhang .ve 2262b5df2d14SHong Zhang 2263b5df2d14SHong Zhang Thus, any entries in the d locations are stored in the d (diagonal) 2264b5df2d14SHong Zhang submatrix, and any entries in the o locations are stored in the 22656d10fdaeSSatish Balay o (off-diagonal) submatrix. Note that the d matrix is stored in 226611a5261eSBarry Smith `MATSEQSBAIJ` format and the o submatrix in `MATSEQBAIJ` format. 2267b5df2d14SHong Zhang 22682ef1f0ffSBarry Smith Now `d_nz` should indicate the number of block nonzeros per row in the upper triangular 22696d10fdaeSSatish Balay plus the diagonal part of the d matrix, 22702ef1f0ffSBarry Smith and `o_nz` should indicate the number of block nonzeros per row in the o matrix 2271c2fc9fa9SBarry Smith 2272b5df2d14SHong Zhang In general, for PDE problems in which most nonzeros are near the diagonal, 22732ef1f0ffSBarry Smith one expects `d_nz` >> `o_nz`. 2274b5df2d14SHong Zhang 2275*1cc06b55SBarry Smith .seealso: [](ch_matrices), `Mat`, `MATMPISBAIJ`, `MATSBAIJ`, `MatCreate()`, `MatCreateSeqSBAIJ()`, `MatSetValues()`, `MatCreateBAIJ()`, `PetscSplitOwnership()` 2276b5df2d14SHong Zhang @*/ 2277d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMPISBAIJSetPreallocation(Mat B, PetscInt bs, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[]) 2278d71ae5a4SJacob Faibussowitsch { 2279b5df2d14SHong Zhang PetscFunctionBegin; 22806ba663aaSJed Brown PetscValidHeaderSpecific(B, MAT_CLASSID, 1); 22816ba663aaSJed Brown PetscValidType(B, 1); 22826ba663aaSJed Brown PetscValidLogicalCollectiveInt(B, bs, 2); 2283cac4c232SBarry Smith PetscTryMethod(B, "MatMPISBAIJSetPreallocation_C", (Mat, PetscInt, PetscInt, const PetscInt[], PetscInt, const PetscInt[]), (B, bs, d_nz, d_nnz, o_nz, o_nnz)); 22843ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 2285b5df2d14SHong Zhang } 2286b5df2d14SHong Zhang 2287a30f8f8cSSatish Balay /*@C 228811a5261eSBarry Smith MatCreateSBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format, `MATSBAIJ`, 2289a30f8f8cSSatish Balay (block compressed row). For good matrix assembly performance 2290a30f8f8cSSatish Balay the user should preallocate the matrix storage by setting the parameters 229120f4b53cSBarry Smith `d_nz` (or `d_nnz`) and `o_nz` (or `o_nnz`). 2292a30f8f8cSSatish Balay 2293d083f849SBarry Smith Collective 2294a30f8f8cSSatish Balay 2295a30f8f8cSSatish Balay Input Parameters: 2296a30f8f8cSSatish Balay + comm - MPI communicator 229711a5261eSBarry Smith . bs - size of block, the blocks are ALWAYS square. One can use `MatSetBlockSizes()` to set a different row and column blocksize but the row 229820f4b53cSBarry Smith blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with `MatCreateVecs()` 229920f4b53cSBarry Smith . m - number of local rows (or `PETSC_DECIDE` to have calculated if `M` is given) 2300a30f8f8cSSatish Balay This value should be the same as the local size used in creating the 2301a30f8f8cSSatish Balay y vector for the matrix-vector product y = Ax. 230220f4b53cSBarry Smith . n - number of local columns (or `PETSC_DECIDE` to have calculated if `N` is given) 2303a30f8f8cSSatish Balay This value should be the same as the local size used in creating the 2304a30f8f8cSSatish Balay x vector for the matrix-vector product y = Ax. 230520f4b53cSBarry Smith . M - number of global rows (or `PETSC_DETERMINE` to have calculated if `m` is given) 230620f4b53cSBarry Smith . N - number of global columns (or `PETSC_DETERMINE` to have calculated if `n` is given) 2307a30f8f8cSSatish Balay . d_nz - number of block nonzeros per block row in diagonal portion of local 2308a30f8f8cSSatish Balay submatrix (same for all local rows) 2309a30f8f8cSSatish Balay . d_nnz - array containing the number of block nonzeros in the various block rows 23106d10fdaeSSatish Balay in the upper triangular portion of the in diagonal portion of the local 23112ef1f0ffSBarry Smith (possibly different for each block block row) or `NULL`. 231295742e49SBarry Smith If you plan to factor the matrix you must leave room for the diagonal entry and 231395742e49SBarry Smith set its value even if it is zero. 2314a30f8f8cSSatish Balay . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 2315a30f8f8cSSatish Balay submatrix (same for all local rows). 2316a30f8f8cSSatish Balay - o_nnz - array containing the number of nonzeros in the various block rows of the 2317a30f8f8cSSatish Balay off-diagonal portion of the local submatrix (possibly different for 23182ef1f0ffSBarry Smith each block row) or `NULL`. 2319a30f8f8cSSatish Balay 2320a30f8f8cSSatish Balay Output Parameter: 2321a30f8f8cSSatish Balay . A - the matrix 2322a30f8f8cSSatish Balay 2323a30f8f8cSSatish Balay Options Database Keys: 2324a2b725a8SWilliam Gropp + -mat_no_unroll - uses code that does not unroll the loops in the 2325a30f8f8cSSatish Balay block calculations (much slower) 2326a30f8f8cSSatish Balay . -mat_block_size - size of the blocks to use 2327a2b725a8SWilliam Gropp - -mat_mpi - use the parallel matrix data structures even on one processor 2328a30f8f8cSSatish Balay (defaults to using SeqBAIJ format on one processor) 2329a30f8f8cSSatish Balay 23302ef1f0ffSBarry Smith Level: intermediate 23312ef1f0ffSBarry Smith 23322ef1f0ffSBarry Smith Notes: 233311a5261eSBarry Smith It is recommended that one use the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`, 2334f6f02116SRichard Tran Mills MatXXXXSetPreallocation() paradigm instead of this routine directly. 233511a5261eSBarry Smith [MatXXXXSetPreallocation() is, for example, `MatSeqAIJSetPreallocation()`] 2336175b88e8SBarry Smith 2337d1be2dadSMatthew Knepley The number of rows and columns must be divisible by blocksize. 23386d6d819aSHong Zhang This matrix type does not support complex Hermitian operation. 2339d1be2dadSMatthew Knepley 2340a30f8f8cSSatish Balay The user MUST specify either the local or global matrix dimensions 2341a30f8f8cSSatish Balay (possibly both). 2342a30f8f8cSSatish Balay 234311a5261eSBarry Smith If `PETSC_DECIDE` or `PETSC_DETERMINE` is used for a particular argument on one processor 2344a30f8f8cSSatish Balay than it must be used on all processors that share the object for that argument. 2345a30f8f8cSSatish Balay 234649a6f317SBarry Smith If the *_nnz parameter is given then the *_nz parameter is ignored 234749a6f317SBarry Smith 2348a30f8f8cSSatish Balay Storage Information: 2349a30f8f8cSSatish Balay For a square global matrix we define each processor's diagonal portion 2350a30f8f8cSSatish Balay to be its local rows and the corresponding columns (a square submatrix); 2351a30f8f8cSSatish Balay each processor's off-diagonal portion encompasses the remainder of the 2352a30f8f8cSSatish Balay local matrix (a rectangular submatrix). 2353a30f8f8cSSatish Balay 2354a30f8f8cSSatish Balay The user can specify preallocated storage for the diagonal part of 23552ef1f0ffSBarry Smith the local submatrix with either `d_nz` or `d_nnz` (not both). Set 23562ef1f0ffSBarry Smith `d_nz` = `PETSC_DEFAULT` and `d_nnz` = `NULL` for PETSc to control dynamic 2357a30f8f8cSSatish Balay memory allocation. Likewise, specify preallocated storage for the 23582ef1f0ffSBarry Smith off-diagonal part of the local submatrix with `o_nz` or `o_nnz` (not both). 2359a30f8f8cSSatish Balay 2360a30f8f8cSSatish Balay Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 2361a30f8f8cSSatish Balay the figure below we depict these three local rows and all columns (0-11). 2362a30f8f8cSSatish Balay 2363a30f8f8cSSatish Balay .vb 2364a30f8f8cSSatish Balay 0 1 2 3 4 5 6 7 8 9 10 11 2365a4b1a0f6SJed Brown -------------------------- 2366c2fc9fa9SBarry Smith row 3 |. . . d d d o o o o o o 2367c2fc9fa9SBarry Smith row 4 |. . . d d d o o o o o o 2368c2fc9fa9SBarry Smith row 5 |. . . d d d o o o o o o 2369a4b1a0f6SJed Brown -------------------------- 2370a30f8f8cSSatish Balay .ve 2371a30f8f8cSSatish Balay 2372a30f8f8cSSatish Balay Thus, any entries in the d locations are stored in the d (diagonal) 2373a30f8f8cSSatish Balay submatrix, and any entries in the o locations are stored in the 23746d10fdaeSSatish Balay o (off-diagonal) submatrix. Note that the d matrix is stored in 23752ef1f0ffSBarry Smith `MATSEQSBAIJ` format and the o submatrix in `MATSEQBAIJ` format. 2376a30f8f8cSSatish Balay 23772ef1f0ffSBarry Smith Now `d_nz` should indicate the number of block nonzeros per row in the upper triangular 23786d10fdaeSSatish Balay plus the diagonal part of the d matrix, 23792ef1f0ffSBarry Smith and `o_nz` should indicate the number of block nonzeros per row in the o matrix. 2380a30f8f8cSSatish Balay In general, for PDE problems in which most nonzeros are near the diagonal, 23812ef1f0ffSBarry Smith one expects `d_nz` >> `o_nz`. 2382a30f8f8cSSatish Balay 2383*1cc06b55SBarry Smith .seealso: [](ch_matrices), `Mat`, `MATSBAIJ`, `MatCreate()`, `MatCreateSeqSBAIJ()`, `MatSetValues()`, `MatCreateBAIJ()` 2384a30f8f8cSSatish Balay @*/ 2385a30f8f8cSSatish Balay 2386d71ae5a4SJacob Faibussowitsch PetscErrorCode MatCreateSBAIJ(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[], Mat *A) 2387d71ae5a4SJacob Faibussowitsch { 23881302d50aSBarry Smith PetscMPIInt size; 2389a30f8f8cSSatish Balay 2390a30f8f8cSSatish Balay PetscFunctionBegin; 23919566063dSJacob Faibussowitsch PetscCall(MatCreate(comm, A)); 23929566063dSJacob Faibussowitsch PetscCall(MatSetSizes(*A, m, n, M, N)); 23939566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_size(comm, &size)); 2394273d9f13SBarry Smith if (size > 1) { 23959566063dSJacob Faibussowitsch PetscCall(MatSetType(*A, MATMPISBAIJ)); 23969566063dSJacob Faibussowitsch PetscCall(MatMPISBAIJSetPreallocation(*A, bs, d_nz, d_nnz, o_nz, o_nnz)); 2397273d9f13SBarry Smith } else { 23989566063dSJacob Faibussowitsch PetscCall(MatSetType(*A, MATSEQSBAIJ)); 23999566063dSJacob Faibussowitsch PetscCall(MatSeqSBAIJSetPreallocation(*A, bs, d_nz, d_nnz)); 2400273d9f13SBarry Smith } 24013ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 2402a30f8f8cSSatish Balay } 2403a30f8f8cSSatish Balay 2404d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatDuplicate_MPISBAIJ(Mat matin, MatDuplicateOption cpvalues, Mat *newmat) 2405d71ae5a4SJacob Faibussowitsch { 2406a30f8f8cSSatish Balay Mat mat; 2407a30f8f8cSSatish Balay Mat_MPISBAIJ *a, *oldmat = (Mat_MPISBAIJ *)matin->data; 2408d0f46423SBarry Smith PetscInt len = 0, nt, bs = matin->rmap->bs, mbs = oldmat->mbs; 2409387bc808SHong Zhang PetscScalar *array; 2410a30f8f8cSSatish Balay 2411a30f8f8cSSatish Balay PetscFunctionBegin; 2412f4259b30SLisandro Dalcin *newmat = NULL; 241326fbe8dcSKarl Rupp 24149566063dSJacob Faibussowitsch PetscCall(MatCreate(PetscObjectComm((PetscObject)matin), &mat)); 24159566063dSJacob Faibussowitsch PetscCall(MatSetSizes(mat, matin->rmap->n, matin->cmap->n, matin->rmap->N, matin->cmap->N)); 24169566063dSJacob Faibussowitsch PetscCall(MatSetType(mat, ((PetscObject)matin)->type_name)); 24179566063dSJacob Faibussowitsch PetscCall(PetscLayoutReference(matin->rmap, &mat->rmap)); 24189566063dSJacob Faibussowitsch PetscCall(PetscLayoutReference(matin->cmap, &mat->cmap)); 2419e1b6402fSHong Zhang 2420d5f3da31SBarry Smith mat->factortype = matin->factortype; 2421273d9f13SBarry Smith mat->preallocated = PETSC_TRUE; 242282327fa8SHong Zhang mat->assembled = PETSC_TRUE; 24237fff6886SHong Zhang mat->insertmode = NOT_SET_VALUES; 24247fff6886SHong Zhang 2425b5df2d14SHong Zhang a = (Mat_MPISBAIJ *)mat->data; 2426a30f8f8cSSatish Balay a->bs2 = oldmat->bs2; 2427a30f8f8cSSatish Balay a->mbs = oldmat->mbs; 2428a30f8f8cSSatish Balay a->nbs = oldmat->nbs; 2429a30f8f8cSSatish Balay a->Mbs = oldmat->Mbs; 2430a30f8f8cSSatish Balay a->Nbs = oldmat->Nbs; 2431a30f8f8cSSatish Balay 2432a30f8f8cSSatish Balay a->size = oldmat->size; 2433a30f8f8cSSatish Balay a->rank = oldmat->rank; 2434a30f8f8cSSatish Balay a->donotstash = oldmat->donotstash; 2435a30f8f8cSSatish Balay a->roworiented = oldmat->roworiented; 2436f4259b30SLisandro Dalcin a->rowindices = NULL; 2437f4259b30SLisandro Dalcin a->rowvalues = NULL; 2438a30f8f8cSSatish Balay a->getrowactive = PETSC_FALSE; 2439f4259b30SLisandro Dalcin a->barray = NULL; 2440899cda47SBarry Smith a->rstartbs = oldmat->rstartbs; 2441899cda47SBarry Smith a->rendbs = oldmat->rendbs; 2442899cda47SBarry Smith a->cstartbs = oldmat->cstartbs; 2443899cda47SBarry Smith a->cendbs = oldmat->cendbs; 2444a30f8f8cSSatish Balay 2445a30f8f8cSSatish Balay /* hash table stuff */ 2446f4259b30SLisandro Dalcin a->ht = NULL; 2447f4259b30SLisandro Dalcin a->hd = NULL; 2448a30f8f8cSSatish Balay a->ht_size = 0; 2449a30f8f8cSSatish Balay a->ht_flag = oldmat->ht_flag; 2450a30f8f8cSSatish Balay a->ht_fact = oldmat->ht_fact; 2451a30f8f8cSSatish Balay a->ht_total_ct = 0; 2452a30f8f8cSSatish Balay a->ht_insert_ct = 0; 2453a30f8f8cSSatish Balay 24549566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(a->rangebs, oldmat->rangebs, a->size + 2)); 2455a30f8f8cSSatish Balay if (oldmat->colmap) { 2456a30f8f8cSSatish Balay #if defined(PETSC_USE_CTABLE) 2457eec179cfSJacob Faibussowitsch PetscCall(PetscHMapIDuplicate(oldmat->colmap, &a->colmap)); 2458a30f8f8cSSatish Balay #else 24599566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(a->Nbs, &a->colmap)); 24609566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(a->colmap, oldmat->colmap, a->Nbs)); 2461a30f8f8cSSatish Balay #endif 2462f4259b30SLisandro Dalcin } else a->colmap = NULL; 2463387bc808SHong Zhang 2464a30f8f8cSSatish Balay if (oldmat->garray && (len = ((Mat_SeqBAIJ *)(oldmat->B->data))->nbs)) { 24659566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(len, &a->garray)); 24669566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(a->garray, oldmat->garray, len)); 2467f4259b30SLisandro Dalcin } else a->garray = NULL; 2468a30f8f8cSSatish Balay 24699566063dSJacob Faibussowitsch PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)matin), matin->rmap->bs, &mat->bstash)); 24709566063dSJacob Faibussowitsch PetscCall(VecDuplicate(oldmat->lvec, &a->lvec)); 24719566063dSJacob Faibussowitsch PetscCall(VecScatterCopy(oldmat->Mvctx, &a->Mvctx)); 247282327fa8SHong Zhang 24739566063dSJacob Faibussowitsch PetscCall(VecDuplicate(oldmat->slvec0, &a->slvec0)); 24749566063dSJacob Faibussowitsch PetscCall(VecDuplicate(oldmat->slvec1, &a->slvec1)); 2475387bc808SHong Zhang 24769566063dSJacob Faibussowitsch PetscCall(VecGetLocalSize(a->slvec1, &nt)); 24779566063dSJacob Faibussowitsch PetscCall(VecGetArray(a->slvec1, &array)); 24789566063dSJacob Faibussowitsch PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, bs * mbs, array, &a->slvec1a)); 24799566063dSJacob Faibussowitsch PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, nt - bs * mbs, array + bs * mbs, &a->slvec1b)); 24809566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(a->slvec1, &array)); 24819566063dSJacob Faibussowitsch PetscCall(VecGetArray(a->slvec0, &array)); 24829566063dSJacob Faibussowitsch PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, nt - bs * mbs, array + bs * mbs, &a->slvec0b)); 24839566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(a->slvec0, &array)); 2484387bc808SHong Zhang 2485387bc808SHong Zhang /* ierr = VecScatterCopy(oldmat->sMvctx,&a->sMvctx); - not written yet, replaced by the lazy trick: */ 24869566063dSJacob Faibussowitsch PetscCall(PetscObjectReference((PetscObject)oldmat->sMvctx)); 2487387bc808SHong Zhang a->sMvctx = oldmat->sMvctx; 248882327fa8SHong Zhang 24899566063dSJacob Faibussowitsch PetscCall(MatDuplicate(oldmat->A, cpvalues, &a->A)); 24909566063dSJacob Faibussowitsch PetscCall(MatDuplicate(oldmat->B, cpvalues, &a->B)); 24919566063dSJacob Faibussowitsch PetscCall(PetscFunctionListDuplicate(((PetscObject)matin)->qlist, &((PetscObject)mat)->qlist)); 2492a30f8f8cSSatish Balay *newmat = mat; 24933ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 2494a30f8f8cSSatish Balay } 2495a30f8f8cSSatish Balay 2496618cc2edSLisandro Dalcin /* Used for both MPIBAIJ and MPISBAIJ matrices */ 2497618cc2edSLisandro Dalcin #define MatLoad_MPISBAIJ_Binary MatLoad_MPIBAIJ_Binary 2498618cc2edSLisandro Dalcin 2499d71ae5a4SJacob Faibussowitsch PetscErrorCode MatLoad_MPISBAIJ(Mat mat, PetscViewer viewer) 2500d71ae5a4SJacob Faibussowitsch { 25017f489da9SVaclav Hapla PetscBool isbinary; 250295936485SShri Abhyankar 250395936485SShri Abhyankar PetscFunctionBegin; 25049566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary)); 25055f80ce2aSJacob Faibussowitsch PetscCheck(isbinary, PetscObjectComm((PetscObject)viewer), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)mat)->type_name); 25069566063dSJacob Faibussowitsch PetscCall(MatLoad_MPISBAIJ_Binary(mat, viewer)); 25073ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 250895936485SShri Abhyankar } 250995936485SShri Abhyankar 2510d71ae5a4SJacob Faibussowitsch PetscErrorCode MatGetRowMaxAbs_MPISBAIJ(Mat A, Vec v, PetscInt idx[]) 2511d71ae5a4SJacob Faibussowitsch { 251224d5174aSHong Zhang Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data; 2513f4c0e9e4SHong Zhang Mat_SeqBAIJ *b = (Mat_SeqBAIJ *)(a->B)->data; 2514ca54ac64SHong Zhang PetscReal atmp; 251587828ca2SBarry Smith PetscReal *work, *svalues, *rvalues; 25161302d50aSBarry Smith PetscInt i, bs, mbs, *bi, *bj, brow, j, ncols, krow, kcol, col, row, Mbs, bcol; 25171302d50aSBarry Smith PetscMPIInt rank, size; 25181302d50aSBarry Smith PetscInt *rowners_bs, dest, count, source; 251987828ca2SBarry Smith PetscScalar *va; 25208a1c53f2SBarry Smith MatScalar *ba; 2521f4c0e9e4SHong Zhang MPI_Status stat; 252224d5174aSHong Zhang 252324d5174aSHong Zhang PetscFunctionBegin; 25245f80ce2aSJacob Faibussowitsch PetscCheck(!idx, PETSC_COMM_SELF, PETSC_ERR_SUP, "Send email to petsc-maint@mcs.anl.gov"); 25259566063dSJacob Faibussowitsch PetscCall(MatGetRowMaxAbs(a->A, v, NULL)); 25269566063dSJacob Faibussowitsch PetscCall(VecGetArray(v, &va)); 2527f4c0e9e4SHong Zhang 25289566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 25299566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)A), &rank)); 2530f4c0e9e4SHong Zhang 2531d0f46423SBarry Smith bs = A->rmap->bs; 2532f4c0e9e4SHong Zhang mbs = a->mbs; 2533f4c0e9e4SHong Zhang Mbs = a->Mbs; 2534f4c0e9e4SHong Zhang ba = b->a; 2535f4c0e9e4SHong Zhang bi = b->i; 2536f4c0e9e4SHong Zhang bj = b->j; 2537f4c0e9e4SHong Zhang 2538f4c0e9e4SHong Zhang /* find ownerships */ 2539d0f46423SBarry Smith rowners_bs = A->rmap->range; 2540f4c0e9e4SHong Zhang 2541f4c0e9e4SHong Zhang /* each proc creates an array to be distributed */ 25429566063dSJacob Faibussowitsch PetscCall(PetscCalloc1(bs * Mbs, &work)); 2543f4c0e9e4SHong Zhang 2544f4c0e9e4SHong Zhang /* row_max for B */ 2545b8475685SHong Zhang if (rank != size - 1) { 2546f4c0e9e4SHong Zhang for (i = 0; i < mbs; i++) { 25479371c9d4SSatish Balay ncols = bi[1] - bi[0]; 25489371c9d4SSatish Balay bi++; 2549f4c0e9e4SHong Zhang brow = bs * i; 2550f4c0e9e4SHong Zhang for (j = 0; j < ncols; j++) { 2551f4c0e9e4SHong Zhang bcol = bs * (*bj); 2552f4c0e9e4SHong Zhang for (kcol = 0; kcol < bs; kcol++) { 2553ca54ac64SHong Zhang col = bcol + kcol; /* local col index */ 255404d41228SHong Zhang col += rowners_bs[rank + 1]; /* global col index */ 2555f4c0e9e4SHong Zhang for (krow = 0; krow < bs; krow++) { 25569371c9d4SSatish Balay atmp = PetscAbsScalar(*ba); 25579371c9d4SSatish Balay ba++; 2558ca54ac64SHong Zhang row = brow + krow; /* local row index */ 2559ca54ac64SHong Zhang if (PetscRealPart(va[row]) < atmp) va[row] = atmp; 2560f4c0e9e4SHong Zhang if (work[col] < atmp) work[col] = atmp; 2561f4c0e9e4SHong Zhang } 2562f4c0e9e4SHong Zhang } 2563f4c0e9e4SHong Zhang bj++; 2564f4c0e9e4SHong Zhang } 2565f4c0e9e4SHong Zhang } 2566f4c0e9e4SHong Zhang 2567f4c0e9e4SHong Zhang /* send values to its owners */ 2568f4c0e9e4SHong Zhang for (dest = rank + 1; dest < size; dest++) { 2569f4c0e9e4SHong Zhang svalues = work + rowners_bs[dest]; 2570ca54ac64SHong Zhang count = rowners_bs[dest + 1] - rowners_bs[dest]; 25719566063dSJacob Faibussowitsch PetscCallMPI(MPI_Send(svalues, count, MPIU_REAL, dest, rank, PetscObjectComm((PetscObject)A))); 2572ca54ac64SHong Zhang } 2573f4c0e9e4SHong Zhang } 2574f4c0e9e4SHong Zhang 2575f4c0e9e4SHong Zhang /* receive values */ 2576ca54ac64SHong Zhang if (rank) { 2577f4c0e9e4SHong Zhang rvalues = work; 2578ca54ac64SHong Zhang count = rowners_bs[rank + 1] - rowners_bs[rank]; 2579f4c0e9e4SHong Zhang for (source = 0; source < rank; source++) { 25809566063dSJacob Faibussowitsch PetscCallMPI(MPI_Recv(rvalues, count, MPIU_REAL, MPI_ANY_SOURCE, MPI_ANY_TAG, PetscObjectComm((PetscObject)A), &stat)); 2581f4c0e9e4SHong Zhang /* process values */ 2582f4c0e9e4SHong Zhang for (i = 0; i < count; i++) { 2583ca54ac64SHong Zhang if (PetscRealPart(va[i]) < rvalues[i]) va[i] = rvalues[i]; 2584f4c0e9e4SHong Zhang } 2585f4c0e9e4SHong Zhang } 2586ca54ac64SHong Zhang } 2587f4c0e9e4SHong Zhang 25889566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(v, &va)); 25899566063dSJacob Faibussowitsch PetscCall(PetscFree(work)); 25903ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 259124d5174aSHong Zhang } 25922798e883SHong Zhang 2593d71ae5a4SJacob Faibussowitsch PetscErrorCode MatSOR_MPISBAIJ(Mat matin, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx) 2594d71ae5a4SJacob Faibussowitsch { 25952798e883SHong Zhang Mat_MPISBAIJ *mat = (Mat_MPISBAIJ *)matin->data; 2596d0f46423SBarry Smith PetscInt mbs = mat->mbs, bs = matin->rmap->bs; 25973649974fSBarry Smith PetscScalar *x, *ptr, *from; 2598ffe4fb16SHong Zhang Vec bb1; 25993649974fSBarry Smith const PetscScalar *b; 2600ffe4fb16SHong Zhang 2601ffe4fb16SHong Zhang PetscFunctionBegin; 26025f80ce2aSJacob Faibussowitsch PetscCheck(its > 0 && lits > 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Relaxation requires global its %" PetscInt_FMT " and local its %" PetscInt_FMT " both positive", its, lits); 26035f80ce2aSJacob Faibussowitsch PetscCheck(bs <= 1, PETSC_COMM_SELF, PETSC_ERR_SUP, "SSOR for block size > 1 is not yet implemented"); 2604ffe4fb16SHong Zhang 2605a2b30743SBarry Smith if (flag == SOR_APPLY_UPPER) { 26069566063dSJacob Faibussowitsch PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx)); 26073ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 2608a2b30743SBarry Smith } 2609a2b30743SBarry Smith 2610ffe4fb16SHong Zhang if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 2611ffe4fb16SHong Zhang if (flag & SOR_ZERO_INITIAL_GUESS) { 26129566063dSJacob Faibussowitsch PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, lits, xx)); 2613ffe4fb16SHong Zhang its--; 2614ffe4fb16SHong Zhang } 2615ffe4fb16SHong Zhang 26169566063dSJacob Faibussowitsch PetscCall(VecDuplicate(bb, &bb1)); 2617ffe4fb16SHong Zhang while (its--) { 2618ffe4fb16SHong Zhang /* lower triangular part: slvec0b = - B^T*xx */ 26199566063dSJacob Faibussowitsch PetscCall((*mat->B->ops->multtranspose)(mat->B, xx, mat->slvec0b)); 2620ffe4fb16SHong Zhang 2621ffe4fb16SHong Zhang /* copy xx into slvec0a */ 26229566063dSJacob Faibussowitsch PetscCall(VecGetArray(mat->slvec0, &ptr)); 26239566063dSJacob Faibussowitsch PetscCall(VecGetArray(xx, &x)); 26249566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(ptr, x, bs * mbs)); 26259566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(mat->slvec0, &ptr)); 2626ffe4fb16SHong Zhang 26279566063dSJacob Faibussowitsch PetscCall(VecScale(mat->slvec0, -1.0)); 2628ffe4fb16SHong Zhang 2629ffe4fb16SHong Zhang /* copy bb into slvec1a */ 26309566063dSJacob Faibussowitsch PetscCall(VecGetArray(mat->slvec1, &ptr)); 26319566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(bb, &b)); 26329566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(ptr, b, bs * mbs)); 26339566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(mat->slvec1, &ptr)); 2634ffe4fb16SHong Zhang 2635ffe4fb16SHong Zhang /* set slvec1b = 0 */ 26369566063dSJacob Faibussowitsch PetscCall(VecSet(mat->slvec1b, 0.0)); 2637ffe4fb16SHong Zhang 26389566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(mat->sMvctx, mat->slvec0, mat->slvec1, ADD_VALUES, SCATTER_FORWARD)); 26399566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(xx, &x)); 26409566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(bb, &b)); 26419566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(mat->sMvctx, mat->slvec0, mat->slvec1, ADD_VALUES, SCATTER_FORWARD)); 2642ffe4fb16SHong Zhang 2643ffe4fb16SHong Zhang /* upper triangular part: bb1 = bb1 - B*x */ 26449566063dSJacob Faibussowitsch PetscCall((*mat->B->ops->multadd)(mat->B, mat->slvec1b, mat->slvec1a, bb1)); 2645ffe4fb16SHong Zhang 2646ffe4fb16SHong Zhang /* local diagonal sweep */ 26479566063dSJacob Faibussowitsch PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_SYMMETRIC_SWEEP, fshift, lits, lits, xx)); 2648ffe4fb16SHong Zhang } 26499566063dSJacob Faibussowitsch PetscCall(VecDestroy(&bb1)); 2650fa22f6d0SBarry Smith } else if ((flag & SOR_LOCAL_FORWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) { 26519566063dSJacob Faibussowitsch PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx)); 2652fa22f6d0SBarry Smith } else if ((flag & SOR_LOCAL_BACKWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) { 26539566063dSJacob Faibussowitsch PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx)); 2654fa22f6d0SBarry Smith } else if (flag & SOR_EISENSTAT) { 2655fa22f6d0SBarry Smith Vec xx1; 2656ace3abfcSBarry Smith PetscBool hasop; 265720f1ed55SBarry Smith const PetscScalar *diag; 2658887ee2caSBarry Smith PetscScalar *sl, scale = (omega - 2.0) / omega; 265920f1ed55SBarry Smith PetscInt i, n; 2660fa22f6d0SBarry Smith 2661fa22f6d0SBarry Smith if (!mat->xx1) { 26629566063dSJacob Faibussowitsch PetscCall(VecDuplicate(bb, &mat->xx1)); 26639566063dSJacob Faibussowitsch PetscCall(VecDuplicate(bb, &mat->bb1)); 2664fa22f6d0SBarry Smith } 2665fa22f6d0SBarry Smith xx1 = mat->xx1; 2666fa22f6d0SBarry Smith bb1 = mat->bb1; 2667fa22f6d0SBarry Smith 26689566063dSJacob Faibussowitsch PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, (MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP), fshift, lits, 1, xx)); 2669fa22f6d0SBarry Smith 2670fa22f6d0SBarry Smith if (!mat->diag) { 2671effcda25SBarry Smith /* this is wrong for same matrix with new nonzero values */ 26729566063dSJacob Faibussowitsch PetscCall(MatCreateVecs(matin, &mat->diag, NULL)); 26739566063dSJacob Faibussowitsch PetscCall(MatGetDiagonal(matin, mat->diag)); 2674fa22f6d0SBarry Smith } 26759566063dSJacob Faibussowitsch PetscCall(MatHasOperation(matin, MATOP_MULT_DIAGONAL_BLOCK, &hasop)); 2676fa22f6d0SBarry Smith 2677fa22f6d0SBarry Smith if (hasop) { 26789566063dSJacob Faibussowitsch PetscCall(MatMultDiagonalBlock(matin, xx, bb1)); 26799566063dSJacob Faibussowitsch PetscCall(VecAYPX(mat->slvec1a, scale, bb)); 268020f1ed55SBarry Smith } else { 268120f1ed55SBarry Smith /* 268220f1ed55SBarry Smith These two lines are replaced by code that may be a bit faster for a good compiler 26839566063dSJacob Faibussowitsch PetscCall(VecPointwiseMult(mat->slvec1a,mat->diag,xx)); 26849566063dSJacob Faibussowitsch PetscCall(VecAYPX(mat->slvec1a,scale,bb)); 268520f1ed55SBarry Smith */ 26869566063dSJacob Faibussowitsch PetscCall(VecGetArray(mat->slvec1a, &sl)); 26879566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(mat->diag, &diag)); 26889566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(bb, &b)); 26899566063dSJacob Faibussowitsch PetscCall(VecGetArray(xx, &x)); 26909566063dSJacob Faibussowitsch PetscCall(VecGetLocalSize(xx, &n)); 2691887ee2caSBarry Smith if (omega == 1.0) { 269226fbe8dcSKarl Rupp for (i = 0; i < n; i++) sl[i] = b[i] - diag[i] * x[i]; 26939566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(2.0 * n)); 2694887ee2caSBarry Smith } else { 269526fbe8dcSKarl Rupp for (i = 0; i < n; i++) sl[i] = b[i] + scale * diag[i] * x[i]; 26969566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(3.0 * n)); 2697887ee2caSBarry Smith } 26989566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(mat->slvec1a, &sl)); 26999566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(mat->diag, &diag)); 27009566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(bb, &b)); 27019566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(xx, &x)); 270220f1ed55SBarry Smith } 2703fa22f6d0SBarry Smith 2704fa22f6d0SBarry Smith /* multiply off-diagonal portion of matrix */ 27059566063dSJacob Faibussowitsch PetscCall(VecSet(mat->slvec1b, 0.0)); 27069566063dSJacob Faibussowitsch PetscCall((*mat->B->ops->multtranspose)(mat->B, xx, mat->slvec0b)); 27079566063dSJacob Faibussowitsch PetscCall(VecGetArray(mat->slvec0, &from)); 27089566063dSJacob Faibussowitsch PetscCall(VecGetArray(xx, &x)); 27099566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(from, x, bs * mbs)); 27109566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(mat->slvec0, &from)); 27119566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(xx, &x)); 27129566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(mat->sMvctx, mat->slvec0, mat->slvec1, ADD_VALUES, SCATTER_FORWARD)); 27139566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(mat->sMvctx, mat->slvec0, mat->slvec1, ADD_VALUES, SCATTER_FORWARD)); 27149566063dSJacob Faibussowitsch PetscCall((*mat->B->ops->multadd)(mat->B, mat->slvec1b, mat->slvec1a, mat->slvec1a)); 2715fa22f6d0SBarry Smith 2716fa22f6d0SBarry Smith /* local sweep */ 27179566063dSJacob Faibussowitsch PetscCall((*mat->A->ops->sor)(mat->A, mat->slvec1a, omega, (MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP), fshift, lits, 1, xx1)); 27189566063dSJacob Faibussowitsch PetscCall(VecAXPY(xx, 1.0, xx1)); 2719f23aa3ddSBarry Smith } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "MatSORType is not supported for SBAIJ matrix format"); 27203ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 2721ffe4fb16SHong Zhang } 2722ffe4fb16SHong Zhang 2723dfb205c3SBarry Smith /*@ 272411a5261eSBarry Smith MatCreateMPISBAIJWithArrays - creates a `MATMPISBAIJ` matrix using arrays that contain in standard 2725dfb205c3SBarry Smith CSR format the local rows. 2726dfb205c3SBarry Smith 2727d083f849SBarry Smith Collective 2728dfb205c3SBarry Smith 2729dfb205c3SBarry Smith Input Parameters: 2730dfb205c3SBarry Smith + comm - MPI communicator 2731dfb205c3SBarry Smith . bs - the block size, only a block size of 1 is supported 273211a5261eSBarry Smith . m - number of local rows (Cannot be `PETSC_DECIDE`) 2733dfb205c3SBarry Smith . n - This value should be the same as the local size used in creating the 273411a5261eSBarry Smith x vector for the matrix-vector product y = Ax. (or `PETSC_DECIDE` to have 27352ef1f0ffSBarry Smith calculated if `N` is given) For square matrices `n` is almost always `m`. 27362ef1f0ffSBarry Smith . M - number of global rows (or `PETSC_DETERMINE` to have calculated if `m` is given) 27372ef1f0ffSBarry Smith . N - number of global columns (or `PETSC_DETERMINE` to have calculated if `n` is given) 2738483a2f95SBarry Smith . i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of block elements in that row block row of the matrix 2739dfb205c3SBarry Smith . j - column indices 2740dfb205c3SBarry Smith - a - matrix values 2741dfb205c3SBarry Smith 2742dfb205c3SBarry Smith Output Parameter: 2743dfb205c3SBarry Smith . mat - the matrix 2744dfb205c3SBarry Smith 2745dfb205c3SBarry Smith Level: intermediate 2746dfb205c3SBarry Smith 2747dfb205c3SBarry Smith Notes: 27482ef1f0ffSBarry Smith The `i`, `j`, and `a` arrays ARE copied by this routine into the internal format used by PETSc; 27492ef1f0ffSBarry Smith thus you CANNOT change the matrix entries by changing the values of `a` after you have 27502ef1f0ffSBarry Smith called this routine. Use `MatCreateMPIAIJWithSplitArrays()` to avoid needing to copy the arrays. 2751dfb205c3SBarry Smith 27522ef1f0ffSBarry Smith The `i` and `j` indices are 0 based, and `i` indices are indices corresponding to the local `j` array. 2753dfb205c3SBarry Smith 2754*1cc06b55SBarry Smith .seealso: [](ch_matrices), `Mat`, `MATMPISBAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`, 2755db781477SPatrick Sanan `MPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()` 2756dfb205c3SBarry Smith @*/ 2757d71ae5a4SJacob Faibussowitsch PetscErrorCode MatCreateMPISBAIJWithArrays(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt M, PetscInt N, const PetscInt i[], const PetscInt j[], const PetscScalar a[], Mat *mat) 2758d71ae5a4SJacob Faibussowitsch { 2759dfb205c3SBarry Smith PetscFunctionBegin; 27605f80ce2aSJacob Faibussowitsch PetscCheck(!i[0], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0"); 27615f80ce2aSJacob Faibussowitsch PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative"); 27629566063dSJacob Faibussowitsch PetscCall(MatCreate(comm, mat)); 27639566063dSJacob Faibussowitsch PetscCall(MatSetSizes(*mat, m, n, M, N)); 27649566063dSJacob Faibussowitsch PetscCall(MatSetType(*mat, MATMPISBAIJ)); 27659566063dSJacob Faibussowitsch PetscCall(MatMPISBAIJSetPreallocationCSR(*mat, bs, i, j, a)); 27663ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 2767dfb205c3SBarry Smith } 2768dfb205c3SBarry Smith 2769dfb205c3SBarry Smith /*@C 277011a5261eSBarry Smith MatMPISBAIJSetPreallocationCSR - Creates a sparse parallel matrix in `MATMPISBAIJ` format using the given nonzero structure and (optional) numerical values 2771dfb205c3SBarry Smith 2772d083f849SBarry Smith Collective 2773dfb205c3SBarry Smith 2774dfb205c3SBarry Smith Input Parameters: 27751c4f3114SJed Brown + B - the matrix 2776dfb205c3SBarry Smith . bs - the block size 27772ef1f0ffSBarry Smith . i - the indices into `j` for the start of each local row (starts with zero) 2778dfb205c3SBarry Smith . j - the column indices for each local row (starts with zero) these must be sorted for each row 2779dfb205c3SBarry Smith - v - optional values in the matrix 2780dfb205c3SBarry Smith 2781664954b6SBarry Smith Level: advanced 2782664954b6SBarry Smith 2783664954b6SBarry Smith Notes: 27840cd7f59aSBarry Smith Though this routine has Preallocation() in the name it also sets the exact nonzero locations of the matrix entries 27850cd7f59aSBarry Smith and usually the numerical values as well 27860cd7f59aSBarry Smith 278750c5228eSBarry Smith Any entries below the diagonal are ignored 2788dfb205c3SBarry Smith 2789*1cc06b55SBarry Smith .seealso: [](ch_matrices), `Mat`, `MATMPISBAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIBAIJSetPreallocation()`, `MatCreateAIJ()`, `MPIAIJ` 2790dfb205c3SBarry Smith @*/ 2791d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMPISBAIJSetPreallocationCSR(Mat B, PetscInt bs, const PetscInt i[], const PetscInt j[], const PetscScalar v[]) 2792d71ae5a4SJacob Faibussowitsch { 2793dfb205c3SBarry Smith PetscFunctionBegin; 2794cac4c232SBarry Smith PetscTryMethod(B, "MatMPISBAIJSetPreallocationCSR_C", (Mat, PetscInt, const PetscInt[], const PetscInt[], const PetscScalar[]), (B, bs, i, j, v)); 27953ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 2796dfb205c3SBarry Smith } 2797dfb205c3SBarry Smith 2798d71ae5a4SJacob Faibussowitsch PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPISBAIJ(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat) 2799d71ae5a4SJacob Faibussowitsch { 280010c56fdeSHong Zhang PetscInt m, N, i, rstart, nnz, Ii, bs, cbs; 280110c56fdeSHong Zhang PetscInt *indx; 280210c56fdeSHong Zhang PetscScalar *values; 2803dfb205c3SBarry Smith 28044dcd73b1SHong Zhang PetscFunctionBegin; 28059566063dSJacob Faibussowitsch PetscCall(MatGetSize(inmat, &m, &N)); 280610c56fdeSHong Zhang if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */ 280710c56fdeSHong Zhang Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)inmat->data; 2808de25e9cbSPierre Jolivet PetscInt *dnz, *onz, mbs, Nbs, nbs; 280910c56fdeSHong Zhang PetscInt *bindx, rmax = a->rmax, j; 2810de25e9cbSPierre Jolivet PetscMPIInt rank, size; 28114dcd73b1SHong Zhang 28129566063dSJacob Faibussowitsch PetscCall(MatGetBlockSizes(inmat, &bs, &cbs)); 28139371c9d4SSatish Balay mbs = m / bs; 28149371c9d4SSatish Balay Nbs = N / cbs; 281548a46eb9SPierre Jolivet if (n == PETSC_DECIDE) PetscCall(PetscSplitOwnershipBlock(comm, cbs, &n, &N)); 2816da91a574SPierre Jolivet nbs = n / cbs; 28174dcd73b1SHong Zhang 28189566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(rmax, &bindx)); 2819d0609cedSBarry Smith MatPreallocateBegin(comm, mbs, nbs, dnz, onz); /* inline function, output __end and __rstart are used below */ 2820de25e9cbSPierre Jolivet 28219566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_rank(comm, &rank)); 28229566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_rank(comm, &size)); 2823de25e9cbSPierre Jolivet if (rank == size - 1) { 2824de25e9cbSPierre Jolivet /* Check sum(nbs) = Nbs */ 28255f80ce2aSJacob Faibussowitsch PetscCheck(__end == Nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Sum of local block columns %" PetscInt_FMT " != global block columns %" PetscInt_FMT, __end, Nbs); 2826de25e9cbSPierre Jolivet } 2827de25e9cbSPierre Jolivet 2828d0609cedSBarry Smith rstart = __rstart; /* block rstart of *outmat; see inline function MatPreallocateBegin */ 28299566063dSJacob Faibussowitsch PetscCall(MatSetOption(inmat, MAT_GETROW_UPPERTRIANGULAR, PETSC_TRUE)); 283010c56fdeSHong Zhang for (i = 0; i < mbs; i++) { 28319566063dSJacob Faibussowitsch PetscCall(MatGetRow_SeqSBAIJ(inmat, i * bs, &nnz, &indx, NULL)); /* non-blocked nnz and indx */ 28324dcd73b1SHong Zhang nnz = nnz / bs; 28334dcd73b1SHong Zhang for (j = 0; j < nnz; j++) bindx[j] = indx[j * bs] / bs; 28349566063dSJacob Faibussowitsch PetscCall(MatPreallocateSet(i + rstart, nnz, bindx, dnz, onz)); 28359566063dSJacob Faibussowitsch PetscCall(MatRestoreRow_SeqSBAIJ(inmat, i * bs, &nnz, &indx, NULL)); 28364dcd73b1SHong Zhang } 28379566063dSJacob Faibussowitsch PetscCall(MatSetOption(inmat, MAT_GETROW_UPPERTRIANGULAR, PETSC_FALSE)); 28389566063dSJacob Faibussowitsch PetscCall(PetscFree(bindx)); 28394dcd73b1SHong Zhang 28409566063dSJacob Faibussowitsch PetscCall(MatCreate(comm, outmat)); 28419566063dSJacob Faibussowitsch PetscCall(MatSetSizes(*outmat, m, n, PETSC_DETERMINE, PETSC_DETERMINE)); 28429566063dSJacob Faibussowitsch PetscCall(MatSetBlockSizes(*outmat, bs, cbs)); 28439566063dSJacob Faibussowitsch PetscCall(MatSetType(*outmat, MATSBAIJ)); 28449566063dSJacob Faibussowitsch PetscCall(MatSeqSBAIJSetPreallocation(*outmat, bs, 0, dnz)); 28459566063dSJacob Faibussowitsch PetscCall(MatMPISBAIJSetPreallocation(*outmat, bs, 0, dnz, 0, onz)); 2846d0609cedSBarry Smith MatPreallocateEnd(dnz, onz); 28474dcd73b1SHong Zhang } 28484dcd73b1SHong Zhang 284910c56fdeSHong Zhang /* numeric phase */ 28509566063dSJacob Faibussowitsch PetscCall(MatGetBlockSizes(inmat, &bs, &cbs)); 28519566063dSJacob Faibussowitsch PetscCall(MatGetOwnershipRange(*outmat, &rstart, NULL)); 28524dcd73b1SHong Zhang 28539566063dSJacob Faibussowitsch PetscCall(MatSetOption(inmat, MAT_GETROW_UPPERTRIANGULAR, PETSC_TRUE)); 28544dcd73b1SHong Zhang for (i = 0; i < m; i++) { 28559566063dSJacob Faibussowitsch PetscCall(MatGetRow_SeqSBAIJ(inmat, i, &nnz, &indx, &values)); 28564dcd73b1SHong Zhang Ii = i + rstart; 28579566063dSJacob Faibussowitsch PetscCall(MatSetValues(*outmat, 1, &Ii, nnz, indx, values, INSERT_VALUES)); 28589566063dSJacob Faibussowitsch PetscCall(MatRestoreRow_SeqSBAIJ(inmat, i, &nnz, &indx, &values)); 28594dcd73b1SHong Zhang } 28609566063dSJacob Faibussowitsch PetscCall(MatSetOption(inmat, MAT_GETROW_UPPERTRIANGULAR, PETSC_FALSE)); 28619566063dSJacob Faibussowitsch PetscCall(MatAssemblyBegin(*outmat, MAT_FINAL_ASSEMBLY)); 28629566063dSJacob Faibussowitsch PetscCall(MatAssemblyEnd(*outmat, MAT_FINAL_ASSEMBLY)); 28633ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 28644dcd73b1SHong Zhang } 2865