12593348eSBarry Smith /* 2b6490206SBarry Smith Defines the basic matrix operations for the BAIJ (compressed row) 32593348eSBarry Smith matrix storage format. 42593348eSBarry Smith */ 5c6db04a5SJed Brown #include <../src/mat/impls/baij/seq/baij.h> /*I "petscmat.h" I*/ 6c6db04a5SJed Brown #include <petscblaslapack.h> 7af0996ceSBarry Smith #include <petsc/private/kernels/blockinvert.h> 8af0996ceSBarry Smith #include <petsc/private/kernels/blockmatmult.h> 943516a2dSKris Buschelman 1026cec326SBarry Smith /* defines MatSetValues_Seq_Hash(), MatAssemblyEnd_Seq_Hash(), MatSetUp_Seq_Hash() */ 1126cec326SBarry Smith #define TYPE BAIJ 1226cec326SBarry Smith #define TYPE_BS 1326cec326SBarry Smith #include "../src/mat/impls/aij/seq/seqhashmatsetvalues.h" 1426cec326SBarry Smith #undef TYPE_BS 1526cec326SBarry Smith #define TYPE_BS _BS 1626cec326SBarry Smith #define TYPE_BS_ON 1726cec326SBarry Smith #include "../src/mat/impls/aij/seq/seqhashmatsetvalues.h" 1826cec326SBarry Smith #undef TYPE_BS 1926cec326SBarry Smith #include "../src/mat/impls/aij/seq/seqhashmat.h" 2026cec326SBarry Smith #undef TYPE 2126cec326SBarry Smith #undef TYPE_BS_ON 2226cec326SBarry Smith 237ea3e4caSstefano_zampini #if defined(PETSC_HAVE_HYPRE) 247ea3e4caSstefano_zampini PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat, MatType, MatReuse, Mat *); 257ea3e4caSstefano_zampini #endif 267ea3e4caSstefano_zampini 27b5b72c8aSIrina Sokolova #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 28fd9d3c67SJed Brown PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBAIJMKL(Mat, MatType, MatReuse, Mat *); 29b5b72c8aSIrina Sokolova #endif 30c9225affSStefano Zampini PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat, MatType, MatReuse, Mat *); 31b5b72c8aSIrina Sokolova 32ff6a9541SJacob Faibussowitsch static PetscErrorCode MatGetColumnReductions_SeqBAIJ(Mat A, PetscInt type, PetscReal *reductions) 33d71ae5a4SJacob Faibussowitsch { 349463ebdaSPierre Jolivet Mat_SeqBAIJ *a_aij = (Mat_SeqBAIJ *)A->data; 35ff6a9541SJacob Faibussowitsch PetscInt m, n, ib, jb, bs = A->rmap->bs; 369463ebdaSPierre Jolivet MatScalar *a_val = a_aij->a; 379463ebdaSPierre Jolivet 389463ebdaSPierre Jolivet PetscFunctionBegin; 399566063dSJacob Faibussowitsch PetscCall(MatGetSize(A, &m, &n)); 40ff6a9541SJacob Faibussowitsch PetscCall(PetscArrayzero(reductions, n)); 419463ebdaSPierre Jolivet if (type == NORM_2) { 42ff6a9541SJacob Faibussowitsch for (PetscInt i = a_aij->i[0]; i < a_aij->i[A->rmap->n / bs]; i++) { 439463ebdaSPierre Jolivet for (jb = 0; jb < bs; jb++) { 449463ebdaSPierre Jolivet for (ib = 0; ib < bs; ib++) { 45857cbf51SRichard Tran Mills reductions[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val * *a_val); 469463ebdaSPierre Jolivet a_val++; 479463ebdaSPierre Jolivet } 489463ebdaSPierre Jolivet } 499463ebdaSPierre Jolivet } 509463ebdaSPierre Jolivet } else if (type == NORM_1) { 51ff6a9541SJacob Faibussowitsch for (PetscInt i = a_aij->i[0]; i < a_aij->i[A->rmap->n / bs]; i++) { 529463ebdaSPierre Jolivet for (jb = 0; jb < bs; jb++) { 539463ebdaSPierre Jolivet for (ib = 0; ib < bs; ib++) { 54857cbf51SRichard Tran Mills reductions[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val); 559463ebdaSPierre Jolivet a_val++; 569463ebdaSPierre Jolivet } 579463ebdaSPierre Jolivet } 589463ebdaSPierre Jolivet } 599463ebdaSPierre Jolivet } else if (type == NORM_INFINITY) { 60ff6a9541SJacob Faibussowitsch for (PetscInt i = a_aij->i[0]; i < a_aij->i[A->rmap->n / bs]; i++) { 619463ebdaSPierre Jolivet for (jb = 0; jb < bs; jb++) { 629463ebdaSPierre Jolivet for (ib = 0; ib < bs; ib++) { 636497c311SBarry Smith PetscInt col = A->cmap->rstart + a_aij->j[i] * bs + jb; 64857cbf51SRichard Tran Mills reductions[col] = PetscMax(PetscAbsScalar(*a_val), reductions[col]); 659463ebdaSPierre Jolivet a_val++; 669463ebdaSPierre Jolivet } 679463ebdaSPierre Jolivet } 689463ebdaSPierre Jolivet } 69857cbf51SRichard Tran Mills } else if (type == REDUCTION_SUM_REALPART || type == REDUCTION_MEAN_REALPART) { 70ff6a9541SJacob Faibussowitsch for (PetscInt i = a_aij->i[0]; i < a_aij->i[A->rmap->n / bs]; i++) { 71857cbf51SRichard Tran Mills for (jb = 0; jb < bs; jb++) { 72857cbf51SRichard Tran Mills for (ib = 0; ib < bs; ib++) { 73857cbf51SRichard Tran Mills reductions[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscRealPart(*a_val); 74857cbf51SRichard Tran Mills a_val++; 75857cbf51SRichard Tran Mills } 76857cbf51SRichard Tran Mills } 77857cbf51SRichard Tran Mills } 78857cbf51SRichard Tran Mills } else if (type == REDUCTION_SUM_IMAGINARYPART || type == REDUCTION_MEAN_IMAGINARYPART) { 79ff6a9541SJacob Faibussowitsch for (PetscInt i = a_aij->i[0]; i < a_aij->i[A->rmap->n / bs]; i++) { 80857cbf51SRichard Tran Mills for (jb = 0; jb < bs; jb++) { 81857cbf51SRichard Tran Mills for (ib = 0; ib < bs; ib++) { 82857cbf51SRichard Tran Mills reductions[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscImaginaryPart(*a_val); 83857cbf51SRichard Tran Mills a_val++; 84857cbf51SRichard Tran Mills } 85857cbf51SRichard Tran Mills } 86857cbf51SRichard Tran Mills } 87857cbf51SRichard Tran Mills } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Unknown reduction type"); 889463ebdaSPierre Jolivet if (type == NORM_2) { 89ff6a9541SJacob Faibussowitsch for (PetscInt i = 0; i < n; i++) reductions[i] = PetscSqrtReal(reductions[i]); 90857cbf51SRichard Tran Mills } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) { 91ff6a9541SJacob Faibussowitsch for (PetscInt i = 0; i < n; i++) reductions[i] /= m; 929463ebdaSPierre Jolivet } 933ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 949463ebdaSPierre Jolivet } 959463ebdaSPierre Jolivet 9666976f2fSJacob Faibussowitsch static PetscErrorCode MatInvertBlockDiagonal_SeqBAIJ(Mat A, const PetscScalar **values) 97d71ae5a4SJacob Faibussowitsch { 98b01c7715SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 99de80f912SBarry Smith PetscInt *diag_offset, i, bs = A->rmap->bs, mbs = a->mbs, ipvt[5], bs2 = bs * bs, *v_pivots; 1007f0c90edSBarry Smith MatScalar *v = a->a, *odiag, *diag, work[25], *v_work; 10162bba022SBarry Smith PetscReal shift = 0.0; 1021a9391e3SHong Zhang PetscBool allowzeropivot, zeropivotdetected = PETSC_FALSE; 103b01c7715SBarry Smith 104b01c7715SBarry Smith PetscFunctionBegin; 105a455e926SHong Zhang allowzeropivot = PetscNot(A->erroriffailure); 106a455e926SHong Zhang 1079797317bSBarry Smith if (a->idiagvalid) { 1089797317bSBarry Smith if (values) *values = a->idiag; 1093ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1109797317bSBarry Smith } 1119566063dSJacob Faibussowitsch PetscCall(MatMarkDiagonal_SeqBAIJ(A)); 112b01c7715SBarry Smith diag_offset = a->diag; 1133a7d0413SPierre Jolivet if (!a->idiag) PetscCall(PetscMalloc1(bs2 * mbs, &a->idiag)); 114b01c7715SBarry Smith diag = a->idiag; 115bbead8a2SBarry Smith if (values) *values = a->idiag; 116b01c7715SBarry Smith /* factor and invert each block */ 117521d7252SBarry Smith switch (bs) { 118ab040260SJed Brown case 1: 119ab040260SJed Brown for (i = 0; i < mbs; i++) { 120ab040260SJed Brown odiag = v + 1 * diag_offset[i]; 121ab040260SJed Brown diag[0] = odiag[0]; 122ec1892c8SHong Zhang 123ec1892c8SHong Zhang if (PetscAbsScalar(diag[0] + shift) < PETSC_MACHINE_EPSILON) { 124966bd95aSPierre Jolivet PetscCheck(allowzeropivot, PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Zero pivot, row %" PetscInt_FMT " pivot value %g tolerance %g", i, (double)PetscAbsScalar(diag[0]), (double)PETSC_MACHINE_EPSILON); 1257b6c816cSBarry Smith A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 1267b6c816cSBarry Smith A->factorerror_zeropivot_value = PetscAbsScalar(diag[0]); 1277b6c816cSBarry Smith A->factorerror_zeropivot_row = i; 1289566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "Zero pivot, row %" PetscInt_FMT "\n", i)); 129ec1892c8SHong Zhang } 130ec1892c8SHong Zhang 131d4a378daSJed Brown diag[0] = (PetscScalar)1.0 / (diag[0] + shift); 132ab040260SJed Brown diag += 1; 133ab040260SJed Brown } 134ab040260SJed Brown break; 135b01c7715SBarry Smith case 2: 136b01c7715SBarry Smith for (i = 0; i < mbs; i++) { 137b01c7715SBarry Smith odiag = v + 4 * diag_offset[i]; 1389371c9d4SSatish Balay diag[0] = odiag[0]; 1399371c9d4SSatish Balay diag[1] = odiag[1]; 1409371c9d4SSatish Balay diag[2] = odiag[2]; 1419371c9d4SSatish Balay diag[3] = odiag[3]; 1429566063dSJacob Faibussowitsch PetscCall(PetscKernel_A_gets_inverse_A_2(diag, shift, allowzeropivot, &zeropivotdetected)); 1437b6c816cSBarry Smith if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 144b01c7715SBarry Smith diag += 4; 145b01c7715SBarry Smith } 146b01c7715SBarry Smith break; 147b01c7715SBarry Smith case 3: 148b01c7715SBarry Smith for (i = 0; i < mbs; i++) { 149b01c7715SBarry Smith odiag = v + 9 * diag_offset[i]; 1509371c9d4SSatish Balay diag[0] = odiag[0]; 1519371c9d4SSatish Balay diag[1] = odiag[1]; 1529371c9d4SSatish Balay diag[2] = odiag[2]; 1539371c9d4SSatish Balay diag[3] = odiag[3]; 1549371c9d4SSatish Balay diag[4] = odiag[4]; 1559371c9d4SSatish Balay diag[5] = odiag[5]; 1569371c9d4SSatish Balay diag[6] = odiag[6]; 1579371c9d4SSatish Balay diag[7] = odiag[7]; 158b01c7715SBarry Smith diag[8] = odiag[8]; 1599566063dSJacob Faibussowitsch PetscCall(PetscKernel_A_gets_inverse_A_3(diag, shift, allowzeropivot, &zeropivotdetected)); 1607b6c816cSBarry Smith if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 161b01c7715SBarry Smith diag += 9; 162b01c7715SBarry Smith } 163b01c7715SBarry Smith break; 164b01c7715SBarry Smith case 4: 165b01c7715SBarry Smith for (i = 0; i < mbs; i++) { 166b01c7715SBarry Smith odiag = v + 16 * diag_offset[i]; 1679566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(diag, odiag, 16)); 1689566063dSJacob Faibussowitsch PetscCall(PetscKernel_A_gets_inverse_A_4(diag, shift, allowzeropivot, &zeropivotdetected)); 1697b6c816cSBarry Smith if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 170b01c7715SBarry Smith diag += 16; 171b01c7715SBarry Smith } 172b01c7715SBarry Smith break; 173b01c7715SBarry Smith case 5: 174b01c7715SBarry Smith for (i = 0; i < mbs; i++) { 175b01c7715SBarry Smith odiag = v + 25 * diag_offset[i]; 1769566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(diag, odiag, 25)); 1779566063dSJacob Faibussowitsch PetscCall(PetscKernel_A_gets_inverse_A_5(diag, ipvt, work, shift, allowzeropivot, &zeropivotdetected)); 1787b6c816cSBarry Smith if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 179b01c7715SBarry Smith diag += 25; 180b01c7715SBarry Smith } 181b01c7715SBarry Smith break; 182d49b2adcSBarry Smith case 6: 183d49b2adcSBarry Smith for (i = 0; i < mbs; i++) { 184d49b2adcSBarry Smith odiag = v + 36 * diag_offset[i]; 1859566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(diag, odiag, 36)); 1869566063dSJacob Faibussowitsch PetscCall(PetscKernel_A_gets_inverse_A_6(diag, shift, allowzeropivot, &zeropivotdetected)); 1877b6c816cSBarry Smith if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 188d49b2adcSBarry Smith diag += 36; 189d49b2adcSBarry Smith } 190d49b2adcSBarry Smith break; 191de80f912SBarry Smith case 7: 192de80f912SBarry Smith for (i = 0; i < mbs; i++) { 193de80f912SBarry Smith odiag = v + 49 * diag_offset[i]; 1949566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(diag, odiag, 49)); 1959566063dSJacob Faibussowitsch PetscCall(PetscKernel_A_gets_inverse_A_7(diag, shift, allowzeropivot, &zeropivotdetected)); 1967b6c816cSBarry Smith if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 197de80f912SBarry Smith diag += 49; 198de80f912SBarry Smith } 199de80f912SBarry Smith break; 200b01c7715SBarry Smith default: 2019566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(bs, &v_work, bs, &v_pivots)); 202de80f912SBarry Smith for (i = 0; i < mbs; i++) { 203de80f912SBarry Smith odiag = v + bs2 * diag_offset[i]; 2049566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(diag, odiag, bs2)); 2059566063dSJacob Faibussowitsch PetscCall(PetscKernel_A_gets_inverse_A(bs, diag, v_pivots, v_work, allowzeropivot, &zeropivotdetected)); 2067b6c816cSBarry Smith if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 207de80f912SBarry Smith diag += bs2; 208de80f912SBarry Smith } 2099566063dSJacob Faibussowitsch PetscCall(PetscFree2(v_work, v_pivots)); 210b01c7715SBarry Smith } 211b01c7715SBarry Smith a->idiagvalid = PETSC_TRUE; 2123ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 213b01c7715SBarry Smith } 214b01c7715SBarry Smith 21566976f2fSJacob Faibussowitsch static PetscErrorCode MatSOR_SeqBAIJ(Mat A, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx) 216d71ae5a4SJacob Faibussowitsch { 2176d3beeddSMatthew Knepley Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 218e48d15efSToby Isaac PetscScalar *x, *work, *w, *workt, *t; 219e48d15efSToby Isaac const MatScalar *v, *aa = a->a, *idiag; 220e48d15efSToby Isaac const PetscScalar *b, *xb; 2215455b99fSToby Isaac PetscScalar s[7], xw[7] = {0}; /* avoid some compilers thinking xw is uninitialized */ 222e48d15efSToby Isaac PetscInt m = a->mbs, i, i2, nz, bs = A->rmap->bs, bs2 = bs * bs, k, j, idx, it; 223c1ac3661SBarry Smith const PetscInt *diag, *ai = a->i, *aj = a->j, *vi; 224b01c7715SBarry Smith 225b01c7715SBarry Smith PetscFunctionBegin; 226b01c7715SBarry Smith its = its * lits; 2275f80ce2aSJacob Faibussowitsch PetscCheck(!(flag & SOR_EISENSTAT), PETSC_COMM_SELF, PETSC_ERR_SUP, "No support yet for Eisenstat"); 2285f80ce2aSJacob Faibussowitsch PetscCheck(its > 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Relaxation requires global its %" PetscInt_FMT " and local its %" PetscInt_FMT " both positive", its, lits); 2295f80ce2aSJacob Faibussowitsch PetscCheck(!fshift, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for diagonal shift"); 2305f80ce2aSJacob Faibussowitsch PetscCheck(omega == 1.0, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for non-trivial relaxation factor"); 2315f80ce2aSJacob Faibussowitsch PetscCheck(!(flag & SOR_APPLY_UPPER) && !(flag & SOR_APPLY_LOWER), PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for applying upper or lower triangular parts"); 232b01c7715SBarry Smith 2339566063dSJacob Faibussowitsch if (!a->idiagvalid) PetscCall(MatInvertBlockDiagonal(A, NULL)); 234b01c7715SBarry Smith 2353ba16761SJacob Faibussowitsch if (!m) PetscFunctionReturn(PETSC_SUCCESS); 236b01c7715SBarry Smith diag = a->diag; 237b01c7715SBarry Smith idiag = a->idiag; 238de80f912SBarry Smith k = PetscMax(A->rmap->n, A->cmap->n); 23948a46eb9SPierre Jolivet if (!a->mult_work) PetscCall(PetscMalloc1(k + 1, &a->mult_work)); 24048a46eb9SPierre Jolivet if (!a->sor_workt) PetscCall(PetscMalloc1(k, &a->sor_workt)); 24148a46eb9SPierre Jolivet if (!a->sor_work) PetscCall(PetscMalloc1(bs, &a->sor_work)); 2423475c22fSBarry Smith work = a->mult_work; 2433475c22fSBarry Smith t = a->sor_workt; 244de80f912SBarry Smith w = a->sor_work; 245de80f912SBarry Smith 2469566063dSJacob Faibussowitsch PetscCall(VecGetArray(xx, &x)); 2479566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(bb, &b)); 248de80f912SBarry Smith 249de80f912SBarry Smith if (flag & SOR_ZERO_INITIAL_GUESS) { 250de80f912SBarry Smith if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) { 251e48d15efSToby Isaac switch (bs) { 252e48d15efSToby Isaac case 1: 253e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_1(x, idiag, b); 254e48d15efSToby Isaac t[0] = b[0]; 255e48d15efSToby Isaac i2 = 1; 256e48d15efSToby Isaac idiag += 1; 257e48d15efSToby Isaac for (i = 1; i < m; i++) { 258e48d15efSToby Isaac v = aa + ai[i]; 259e48d15efSToby Isaac vi = aj + ai[i]; 260e48d15efSToby Isaac nz = diag[i] - ai[i]; 261e48d15efSToby Isaac s[0] = b[i2]; 262e48d15efSToby Isaac for (j = 0; j < nz; j++) { 263e48d15efSToby Isaac xw[0] = x[vi[j]]; 264e48d15efSToby Isaac PetscKernel_v_gets_v_minus_A_times_w_1(s, (v + j), xw); 265e48d15efSToby Isaac } 266e48d15efSToby Isaac t[i2] = s[0]; 267e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_1(xw, idiag, s); 268e48d15efSToby Isaac x[i2] = xw[0]; 269e48d15efSToby Isaac idiag += 1; 270e48d15efSToby Isaac i2 += 1; 271e48d15efSToby Isaac } 272e48d15efSToby Isaac break; 273e48d15efSToby Isaac case 2: 274e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_2(x, idiag, b); 2759371c9d4SSatish Balay t[0] = b[0]; 2769371c9d4SSatish Balay t[1] = b[1]; 277e48d15efSToby Isaac i2 = 2; 278e48d15efSToby Isaac idiag += 4; 279e48d15efSToby Isaac for (i = 1; i < m; i++) { 280e48d15efSToby Isaac v = aa + 4 * ai[i]; 281e48d15efSToby Isaac vi = aj + ai[i]; 282e48d15efSToby Isaac nz = diag[i] - ai[i]; 2839371c9d4SSatish Balay s[0] = b[i2]; 2849371c9d4SSatish Balay s[1] = b[i2 + 1]; 285e48d15efSToby Isaac for (j = 0; j < nz; j++) { 286e48d15efSToby Isaac idx = 2 * vi[j]; 287e48d15efSToby Isaac it = 4 * j; 2889371c9d4SSatish Balay xw[0] = x[idx]; 2899371c9d4SSatish Balay xw[1] = x[1 + idx]; 290e48d15efSToby Isaac PetscKernel_v_gets_v_minus_A_times_w_2(s, (v + it), xw); 291e48d15efSToby Isaac } 2929371c9d4SSatish Balay t[i2] = s[0]; 2939371c9d4SSatish Balay t[i2 + 1] = s[1]; 294e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_2(xw, idiag, s); 2959371c9d4SSatish Balay x[i2] = xw[0]; 2969371c9d4SSatish Balay x[i2 + 1] = xw[1]; 297e48d15efSToby Isaac idiag += 4; 298e48d15efSToby Isaac i2 += 2; 299e48d15efSToby Isaac } 300e48d15efSToby Isaac break; 301e48d15efSToby Isaac case 3: 302e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_3(x, idiag, b); 3039371c9d4SSatish Balay t[0] = b[0]; 3049371c9d4SSatish Balay t[1] = b[1]; 3059371c9d4SSatish Balay t[2] = b[2]; 306e48d15efSToby Isaac i2 = 3; 307e48d15efSToby Isaac idiag += 9; 308e48d15efSToby Isaac for (i = 1; i < m; i++) { 309e48d15efSToby Isaac v = aa + 9 * ai[i]; 310e48d15efSToby Isaac vi = aj + ai[i]; 311e48d15efSToby Isaac nz = diag[i] - ai[i]; 3129371c9d4SSatish Balay s[0] = b[i2]; 3139371c9d4SSatish Balay s[1] = b[i2 + 1]; 3149371c9d4SSatish Balay s[2] = b[i2 + 2]; 315e48d15efSToby Isaac while (nz--) { 316e48d15efSToby Isaac idx = 3 * (*vi++); 3179371c9d4SSatish Balay xw[0] = x[idx]; 3189371c9d4SSatish Balay xw[1] = x[1 + idx]; 3199371c9d4SSatish Balay xw[2] = x[2 + idx]; 320e48d15efSToby Isaac PetscKernel_v_gets_v_minus_A_times_w_3(s, v, xw); 321e48d15efSToby Isaac v += 9; 322e48d15efSToby Isaac } 3239371c9d4SSatish Balay t[i2] = s[0]; 3249371c9d4SSatish Balay t[i2 + 1] = s[1]; 3259371c9d4SSatish Balay t[i2 + 2] = s[2]; 326e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_3(xw, idiag, s); 3279371c9d4SSatish Balay x[i2] = xw[0]; 3289371c9d4SSatish Balay x[i2 + 1] = xw[1]; 3299371c9d4SSatish Balay x[i2 + 2] = xw[2]; 330e48d15efSToby Isaac idiag += 9; 331e48d15efSToby Isaac i2 += 3; 332e48d15efSToby Isaac } 333e48d15efSToby Isaac break; 334e48d15efSToby Isaac case 4: 335e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_4(x, idiag, b); 3369371c9d4SSatish Balay t[0] = b[0]; 3379371c9d4SSatish Balay t[1] = b[1]; 3389371c9d4SSatish Balay t[2] = b[2]; 3399371c9d4SSatish Balay t[3] = b[3]; 340e48d15efSToby Isaac i2 = 4; 341e48d15efSToby Isaac idiag += 16; 342e48d15efSToby Isaac for (i = 1; i < m; i++) { 343e48d15efSToby Isaac v = aa + 16 * ai[i]; 344e48d15efSToby Isaac vi = aj + ai[i]; 345e48d15efSToby Isaac nz = diag[i] - ai[i]; 3469371c9d4SSatish Balay s[0] = b[i2]; 3479371c9d4SSatish Balay s[1] = b[i2 + 1]; 3489371c9d4SSatish Balay s[2] = b[i2 + 2]; 3499371c9d4SSatish Balay s[3] = b[i2 + 3]; 350e48d15efSToby Isaac while (nz--) { 351e48d15efSToby Isaac idx = 4 * (*vi++); 3529371c9d4SSatish Balay xw[0] = x[idx]; 3539371c9d4SSatish Balay xw[1] = x[1 + idx]; 3549371c9d4SSatish Balay xw[2] = x[2 + idx]; 3559371c9d4SSatish Balay xw[3] = x[3 + idx]; 356e48d15efSToby Isaac PetscKernel_v_gets_v_minus_A_times_w_4(s, v, xw); 357e48d15efSToby Isaac v += 16; 358e48d15efSToby Isaac } 3599371c9d4SSatish Balay t[i2] = s[0]; 3609371c9d4SSatish Balay t[i2 + 1] = s[1]; 3619371c9d4SSatish Balay t[i2 + 2] = s[2]; 3629371c9d4SSatish Balay t[i2 + 3] = s[3]; 363e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_4(xw, idiag, s); 3649371c9d4SSatish Balay x[i2] = xw[0]; 3659371c9d4SSatish Balay x[i2 + 1] = xw[1]; 3669371c9d4SSatish Balay x[i2 + 2] = xw[2]; 3679371c9d4SSatish Balay x[i2 + 3] = xw[3]; 368e48d15efSToby Isaac idiag += 16; 369e48d15efSToby Isaac i2 += 4; 370e48d15efSToby Isaac } 371e48d15efSToby Isaac break; 372e48d15efSToby Isaac case 5: 373e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_5(x, idiag, b); 3749371c9d4SSatish Balay t[0] = b[0]; 3759371c9d4SSatish Balay t[1] = b[1]; 3769371c9d4SSatish Balay t[2] = b[2]; 3779371c9d4SSatish Balay t[3] = b[3]; 3789371c9d4SSatish Balay t[4] = b[4]; 379e48d15efSToby Isaac i2 = 5; 380e48d15efSToby Isaac idiag += 25; 381e48d15efSToby Isaac for (i = 1; i < m; i++) { 382e48d15efSToby Isaac v = aa + 25 * ai[i]; 383e48d15efSToby Isaac vi = aj + ai[i]; 384e48d15efSToby Isaac nz = diag[i] - ai[i]; 3859371c9d4SSatish Balay s[0] = b[i2]; 3869371c9d4SSatish Balay s[1] = b[i2 + 1]; 3879371c9d4SSatish Balay s[2] = b[i2 + 2]; 3889371c9d4SSatish Balay s[3] = b[i2 + 3]; 3899371c9d4SSatish Balay s[4] = b[i2 + 4]; 390e48d15efSToby Isaac while (nz--) { 391e48d15efSToby Isaac idx = 5 * (*vi++); 3929371c9d4SSatish Balay xw[0] = x[idx]; 3939371c9d4SSatish Balay xw[1] = x[1 + idx]; 3949371c9d4SSatish Balay xw[2] = x[2 + idx]; 3959371c9d4SSatish Balay xw[3] = x[3 + idx]; 3969371c9d4SSatish Balay xw[4] = x[4 + idx]; 397e48d15efSToby Isaac PetscKernel_v_gets_v_minus_A_times_w_5(s, v, xw); 398e48d15efSToby Isaac v += 25; 399e48d15efSToby Isaac } 4009371c9d4SSatish Balay t[i2] = s[0]; 4019371c9d4SSatish Balay t[i2 + 1] = s[1]; 4029371c9d4SSatish Balay t[i2 + 2] = s[2]; 4039371c9d4SSatish Balay t[i2 + 3] = s[3]; 4049371c9d4SSatish Balay t[i2 + 4] = s[4]; 405e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_5(xw, idiag, s); 4069371c9d4SSatish Balay x[i2] = xw[0]; 4079371c9d4SSatish Balay x[i2 + 1] = xw[1]; 4089371c9d4SSatish Balay x[i2 + 2] = xw[2]; 4099371c9d4SSatish Balay x[i2 + 3] = xw[3]; 4109371c9d4SSatish Balay x[i2 + 4] = xw[4]; 411e48d15efSToby Isaac idiag += 25; 412e48d15efSToby Isaac i2 += 5; 413e48d15efSToby Isaac } 414e48d15efSToby Isaac break; 415e48d15efSToby Isaac case 6: 416e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_6(x, idiag, b); 4179371c9d4SSatish Balay t[0] = b[0]; 4189371c9d4SSatish Balay t[1] = b[1]; 4199371c9d4SSatish Balay t[2] = b[2]; 4209371c9d4SSatish Balay t[3] = b[3]; 4219371c9d4SSatish Balay t[4] = b[4]; 4229371c9d4SSatish Balay t[5] = b[5]; 423e48d15efSToby Isaac i2 = 6; 424e48d15efSToby Isaac idiag += 36; 425e48d15efSToby Isaac for (i = 1; i < m; i++) { 426e48d15efSToby Isaac v = aa + 36 * ai[i]; 427e48d15efSToby Isaac vi = aj + ai[i]; 428e48d15efSToby Isaac nz = diag[i] - ai[i]; 4299371c9d4SSatish Balay s[0] = b[i2]; 4309371c9d4SSatish Balay s[1] = b[i2 + 1]; 4319371c9d4SSatish Balay s[2] = b[i2 + 2]; 4329371c9d4SSatish Balay s[3] = b[i2 + 3]; 4339371c9d4SSatish Balay s[4] = b[i2 + 4]; 4349371c9d4SSatish Balay s[5] = b[i2 + 5]; 435e48d15efSToby Isaac while (nz--) { 436e48d15efSToby Isaac idx = 6 * (*vi++); 4379371c9d4SSatish Balay xw[0] = x[idx]; 4389371c9d4SSatish Balay xw[1] = x[1 + idx]; 4399371c9d4SSatish Balay xw[2] = x[2 + idx]; 4409371c9d4SSatish Balay xw[3] = x[3 + idx]; 4419371c9d4SSatish Balay xw[4] = x[4 + idx]; 4429371c9d4SSatish Balay xw[5] = x[5 + idx]; 443e48d15efSToby Isaac PetscKernel_v_gets_v_minus_A_times_w_6(s, v, xw); 444e48d15efSToby Isaac v += 36; 445e48d15efSToby Isaac } 4469371c9d4SSatish Balay t[i2] = s[0]; 4479371c9d4SSatish Balay t[i2 + 1] = s[1]; 4489371c9d4SSatish Balay t[i2 + 2] = s[2]; 4499371c9d4SSatish Balay t[i2 + 3] = s[3]; 4509371c9d4SSatish Balay t[i2 + 4] = s[4]; 4519371c9d4SSatish Balay t[i2 + 5] = s[5]; 452e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_6(xw, idiag, s); 4539371c9d4SSatish Balay x[i2] = xw[0]; 4549371c9d4SSatish Balay x[i2 + 1] = xw[1]; 4559371c9d4SSatish Balay x[i2 + 2] = xw[2]; 4569371c9d4SSatish Balay x[i2 + 3] = xw[3]; 4579371c9d4SSatish Balay x[i2 + 4] = xw[4]; 4589371c9d4SSatish Balay x[i2 + 5] = xw[5]; 459e48d15efSToby Isaac idiag += 36; 460e48d15efSToby Isaac i2 += 6; 461e48d15efSToby Isaac } 462e48d15efSToby Isaac break; 463e48d15efSToby Isaac case 7: 464e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_7(x, idiag, b); 4659371c9d4SSatish Balay t[0] = b[0]; 4669371c9d4SSatish Balay t[1] = b[1]; 4679371c9d4SSatish Balay t[2] = b[2]; 4689371c9d4SSatish Balay t[3] = b[3]; 4699371c9d4SSatish Balay t[4] = b[4]; 4709371c9d4SSatish Balay t[5] = b[5]; 4719371c9d4SSatish Balay t[6] = b[6]; 472e48d15efSToby Isaac i2 = 7; 473e48d15efSToby Isaac idiag += 49; 474e48d15efSToby Isaac for (i = 1; i < m; i++) { 475e48d15efSToby Isaac v = aa + 49 * ai[i]; 476e48d15efSToby Isaac vi = aj + ai[i]; 477e48d15efSToby Isaac nz = diag[i] - ai[i]; 4789371c9d4SSatish Balay s[0] = b[i2]; 4799371c9d4SSatish Balay s[1] = b[i2 + 1]; 4809371c9d4SSatish Balay s[2] = b[i2 + 2]; 4819371c9d4SSatish Balay s[3] = b[i2 + 3]; 4829371c9d4SSatish Balay s[4] = b[i2 + 4]; 4839371c9d4SSatish Balay s[5] = b[i2 + 5]; 4849371c9d4SSatish Balay s[6] = b[i2 + 6]; 485e48d15efSToby Isaac while (nz--) { 486e48d15efSToby Isaac idx = 7 * (*vi++); 4879371c9d4SSatish Balay xw[0] = x[idx]; 4889371c9d4SSatish Balay xw[1] = x[1 + idx]; 4899371c9d4SSatish Balay xw[2] = x[2 + idx]; 4909371c9d4SSatish Balay xw[3] = x[3 + idx]; 4919371c9d4SSatish Balay xw[4] = x[4 + idx]; 4929371c9d4SSatish Balay xw[5] = x[5 + idx]; 4939371c9d4SSatish Balay xw[6] = x[6 + idx]; 494e48d15efSToby Isaac PetscKernel_v_gets_v_minus_A_times_w_7(s, v, xw); 495e48d15efSToby Isaac v += 49; 496e48d15efSToby Isaac } 4979371c9d4SSatish Balay t[i2] = s[0]; 4989371c9d4SSatish Balay t[i2 + 1] = s[1]; 4999371c9d4SSatish Balay t[i2 + 2] = s[2]; 5009371c9d4SSatish Balay t[i2 + 3] = s[3]; 5019371c9d4SSatish Balay t[i2 + 4] = s[4]; 5029371c9d4SSatish Balay t[i2 + 5] = s[5]; 5039371c9d4SSatish Balay t[i2 + 6] = s[6]; 504e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_7(xw, idiag, s); 5059371c9d4SSatish Balay x[i2] = xw[0]; 5069371c9d4SSatish Balay x[i2 + 1] = xw[1]; 5079371c9d4SSatish Balay x[i2 + 2] = xw[2]; 5089371c9d4SSatish Balay x[i2 + 3] = xw[3]; 5099371c9d4SSatish Balay x[i2 + 4] = xw[4]; 5109371c9d4SSatish Balay x[i2 + 5] = xw[5]; 5119371c9d4SSatish Balay x[i2 + 6] = xw[6]; 512e48d15efSToby Isaac idiag += 49; 513e48d15efSToby Isaac i2 += 7; 514e48d15efSToby Isaac } 515e48d15efSToby Isaac break; 516e48d15efSToby Isaac default: 51796b95a6bSBarry Smith PetscKernel_w_gets_Ar_times_v(bs, bs, b, idiag, x); 5189566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(t, b, bs)); 519de80f912SBarry Smith i2 = bs; 520de80f912SBarry Smith idiag += bs2; 521de80f912SBarry Smith for (i = 1; i < m; i++) { 522de80f912SBarry Smith v = aa + bs2 * ai[i]; 523de80f912SBarry Smith vi = aj + ai[i]; 524de80f912SBarry Smith nz = diag[i] - ai[i]; 525de80f912SBarry Smith 5269566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(w, b + i2, bs)); 527de80f912SBarry Smith /* copy all rows of x that are needed into contiguous space */ 528de80f912SBarry Smith workt = work; 529de80f912SBarry Smith for (j = 0; j < nz; j++) { 5309566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(workt, x + bs * (*vi++), bs)); 531de80f912SBarry Smith workt += bs; 532de80f912SBarry Smith } 53396b95a6bSBarry Smith PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, v, work); 5349566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(t + i2, w, bs)); 53596b95a6bSBarry Smith PetscKernel_w_gets_Ar_times_v(bs, bs, w, idiag, x + i2); 536de80f912SBarry Smith 537de80f912SBarry Smith idiag += bs2; 538de80f912SBarry Smith i2 += bs; 539de80f912SBarry Smith } 540e48d15efSToby Isaac break; 541e48d15efSToby Isaac } 542de80f912SBarry Smith /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 5439566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(1.0 * bs2 * a->nz)); 544e48d15efSToby Isaac xb = t; 5459371c9d4SSatish Balay } else xb = b; 546de80f912SBarry Smith if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 547e48d15efSToby Isaac idiag = a->idiag + bs2 * (a->mbs - 1); 548e48d15efSToby Isaac i2 = bs * (m - 1); 549e48d15efSToby Isaac switch (bs) { 550e48d15efSToby Isaac case 1: 551e48d15efSToby Isaac s[0] = xb[i2]; 552e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_1(xw, idiag, s); 553e48d15efSToby Isaac x[i2] = xw[0]; 554e48d15efSToby Isaac i2 -= 1; 555e48d15efSToby Isaac for (i = m - 2; i >= 0; i--) { 556e48d15efSToby Isaac v = aa + (diag[i] + 1); 557e48d15efSToby Isaac vi = aj + diag[i] + 1; 558e48d15efSToby Isaac nz = ai[i + 1] - diag[i] - 1; 559e48d15efSToby Isaac s[0] = xb[i2]; 560e48d15efSToby Isaac for (j = 0; j < nz; j++) { 561e48d15efSToby Isaac xw[0] = x[vi[j]]; 562e48d15efSToby Isaac PetscKernel_v_gets_v_minus_A_times_w_1(s, (v + j), xw); 563e48d15efSToby Isaac } 564e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_1(xw, idiag, s); 565e48d15efSToby Isaac x[i2] = xw[0]; 566e48d15efSToby Isaac idiag -= 1; 567e48d15efSToby Isaac i2 -= 1; 568e48d15efSToby Isaac } 569e48d15efSToby Isaac break; 570e48d15efSToby Isaac case 2: 5719371c9d4SSatish Balay s[0] = xb[i2]; 5729371c9d4SSatish Balay s[1] = xb[i2 + 1]; 573e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_2(xw, idiag, s); 5749371c9d4SSatish Balay x[i2] = xw[0]; 5759371c9d4SSatish Balay x[i2 + 1] = xw[1]; 576e48d15efSToby Isaac i2 -= 2; 577e48d15efSToby Isaac idiag -= 4; 578e48d15efSToby Isaac for (i = m - 2; i >= 0; i--) { 579e48d15efSToby Isaac v = aa + 4 * (diag[i] + 1); 580e48d15efSToby Isaac vi = aj + diag[i] + 1; 581e48d15efSToby Isaac nz = ai[i + 1] - diag[i] - 1; 5829371c9d4SSatish Balay s[0] = xb[i2]; 5839371c9d4SSatish Balay s[1] = xb[i2 + 1]; 584e48d15efSToby Isaac for (j = 0; j < nz; j++) { 585e48d15efSToby Isaac idx = 2 * vi[j]; 586e48d15efSToby Isaac it = 4 * j; 5879371c9d4SSatish Balay xw[0] = x[idx]; 5889371c9d4SSatish Balay xw[1] = x[1 + idx]; 589e48d15efSToby Isaac PetscKernel_v_gets_v_minus_A_times_w_2(s, (v + it), xw); 590e48d15efSToby Isaac } 591e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_2(xw, idiag, s); 5929371c9d4SSatish Balay x[i2] = xw[0]; 5939371c9d4SSatish Balay x[i2 + 1] = xw[1]; 594e48d15efSToby Isaac idiag -= 4; 595e48d15efSToby Isaac i2 -= 2; 596e48d15efSToby Isaac } 597e48d15efSToby Isaac break; 598e48d15efSToby Isaac case 3: 5999371c9d4SSatish Balay s[0] = xb[i2]; 6009371c9d4SSatish Balay s[1] = xb[i2 + 1]; 6019371c9d4SSatish Balay s[2] = xb[i2 + 2]; 602e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_3(xw, idiag, s); 6039371c9d4SSatish Balay x[i2] = xw[0]; 6049371c9d4SSatish Balay x[i2 + 1] = xw[1]; 6059371c9d4SSatish Balay x[i2 + 2] = xw[2]; 606e48d15efSToby Isaac i2 -= 3; 607e48d15efSToby Isaac idiag -= 9; 608e48d15efSToby Isaac for (i = m - 2; i >= 0; i--) { 609e48d15efSToby Isaac v = aa + 9 * (diag[i] + 1); 610e48d15efSToby Isaac vi = aj + diag[i] + 1; 611e48d15efSToby Isaac nz = ai[i + 1] - diag[i] - 1; 6129371c9d4SSatish Balay s[0] = xb[i2]; 6139371c9d4SSatish Balay s[1] = xb[i2 + 1]; 6149371c9d4SSatish Balay s[2] = xb[i2 + 2]; 615e48d15efSToby Isaac while (nz--) { 616e48d15efSToby Isaac idx = 3 * (*vi++); 6179371c9d4SSatish Balay xw[0] = x[idx]; 6189371c9d4SSatish Balay xw[1] = x[1 + idx]; 6199371c9d4SSatish Balay xw[2] = x[2 + idx]; 620e48d15efSToby Isaac PetscKernel_v_gets_v_minus_A_times_w_3(s, v, xw); 621e48d15efSToby Isaac v += 9; 622e48d15efSToby Isaac } 623e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_3(xw, idiag, s); 6249371c9d4SSatish Balay x[i2] = xw[0]; 6259371c9d4SSatish Balay x[i2 + 1] = xw[1]; 6269371c9d4SSatish Balay x[i2 + 2] = xw[2]; 627e48d15efSToby Isaac idiag -= 9; 628e48d15efSToby Isaac i2 -= 3; 629e48d15efSToby Isaac } 630e48d15efSToby Isaac break; 631e48d15efSToby Isaac case 4: 6329371c9d4SSatish Balay s[0] = xb[i2]; 6339371c9d4SSatish Balay s[1] = xb[i2 + 1]; 6349371c9d4SSatish Balay s[2] = xb[i2 + 2]; 6359371c9d4SSatish Balay s[3] = xb[i2 + 3]; 636e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_4(xw, idiag, s); 6379371c9d4SSatish Balay x[i2] = xw[0]; 6389371c9d4SSatish Balay x[i2 + 1] = xw[1]; 6399371c9d4SSatish Balay x[i2 + 2] = xw[2]; 6409371c9d4SSatish Balay x[i2 + 3] = xw[3]; 641e48d15efSToby Isaac i2 -= 4; 642e48d15efSToby Isaac idiag -= 16; 643e48d15efSToby Isaac for (i = m - 2; i >= 0; i--) { 644e48d15efSToby Isaac v = aa + 16 * (diag[i] + 1); 645e48d15efSToby Isaac vi = aj + diag[i] + 1; 646e48d15efSToby Isaac nz = ai[i + 1] - diag[i] - 1; 6479371c9d4SSatish Balay s[0] = xb[i2]; 6489371c9d4SSatish Balay s[1] = xb[i2 + 1]; 6499371c9d4SSatish Balay s[2] = xb[i2 + 2]; 6509371c9d4SSatish Balay s[3] = xb[i2 + 3]; 651e48d15efSToby Isaac while (nz--) { 652e48d15efSToby Isaac idx = 4 * (*vi++); 6539371c9d4SSatish Balay xw[0] = x[idx]; 6549371c9d4SSatish Balay xw[1] = x[1 + idx]; 6559371c9d4SSatish Balay xw[2] = x[2 + idx]; 6569371c9d4SSatish Balay xw[3] = x[3 + idx]; 657e48d15efSToby Isaac PetscKernel_v_gets_v_minus_A_times_w_4(s, v, xw); 658e48d15efSToby Isaac v += 16; 659e48d15efSToby Isaac } 660e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_4(xw, idiag, s); 6619371c9d4SSatish Balay x[i2] = xw[0]; 6629371c9d4SSatish Balay x[i2 + 1] = xw[1]; 6639371c9d4SSatish Balay x[i2 + 2] = xw[2]; 6649371c9d4SSatish Balay x[i2 + 3] = xw[3]; 665e48d15efSToby Isaac idiag -= 16; 666e48d15efSToby Isaac i2 -= 4; 667e48d15efSToby Isaac } 668e48d15efSToby Isaac break; 669e48d15efSToby Isaac case 5: 6709371c9d4SSatish Balay s[0] = xb[i2]; 6719371c9d4SSatish Balay s[1] = xb[i2 + 1]; 6729371c9d4SSatish Balay s[2] = xb[i2 + 2]; 6739371c9d4SSatish Balay s[3] = xb[i2 + 3]; 6749371c9d4SSatish Balay s[4] = xb[i2 + 4]; 675e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_5(xw, idiag, s); 6769371c9d4SSatish Balay x[i2] = xw[0]; 6779371c9d4SSatish Balay x[i2 + 1] = xw[1]; 6789371c9d4SSatish Balay x[i2 + 2] = xw[2]; 6799371c9d4SSatish Balay x[i2 + 3] = xw[3]; 6809371c9d4SSatish Balay x[i2 + 4] = xw[4]; 681e48d15efSToby Isaac i2 -= 5; 682e48d15efSToby Isaac idiag -= 25; 683e48d15efSToby Isaac for (i = m - 2; i >= 0; i--) { 684e48d15efSToby Isaac v = aa + 25 * (diag[i] + 1); 685e48d15efSToby Isaac vi = aj + diag[i] + 1; 686e48d15efSToby Isaac nz = ai[i + 1] - diag[i] - 1; 6879371c9d4SSatish Balay s[0] = xb[i2]; 6889371c9d4SSatish Balay s[1] = xb[i2 + 1]; 6899371c9d4SSatish Balay s[2] = xb[i2 + 2]; 6909371c9d4SSatish Balay s[3] = xb[i2 + 3]; 6919371c9d4SSatish Balay s[4] = xb[i2 + 4]; 692e48d15efSToby Isaac while (nz--) { 693e48d15efSToby Isaac idx = 5 * (*vi++); 6949371c9d4SSatish Balay xw[0] = x[idx]; 6959371c9d4SSatish Balay xw[1] = x[1 + idx]; 6969371c9d4SSatish Balay xw[2] = x[2 + idx]; 6979371c9d4SSatish Balay xw[3] = x[3 + idx]; 6989371c9d4SSatish Balay xw[4] = x[4 + idx]; 699e48d15efSToby Isaac PetscKernel_v_gets_v_minus_A_times_w_5(s, v, xw); 700e48d15efSToby Isaac v += 25; 701e48d15efSToby Isaac } 702e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_5(xw, idiag, s); 7039371c9d4SSatish Balay x[i2] = xw[0]; 7049371c9d4SSatish Balay x[i2 + 1] = xw[1]; 7059371c9d4SSatish Balay x[i2 + 2] = xw[2]; 7069371c9d4SSatish Balay x[i2 + 3] = xw[3]; 7079371c9d4SSatish Balay x[i2 + 4] = xw[4]; 708e48d15efSToby Isaac idiag -= 25; 709e48d15efSToby Isaac i2 -= 5; 710e48d15efSToby Isaac } 711e48d15efSToby Isaac break; 712e48d15efSToby Isaac case 6: 7139371c9d4SSatish Balay s[0] = xb[i2]; 7149371c9d4SSatish Balay s[1] = xb[i2 + 1]; 7159371c9d4SSatish Balay s[2] = xb[i2 + 2]; 7169371c9d4SSatish Balay s[3] = xb[i2 + 3]; 7179371c9d4SSatish Balay s[4] = xb[i2 + 4]; 7189371c9d4SSatish Balay s[5] = xb[i2 + 5]; 719e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_6(xw, idiag, s); 7209371c9d4SSatish Balay x[i2] = xw[0]; 7219371c9d4SSatish Balay x[i2 + 1] = xw[1]; 7229371c9d4SSatish Balay x[i2 + 2] = xw[2]; 7239371c9d4SSatish Balay x[i2 + 3] = xw[3]; 7249371c9d4SSatish Balay x[i2 + 4] = xw[4]; 7259371c9d4SSatish Balay x[i2 + 5] = xw[5]; 726e48d15efSToby Isaac i2 -= 6; 727e48d15efSToby Isaac idiag -= 36; 728e48d15efSToby Isaac for (i = m - 2; i >= 0; i--) { 729e48d15efSToby Isaac v = aa + 36 * (diag[i] + 1); 730e48d15efSToby Isaac vi = aj + diag[i] + 1; 731e48d15efSToby Isaac nz = ai[i + 1] - diag[i] - 1; 7329371c9d4SSatish Balay s[0] = xb[i2]; 7339371c9d4SSatish Balay s[1] = xb[i2 + 1]; 7349371c9d4SSatish Balay s[2] = xb[i2 + 2]; 7359371c9d4SSatish Balay s[3] = xb[i2 + 3]; 7369371c9d4SSatish Balay s[4] = xb[i2 + 4]; 7379371c9d4SSatish Balay s[5] = xb[i2 + 5]; 738e48d15efSToby Isaac while (nz--) { 739e48d15efSToby Isaac idx = 6 * (*vi++); 7409371c9d4SSatish Balay xw[0] = x[idx]; 7419371c9d4SSatish Balay xw[1] = x[1 + idx]; 7429371c9d4SSatish Balay xw[2] = x[2 + idx]; 7439371c9d4SSatish Balay xw[3] = x[3 + idx]; 7449371c9d4SSatish Balay xw[4] = x[4 + idx]; 7459371c9d4SSatish Balay xw[5] = x[5 + idx]; 746e48d15efSToby Isaac PetscKernel_v_gets_v_minus_A_times_w_6(s, v, xw); 747e48d15efSToby Isaac v += 36; 748e48d15efSToby Isaac } 749e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_6(xw, idiag, s); 7509371c9d4SSatish Balay x[i2] = xw[0]; 7519371c9d4SSatish Balay x[i2 + 1] = xw[1]; 7529371c9d4SSatish Balay x[i2 + 2] = xw[2]; 7539371c9d4SSatish Balay x[i2 + 3] = xw[3]; 7549371c9d4SSatish Balay x[i2 + 4] = xw[4]; 7559371c9d4SSatish Balay x[i2 + 5] = xw[5]; 756e48d15efSToby Isaac idiag -= 36; 757e48d15efSToby Isaac i2 -= 6; 758e48d15efSToby Isaac } 759e48d15efSToby Isaac break; 760e48d15efSToby Isaac case 7: 7619371c9d4SSatish Balay s[0] = xb[i2]; 7629371c9d4SSatish Balay s[1] = xb[i2 + 1]; 7639371c9d4SSatish Balay s[2] = xb[i2 + 2]; 7649371c9d4SSatish Balay s[3] = xb[i2 + 3]; 7659371c9d4SSatish Balay s[4] = xb[i2 + 4]; 7669371c9d4SSatish Balay s[5] = xb[i2 + 5]; 7679371c9d4SSatish Balay s[6] = xb[i2 + 6]; 768e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_7(x, idiag, b); 7699371c9d4SSatish Balay x[i2] = xw[0]; 7709371c9d4SSatish Balay x[i2 + 1] = xw[1]; 7719371c9d4SSatish Balay x[i2 + 2] = xw[2]; 7729371c9d4SSatish Balay x[i2 + 3] = xw[3]; 7739371c9d4SSatish Balay x[i2 + 4] = xw[4]; 7749371c9d4SSatish Balay x[i2 + 5] = xw[5]; 7759371c9d4SSatish Balay x[i2 + 6] = xw[6]; 776e48d15efSToby Isaac i2 -= 7; 777e48d15efSToby Isaac idiag -= 49; 778e48d15efSToby Isaac for (i = m - 2; i >= 0; i--) { 779e48d15efSToby Isaac v = aa + 49 * (diag[i] + 1); 780e48d15efSToby Isaac vi = aj + diag[i] + 1; 781e48d15efSToby Isaac nz = ai[i + 1] - diag[i] - 1; 7829371c9d4SSatish Balay s[0] = xb[i2]; 7839371c9d4SSatish Balay s[1] = xb[i2 + 1]; 7849371c9d4SSatish Balay s[2] = xb[i2 + 2]; 7859371c9d4SSatish Balay s[3] = xb[i2 + 3]; 7869371c9d4SSatish Balay s[4] = xb[i2 + 4]; 7879371c9d4SSatish Balay s[5] = xb[i2 + 5]; 7889371c9d4SSatish Balay s[6] = xb[i2 + 6]; 789e48d15efSToby Isaac while (nz--) { 790e48d15efSToby Isaac idx = 7 * (*vi++); 7919371c9d4SSatish Balay xw[0] = x[idx]; 7929371c9d4SSatish Balay xw[1] = x[1 + idx]; 7939371c9d4SSatish Balay xw[2] = x[2 + idx]; 7949371c9d4SSatish Balay xw[3] = x[3 + idx]; 7959371c9d4SSatish Balay xw[4] = x[4 + idx]; 7969371c9d4SSatish Balay xw[5] = x[5 + idx]; 7979371c9d4SSatish Balay xw[6] = x[6 + idx]; 798e48d15efSToby Isaac PetscKernel_v_gets_v_minus_A_times_w_7(s, v, xw); 799e48d15efSToby Isaac v += 49; 800e48d15efSToby Isaac } 801e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_7(xw, idiag, s); 8029371c9d4SSatish Balay x[i2] = xw[0]; 8039371c9d4SSatish Balay x[i2 + 1] = xw[1]; 8049371c9d4SSatish Balay x[i2 + 2] = xw[2]; 8059371c9d4SSatish Balay x[i2 + 3] = xw[3]; 8069371c9d4SSatish Balay x[i2 + 4] = xw[4]; 8079371c9d4SSatish Balay x[i2 + 5] = xw[5]; 8089371c9d4SSatish Balay x[i2 + 6] = xw[6]; 809e48d15efSToby Isaac idiag -= 49; 810e48d15efSToby Isaac i2 -= 7; 811e48d15efSToby Isaac } 812e48d15efSToby Isaac break; 813e48d15efSToby Isaac default: 8149566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(w, xb + i2, bs)); 81596b95a6bSBarry Smith PetscKernel_w_gets_Ar_times_v(bs, bs, w, idiag, x + i2); 816de80f912SBarry Smith i2 -= bs; 817e48d15efSToby Isaac idiag -= bs2; 818de80f912SBarry Smith for (i = m - 2; i >= 0; i--) { 819de80f912SBarry Smith v = aa + bs2 * (diag[i] + 1); 820de80f912SBarry Smith vi = aj + diag[i] + 1; 821de80f912SBarry Smith nz = ai[i + 1] - diag[i] - 1; 822de80f912SBarry Smith 8239566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(w, xb + i2, bs)); 824de80f912SBarry Smith /* copy all rows of x that are needed into contiguous space */ 825de80f912SBarry Smith workt = work; 826de80f912SBarry Smith for (j = 0; j < nz; j++) { 8279566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(workt, x + bs * (*vi++), bs)); 828de80f912SBarry Smith workt += bs; 829de80f912SBarry Smith } 83096b95a6bSBarry Smith PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, v, work); 83196b95a6bSBarry Smith PetscKernel_w_gets_Ar_times_v(bs, bs, w, idiag, x + i2); 832e48d15efSToby Isaac 833de80f912SBarry Smith idiag -= bs2; 834de80f912SBarry Smith i2 -= bs; 835de80f912SBarry Smith } 836e48d15efSToby Isaac break; 837e48d15efSToby Isaac } 8389566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(1.0 * bs2 * (a->nz))); 839de80f912SBarry Smith } 840e48d15efSToby Isaac its--; 841e48d15efSToby Isaac } 842e48d15efSToby Isaac while (its--) { 843e48d15efSToby Isaac if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) { 844e48d15efSToby Isaac idiag = a->idiag; 845e48d15efSToby Isaac i2 = 0; 846e48d15efSToby Isaac switch (bs) { 847e48d15efSToby Isaac case 1: 848e48d15efSToby Isaac for (i = 0; i < m; i++) { 849e48d15efSToby Isaac v = aa + ai[i]; 850e48d15efSToby Isaac vi = aj + ai[i]; 851e48d15efSToby Isaac nz = ai[i + 1] - ai[i]; 852e48d15efSToby Isaac s[0] = b[i2]; 853e48d15efSToby Isaac for (j = 0; j < nz; j++) { 854e48d15efSToby Isaac xw[0] = x[vi[j]]; 855e48d15efSToby Isaac PetscKernel_v_gets_v_minus_A_times_w_1(s, (v + j), xw); 856e48d15efSToby Isaac } 857e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_1(xw, idiag, s); 858e48d15efSToby Isaac x[i2] += xw[0]; 859e48d15efSToby Isaac idiag += 1; 860e48d15efSToby Isaac i2 += 1; 861e48d15efSToby Isaac } 862e48d15efSToby Isaac break; 863e48d15efSToby Isaac case 2: 864e48d15efSToby Isaac for (i = 0; i < m; i++) { 865e48d15efSToby Isaac v = aa + 4 * ai[i]; 866e48d15efSToby Isaac vi = aj + ai[i]; 867e48d15efSToby Isaac nz = ai[i + 1] - ai[i]; 8689371c9d4SSatish Balay s[0] = b[i2]; 8699371c9d4SSatish Balay s[1] = b[i2 + 1]; 870e48d15efSToby Isaac for (j = 0; j < nz; j++) { 871e48d15efSToby Isaac idx = 2 * vi[j]; 872e48d15efSToby Isaac it = 4 * j; 8739371c9d4SSatish Balay xw[0] = x[idx]; 8749371c9d4SSatish Balay xw[1] = x[1 + idx]; 875e48d15efSToby Isaac PetscKernel_v_gets_v_minus_A_times_w_2(s, (v + it), xw); 876e48d15efSToby Isaac } 877e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_2(xw, idiag, s); 8789371c9d4SSatish Balay x[i2] += xw[0]; 8799371c9d4SSatish Balay x[i2 + 1] += xw[1]; 880e48d15efSToby Isaac idiag += 4; 881e48d15efSToby Isaac i2 += 2; 882e48d15efSToby Isaac } 883e48d15efSToby Isaac break; 884e48d15efSToby Isaac case 3: 885e48d15efSToby Isaac for (i = 0; i < m; i++) { 886e48d15efSToby Isaac v = aa + 9 * ai[i]; 887e48d15efSToby Isaac vi = aj + ai[i]; 888e48d15efSToby Isaac nz = ai[i + 1] - ai[i]; 8899371c9d4SSatish Balay s[0] = b[i2]; 8909371c9d4SSatish Balay s[1] = b[i2 + 1]; 8919371c9d4SSatish Balay s[2] = b[i2 + 2]; 892e48d15efSToby Isaac while (nz--) { 893e48d15efSToby Isaac idx = 3 * (*vi++); 8949371c9d4SSatish Balay xw[0] = x[idx]; 8959371c9d4SSatish Balay xw[1] = x[1 + idx]; 8969371c9d4SSatish Balay xw[2] = x[2 + idx]; 897e48d15efSToby Isaac PetscKernel_v_gets_v_minus_A_times_w_3(s, v, xw); 898e48d15efSToby Isaac v += 9; 899e48d15efSToby Isaac } 900e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_3(xw, idiag, s); 9019371c9d4SSatish Balay x[i2] += xw[0]; 9029371c9d4SSatish Balay x[i2 + 1] += xw[1]; 9039371c9d4SSatish Balay x[i2 + 2] += xw[2]; 904e48d15efSToby Isaac idiag += 9; 905e48d15efSToby Isaac i2 += 3; 906e48d15efSToby Isaac } 907e48d15efSToby Isaac break; 908e48d15efSToby Isaac case 4: 909e48d15efSToby Isaac for (i = 0; i < m; i++) { 910e48d15efSToby Isaac v = aa + 16 * ai[i]; 911e48d15efSToby Isaac vi = aj + ai[i]; 912e48d15efSToby Isaac nz = ai[i + 1] - ai[i]; 9139371c9d4SSatish Balay s[0] = b[i2]; 9149371c9d4SSatish Balay s[1] = b[i2 + 1]; 9159371c9d4SSatish Balay s[2] = b[i2 + 2]; 9169371c9d4SSatish Balay s[3] = b[i2 + 3]; 917e48d15efSToby Isaac while (nz--) { 918e48d15efSToby Isaac idx = 4 * (*vi++); 9199371c9d4SSatish Balay xw[0] = x[idx]; 9209371c9d4SSatish Balay xw[1] = x[1 + idx]; 9219371c9d4SSatish Balay xw[2] = x[2 + idx]; 9229371c9d4SSatish Balay xw[3] = x[3 + idx]; 923e48d15efSToby Isaac PetscKernel_v_gets_v_minus_A_times_w_4(s, v, xw); 924e48d15efSToby Isaac v += 16; 925e48d15efSToby Isaac } 926e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_4(xw, idiag, s); 9279371c9d4SSatish Balay x[i2] += xw[0]; 9289371c9d4SSatish Balay x[i2 + 1] += xw[1]; 9299371c9d4SSatish Balay x[i2 + 2] += xw[2]; 9309371c9d4SSatish Balay x[i2 + 3] += xw[3]; 931e48d15efSToby Isaac idiag += 16; 932e48d15efSToby Isaac i2 += 4; 933e48d15efSToby Isaac } 934e48d15efSToby Isaac break; 935e48d15efSToby Isaac case 5: 936e48d15efSToby Isaac for (i = 0; i < m; i++) { 937e48d15efSToby Isaac v = aa + 25 * ai[i]; 938e48d15efSToby Isaac vi = aj + ai[i]; 939e48d15efSToby Isaac nz = ai[i + 1] - ai[i]; 9409371c9d4SSatish Balay s[0] = b[i2]; 9419371c9d4SSatish Balay s[1] = b[i2 + 1]; 9429371c9d4SSatish Balay s[2] = b[i2 + 2]; 9439371c9d4SSatish Balay s[3] = b[i2 + 3]; 9449371c9d4SSatish Balay s[4] = b[i2 + 4]; 945e48d15efSToby Isaac while (nz--) { 946e48d15efSToby Isaac idx = 5 * (*vi++); 9479371c9d4SSatish Balay xw[0] = x[idx]; 9489371c9d4SSatish Balay xw[1] = x[1 + idx]; 9499371c9d4SSatish Balay xw[2] = x[2 + idx]; 9509371c9d4SSatish Balay xw[3] = x[3 + idx]; 9519371c9d4SSatish Balay xw[4] = x[4 + idx]; 952e48d15efSToby Isaac PetscKernel_v_gets_v_minus_A_times_w_5(s, v, xw); 953e48d15efSToby Isaac v += 25; 954e48d15efSToby Isaac } 955e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_5(xw, idiag, s); 9569371c9d4SSatish Balay x[i2] += xw[0]; 9579371c9d4SSatish Balay x[i2 + 1] += xw[1]; 9589371c9d4SSatish Balay x[i2 + 2] += xw[2]; 9599371c9d4SSatish Balay x[i2 + 3] += xw[3]; 9609371c9d4SSatish Balay x[i2 + 4] += xw[4]; 961e48d15efSToby Isaac idiag += 25; 962e48d15efSToby Isaac i2 += 5; 963e48d15efSToby Isaac } 964e48d15efSToby Isaac break; 965e48d15efSToby Isaac case 6: 966e48d15efSToby Isaac for (i = 0; i < m; i++) { 967e48d15efSToby Isaac v = aa + 36 * ai[i]; 968e48d15efSToby Isaac vi = aj + ai[i]; 969e48d15efSToby Isaac nz = ai[i + 1] - ai[i]; 9709371c9d4SSatish Balay s[0] = b[i2]; 9719371c9d4SSatish Balay s[1] = b[i2 + 1]; 9729371c9d4SSatish Balay s[2] = b[i2 + 2]; 9739371c9d4SSatish Balay s[3] = b[i2 + 3]; 9749371c9d4SSatish Balay s[4] = b[i2 + 4]; 9759371c9d4SSatish Balay s[5] = b[i2 + 5]; 976e48d15efSToby Isaac while (nz--) { 977e48d15efSToby Isaac idx = 6 * (*vi++); 9789371c9d4SSatish Balay xw[0] = x[idx]; 9799371c9d4SSatish Balay xw[1] = x[1 + idx]; 9809371c9d4SSatish Balay xw[2] = x[2 + idx]; 9819371c9d4SSatish Balay xw[3] = x[3 + idx]; 9829371c9d4SSatish Balay xw[4] = x[4 + idx]; 9839371c9d4SSatish Balay xw[5] = x[5 + idx]; 984e48d15efSToby Isaac PetscKernel_v_gets_v_minus_A_times_w_6(s, v, xw); 985e48d15efSToby Isaac v += 36; 986e48d15efSToby Isaac } 987e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_6(xw, idiag, s); 9889371c9d4SSatish Balay x[i2] += xw[0]; 9899371c9d4SSatish Balay x[i2 + 1] += xw[1]; 9909371c9d4SSatish Balay x[i2 + 2] += xw[2]; 9919371c9d4SSatish Balay x[i2 + 3] += xw[3]; 9929371c9d4SSatish Balay x[i2 + 4] += xw[4]; 9939371c9d4SSatish Balay x[i2 + 5] += xw[5]; 994e48d15efSToby Isaac idiag += 36; 995e48d15efSToby Isaac i2 += 6; 996e48d15efSToby Isaac } 997e48d15efSToby Isaac break; 998e48d15efSToby Isaac case 7: 999e48d15efSToby Isaac for (i = 0; i < m; i++) { 1000e48d15efSToby Isaac v = aa + 49 * ai[i]; 1001e48d15efSToby Isaac vi = aj + ai[i]; 1002e48d15efSToby Isaac nz = ai[i + 1] - ai[i]; 10039371c9d4SSatish Balay s[0] = b[i2]; 10049371c9d4SSatish Balay s[1] = b[i2 + 1]; 10059371c9d4SSatish Balay s[2] = b[i2 + 2]; 10069371c9d4SSatish Balay s[3] = b[i2 + 3]; 10079371c9d4SSatish Balay s[4] = b[i2 + 4]; 10089371c9d4SSatish Balay s[5] = b[i2 + 5]; 10099371c9d4SSatish Balay s[6] = b[i2 + 6]; 1010e48d15efSToby Isaac while (nz--) { 1011e48d15efSToby Isaac idx = 7 * (*vi++); 10129371c9d4SSatish Balay xw[0] = x[idx]; 10139371c9d4SSatish Balay xw[1] = x[1 + idx]; 10149371c9d4SSatish Balay xw[2] = x[2 + idx]; 10159371c9d4SSatish Balay xw[3] = x[3 + idx]; 10169371c9d4SSatish Balay xw[4] = x[4 + idx]; 10179371c9d4SSatish Balay xw[5] = x[5 + idx]; 10189371c9d4SSatish Balay xw[6] = x[6 + idx]; 1019e48d15efSToby Isaac PetscKernel_v_gets_v_minus_A_times_w_7(s, v, xw); 1020e48d15efSToby Isaac v += 49; 1021e48d15efSToby Isaac } 1022e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_7(xw, idiag, s); 10239371c9d4SSatish Balay x[i2] += xw[0]; 10249371c9d4SSatish Balay x[i2 + 1] += xw[1]; 10259371c9d4SSatish Balay x[i2 + 2] += xw[2]; 10269371c9d4SSatish Balay x[i2 + 3] += xw[3]; 10279371c9d4SSatish Balay x[i2 + 4] += xw[4]; 10289371c9d4SSatish Balay x[i2 + 5] += xw[5]; 10299371c9d4SSatish Balay x[i2 + 6] += xw[6]; 1030e48d15efSToby Isaac idiag += 49; 1031e48d15efSToby Isaac i2 += 7; 1032e48d15efSToby Isaac } 1033e48d15efSToby Isaac break; 1034e48d15efSToby Isaac default: 1035e48d15efSToby Isaac for (i = 0; i < m; i++) { 1036e48d15efSToby Isaac v = aa + bs2 * ai[i]; 1037e48d15efSToby Isaac vi = aj + ai[i]; 1038e48d15efSToby Isaac nz = ai[i + 1] - ai[i]; 1039e48d15efSToby Isaac 10409566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(w, b + i2, bs)); 1041e48d15efSToby Isaac /* copy all rows of x that are needed into contiguous space */ 1042e48d15efSToby Isaac workt = work; 1043e48d15efSToby Isaac for (j = 0; j < nz; j++) { 10449566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(workt, x + bs * (*vi++), bs)); 1045e48d15efSToby Isaac workt += bs; 1046e48d15efSToby Isaac } 1047e48d15efSToby Isaac PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, v, work); 1048e48d15efSToby Isaac PetscKernel_w_gets_w_plus_Ar_times_v(bs, bs, w, idiag, x + i2); 1049e48d15efSToby Isaac 1050e48d15efSToby Isaac idiag += bs2; 1051e48d15efSToby Isaac i2 += bs; 1052e48d15efSToby Isaac } 1053e48d15efSToby Isaac break; 1054e48d15efSToby Isaac } 10559566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(2.0 * bs2 * a->nz)); 1056e48d15efSToby Isaac } 1057e48d15efSToby Isaac if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 1058e48d15efSToby Isaac idiag = a->idiag + bs2 * (a->mbs - 1); 1059e48d15efSToby Isaac i2 = bs * (m - 1); 1060e48d15efSToby Isaac switch (bs) { 1061e48d15efSToby Isaac case 1: 1062e48d15efSToby Isaac for (i = m - 1; i >= 0; i--) { 1063e48d15efSToby Isaac v = aa + ai[i]; 1064e48d15efSToby Isaac vi = aj + ai[i]; 1065e48d15efSToby Isaac nz = ai[i + 1] - ai[i]; 1066e48d15efSToby Isaac s[0] = b[i2]; 1067e48d15efSToby Isaac for (j = 0; j < nz; j++) { 1068e48d15efSToby Isaac xw[0] = x[vi[j]]; 1069e48d15efSToby Isaac PetscKernel_v_gets_v_minus_A_times_w_1(s, (v + j), xw); 1070e48d15efSToby Isaac } 1071e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_1(xw, idiag, s); 1072e48d15efSToby Isaac x[i2] += xw[0]; 1073e48d15efSToby Isaac idiag -= 1; 1074e48d15efSToby Isaac i2 -= 1; 1075e48d15efSToby Isaac } 1076e48d15efSToby Isaac break; 1077e48d15efSToby Isaac case 2: 1078e48d15efSToby Isaac for (i = m - 1; i >= 0; i--) { 1079e48d15efSToby Isaac v = aa + 4 * ai[i]; 1080e48d15efSToby Isaac vi = aj + ai[i]; 1081e48d15efSToby Isaac nz = ai[i + 1] - ai[i]; 10829371c9d4SSatish Balay s[0] = b[i2]; 10839371c9d4SSatish Balay s[1] = b[i2 + 1]; 1084e48d15efSToby Isaac for (j = 0; j < nz; j++) { 1085e48d15efSToby Isaac idx = 2 * vi[j]; 1086e48d15efSToby Isaac it = 4 * j; 10879371c9d4SSatish Balay xw[0] = x[idx]; 10889371c9d4SSatish Balay xw[1] = x[1 + idx]; 1089e48d15efSToby Isaac PetscKernel_v_gets_v_minus_A_times_w_2(s, (v + it), xw); 1090e48d15efSToby Isaac } 1091e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_2(xw, idiag, s); 10929371c9d4SSatish Balay x[i2] += xw[0]; 10939371c9d4SSatish Balay x[i2 + 1] += xw[1]; 1094e48d15efSToby Isaac idiag -= 4; 1095e48d15efSToby Isaac i2 -= 2; 1096e48d15efSToby Isaac } 1097e48d15efSToby Isaac break; 1098e48d15efSToby Isaac case 3: 1099e48d15efSToby Isaac for (i = m - 1; i >= 0; i--) { 1100e48d15efSToby Isaac v = aa + 9 * ai[i]; 1101e48d15efSToby Isaac vi = aj + ai[i]; 1102e48d15efSToby Isaac nz = ai[i + 1] - ai[i]; 11039371c9d4SSatish Balay s[0] = b[i2]; 11049371c9d4SSatish Balay s[1] = b[i2 + 1]; 11059371c9d4SSatish Balay s[2] = b[i2 + 2]; 1106e48d15efSToby Isaac while (nz--) { 1107e48d15efSToby Isaac idx = 3 * (*vi++); 11089371c9d4SSatish Balay xw[0] = x[idx]; 11099371c9d4SSatish Balay xw[1] = x[1 + idx]; 11109371c9d4SSatish Balay xw[2] = x[2 + idx]; 1111e48d15efSToby Isaac PetscKernel_v_gets_v_minus_A_times_w_3(s, v, xw); 1112e48d15efSToby Isaac v += 9; 1113e48d15efSToby Isaac } 1114e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_3(xw, idiag, s); 11159371c9d4SSatish Balay x[i2] += xw[0]; 11169371c9d4SSatish Balay x[i2 + 1] += xw[1]; 11179371c9d4SSatish Balay x[i2 + 2] += xw[2]; 1118e48d15efSToby Isaac idiag -= 9; 1119e48d15efSToby Isaac i2 -= 3; 1120e48d15efSToby Isaac } 1121e48d15efSToby Isaac break; 1122e48d15efSToby Isaac case 4: 1123e48d15efSToby Isaac for (i = m - 1; i >= 0; i--) { 1124e48d15efSToby Isaac v = aa + 16 * ai[i]; 1125e48d15efSToby Isaac vi = aj + ai[i]; 1126e48d15efSToby Isaac nz = ai[i + 1] - ai[i]; 11279371c9d4SSatish Balay s[0] = b[i2]; 11289371c9d4SSatish Balay s[1] = b[i2 + 1]; 11299371c9d4SSatish Balay s[2] = b[i2 + 2]; 11309371c9d4SSatish Balay s[3] = b[i2 + 3]; 1131e48d15efSToby Isaac while (nz--) { 1132e48d15efSToby Isaac idx = 4 * (*vi++); 11339371c9d4SSatish Balay xw[0] = x[idx]; 11349371c9d4SSatish Balay xw[1] = x[1 + idx]; 11359371c9d4SSatish Balay xw[2] = x[2 + idx]; 11369371c9d4SSatish Balay xw[3] = x[3 + idx]; 1137e48d15efSToby Isaac PetscKernel_v_gets_v_minus_A_times_w_4(s, v, xw); 1138e48d15efSToby Isaac v += 16; 1139e48d15efSToby Isaac } 1140e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_4(xw, idiag, s); 11419371c9d4SSatish Balay x[i2] += xw[0]; 11429371c9d4SSatish Balay x[i2 + 1] += xw[1]; 11439371c9d4SSatish Balay x[i2 + 2] += xw[2]; 11449371c9d4SSatish Balay x[i2 + 3] += xw[3]; 1145e48d15efSToby Isaac idiag -= 16; 1146e48d15efSToby Isaac i2 -= 4; 1147e48d15efSToby Isaac } 1148e48d15efSToby Isaac break; 1149e48d15efSToby Isaac case 5: 1150e48d15efSToby Isaac for (i = m - 1; i >= 0; i--) { 1151e48d15efSToby Isaac v = aa + 25 * ai[i]; 1152e48d15efSToby Isaac vi = aj + ai[i]; 1153e48d15efSToby Isaac nz = ai[i + 1] - ai[i]; 11549371c9d4SSatish Balay s[0] = b[i2]; 11559371c9d4SSatish Balay s[1] = b[i2 + 1]; 11569371c9d4SSatish Balay s[2] = b[i2 + 2]; 11579371c9d4SSatish Balay s[3] = b[i2 + 3]; 11589371c9d4SSatish Balay s[4] = b[i2 + 4]; 1159e48d15efSToby Isaac while (nz--) { 1160e48d15efSToby Isaac idx = 5 * (*vi++); 11619371c9d4SSatish Balay xw[0] = x[idx]; 11629371c9d4SSatish Balay xw[1] = x[1 + idx]; 11639371c9d4SSatish Balay xw[2] = x[2 + idx]; 11649371c9d4SSatish Balay xw[3] = x[3 + idx]; 11659371c9d4SSatish Balay xw[4] = x[4 + idx]; 1166e48d15efSToby Isaac PetscKernel_v_gets_v_minus_A_times_w_5(s, v, xw); 1167e48d15efSToby Isaac v += 25; 1168e48d15efSToby Isaac } 1169e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_5(xw, idiag, s); 11709371c9d4SSatish Balay x[i2] += xw[0]; 11719371c9d4SSatish Balay x[i2 + 1] += xw[1]; 11729371c9d4SSatish Balay x[i2 + 2] += xw[2]; 11739371c9d4SSatish Balay x[i2 + 3] += xw[3]; 11749371c9d4SSatish Balay x[i2 + 4] += xw[4]; 1175e48d15efSToby Isaac idiag -= 25; 1176e48d15efSToby Isaac i2 -= 5; 1177e48d15efSToby Isaac } 1178e48d15efSToby Isaac break; 1179e48d15efSToby Isaac case 6: 1180e48d15efSToby Isaac for (i = m - 1; i >= 0; i--) { 1181e48d15efSToby Isaac v = aa + 36 * ai[i]; 1182e48d15efSToby Isaac vi = aj + ai[i]; 1183e48d15efSToby Isaac nz = ai[i + 1] - ai[i]; 11849371c9d4SSatish Balay s[0] = b[i2]; 11859371c9d4SSatish Balay s[1] = b[i2 + 1]; 11869371c9d4SSatish Balay s[2] = b[i2 + 2]; 11879371c9d4SSatish Balay s[3] = b[i2 + 3]; 11889371c9d4SSatish Balay s[4] = b[i2 + 4]; 11899371c9d4SSatish Balay s[5] = b[i2 + 5]; 1190e48d15efSToby Isaac while (nz--) { 1191e48d15efSToby Isaac idx = 6 * (*vi++); 11929371c9d4SSatish Balay xw[0] = x[idx]; 11939371c9d4SSatish Balay xw[1] = x[1 + idx]; 11949371c9d4SSatish Balay xw[2] = x[2 + idx]; 11959371c9d4SSatish Balay xw[3] = x[3 + idx]; 11969371c9d4SSatish Balay xw[4] = x[4 + idx]; 11979371c9d4SSatish Balay xw[5] = x[5 + idx]; 1198e48d15efSToby Isaac PetscKernel_v_gets_v_minus_A_times_w_6(s, v, xw); 1199e48d15efSToby Isaac v += 36; 1200e48d15efSToby Isaac } 1201e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_6(xw, idiag, s); 12029371c9d4SSatish Balay x[i2] += xw[0]; 12039371c9d4SSatish Balay x[i2 + 1] += xw[1]; 12049371c9d4SSatish Balay x[i2 + 2] += xw[2]; 12059371c9d4SSatish Balay x[i2 + 3] += xw[3]; 12069371c9d4SSatish Balay x[i2 + 4] += xw[4]; 12079371c9d4SSatish Balay x[i2 + 5] += xw[5]; 1208e48d15efSToby Isaac idiag -= 36; 1209e48d15efSToby Isaac i2 -= 6; 1210e48d15efSToby Isaac } 1211e48d15efSToby Isaac break; 1212e48d15efSToby Isaac case 7: 1213e48d15efSToby Isaac for (i = m - 1; i >= 0; i--) { 1214e48d15efSToby Isaac v = aa + 49 * ai[i]; 1215e48d15efSToby Isaac vi = aj + ai[i]; 1216e48d15efSToby Isaac nz = ai[i + 1] - ai[i]; 12179371c9d4SSatish Balay s[0] = b[i2]; 12189371c9d4SSatish Balay s[1] = b[i2 + 1]; 12199371c9d4SSatish Balay s[2] = b[i2 + 2]; 12209371c9d4SSatish Balay s[3] = b[i2 + 3]; 12219371c9d4SSatish Balay s[4] = b[i2 + 4]; 12229371c9d4SSatish Balay s[5] = b[i2 + 5]; 12239371c9d4SSatish Balay s[6] = b[i2 + 6]; 1224e48d15efSToby Isaac while (nz--) { 1225e48d15efSToby Isaac idx = 7 * (*vi++); 12269371c9d4SSatish Balay xw[0] = x[idx]; 12279371c9d4SSatish Balay xw[1] = x[1 + idx]; 12289371c9d4SSatish Balay xw[2] = x[2 + idx]; 12299371c9d4SSatish Balay xw[3] = x[3 + idx]; 12309371c9d4SSatish Balay xw[4] = x[4 + idx]; 12319371c9d4SSatish Balay xw[5] = x[5 + idx]; 12329371c9d4SSatish Balay xw[6] = x[6 + idx]; 1233e48d15efSToby Isaac PetscKernel_v_gets_v_minus_A_times_w_7(s, v, xw); 1234e48d15efSToby Isaac v += 49; 1235e48d15efSToby Isaac } 1236e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_7(xw, idiag, s); 12379371c9d4SSatish Balay x[i2] += xw[0]; 12389371c9d4SSatish Balay x[i2 + 1] += xw[1]; 12399371c9d4SSatish Balay x[i2 + 2] += xw[2]; 12409371c9d4SSatish Balay x[i2 + 3] += xw[3]; 12419371c9d4SSatish Balay x[i2 + 4] += xw[4]; 12429371c9d4SSatish Balay x[i2 + 5] += xw[5]; 12439371c9d4SSatish Balay x[i2 + 6] += xw[6]; 1244e48d15efSToby Isaac idiag -= 49; 1245e48d15efSToby Isaac i2 -= 7; 1246e48d15efSToby Isaac } 1247e48d15efSToby Isaac break; 1248e48d15efSToby Isaac default: 1249e48d15efSToby Isaac for (i = m - 1; i >= 0; i--) { 1250e48d15efSToby Isaac v = aa + bs2 * ai[i]; 1251e48d15efSToby Isaac vi = aj + ai[i]; 1252e48d15efSToby Isaac nz = ai[i + 1] - ai[i]; 1253e48d15efSToby Isaac 12549566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(w, b + i2, bs)); 1255e48d15efSToby Isaac /* copy all rows of x that are needed into contiguous space */ 1256e48d15efSToby Isaac workt = work; 1257e48d15efSToby Isaac for (j = 0; j < nz; j++) { 12589566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(workt, x + bs * (*vi++), bs)); 1259e48d15efSToby Isaac workt += bs; 1260e48d15efSToby Isaac } 1261e48d15efSToby Isaac PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, v, work); 1262e48d15efSToby Isaac PetscKernel_w_gets_w_plus_Ar_times_v(bs, bs, w, idiag, x + i2); 1263e48d15efSToby Isaac 1264e48d15efSToby Isaac idiag -= bs2; 1265e48d15efSToby Isaac i2 -= bs; 1266e48d15efSToby Isaac } 1267e48d15efSToby Isaac break; 1268e48d15efSToby Isaac } 12699566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(2.0 * bs2 * (a->nz))); 1270e48d15efSToby Isaac } 1271e48d15efSToby Isaac } 12729566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(xx, &x)); 12739566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(bb, &b)); 12743ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1275de80f912SBarry Smith } 1276de80f912SBarry Smith 1277af674e45SBarry Smith /* 127881824310SBarry Smith Special version for direct calls from Fortran (Used in PETSc-fun3d) 1279af674e45SBarry Smith */ 1280af674e45SBarry Smith #if defined(PETSC_HAVE_FORTRAN_CAPS) 1281af674e45SBarry Smith #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4 1282af674e45SBarry Smith #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 1283af674e45SBarry Smith #define matsetvaluesblocked4_ matsetvaluesblocked4 1284af674e45SBarry Smith #endif 1285af674e45SBarry Smith 1286d71ae5a4SJacob Faibussowitsch PETSC_EXTERN void matsetvaluesblocked4_(Mat *AA, PetscInt *mm, const PetscInt im[], PetscInt *nn, const PetscInt in[], const PetscScalar v[]) 1287d71ae5a4SJacob Faibussowitsch { 1288af674e45SBarry Smith Mat A = *AA; 1289af674e45SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 1290c1ac3661SBarry Smith PetscInt *rp, k, low, high, t, ii, jj, row, nrow, i, col, l, N, m = *mm, n = *nn; 1291c1ac3661SBarry Smith PetscInt *ai = a->i, *ailen = a->ilen; 129217ec6a02SBarry Smith PetscInt *aj = a->j, stepval, lastcol = -1; 1293f15d580aSBarry Smith const PetscScalar *value = v; 12944bb09213Spetsc MatScalar *ap, *aa = a->a, *bap; 1295af674e45SBarry Smith 1296af674e45SBarry Smith PetscFunctionBegin; 1297ce94432eSBarry Smith if (A->rmap->bs != 4) SETERRABORT(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Can only be called with a block size of 4"); 1298af674e45SBarry Smith stepval = (n - 1) * 4; 1299af674e45SBarry Smith for (k = 0; k < m; k++) { /* loop over added rows */ 1300af674e45SBarry Smith row = im[k]; 1301af674e45SBarry Smith rp = aj + ai[row]; 1302af674e45SBarry Smith ap = aa + 16 * ai[row]; 1303af674e45SBarry Smith nrow = ailen[row]; 1304af674e45SBarry Smith low = 0; 130517ec6a02SBarry Smith high = nrow; 1306af674e45SBarry Smith for (l = 0; l < n; l++) { /* loop over added columns */ 1307af674e45SBarry Smith col = in[l]; 1308db4deed7SKarl Rupp if (col <= lastcol) low = 0; 1309db4deed7SKarl Rupp else high = nrow; 131017ec6a02SBarry Smith lastcol = col; 13111e3347e8SBarry Smith value = v + k * (stepval + 4 + l) * 4; 1312af674e45SBarry Smith while (high - low > 7) { 1313af674e45SBarry Smith t = (low + high) / 2; 1314af674e45SBarry Smith if (rp[t] > col) high = t; 1315af674e45SBarry Smith else low = t; 1316af674e45SBarry Smith } 1317af674e45SBarry Smith for (i = low; i < high; i++) { 1318af674e45SBarry Smith if (rp[i] > col) break; 1319af674e45SBarry Smith if (rp[i] == col) { 1320af674e45SBarry Smith bap = ap + 16 * i; 1321af674e45SBarry Smith for (ii = 0; ii < 4; ii++, value += stepval) { 1322ad540459SPierre Jolivet for (jj = ii; jj < 16; jj += 4) bap[jj] += *value++; 1323af674e45SBarry Smith } 1324af674e45SBarry Smith goto noinsert2; 1325af674e45SBarry Smith } 1326af674e45SBarry Smith } 1327af674e45SBarry Smith N = nrow++ - 1; 132817ec6a02SBarry Smith high++; /* added new column index thus must search to one higher than before */ 1329af674e45SBarry Smith /* shift up all the later entries in this row */ 1330af674e45SBarry Smith for (ii = N; ii >= i; ii--) { 1331af674e45SBarry Smith rp[ii + 1] = rp[ii]; 13329566063dSJacob Faibussowitsch PetscCallVoid(PetscArraycpy(ap + 16 * (ii + 1), ap + 16 * (ii), 16)); 1333af674e45SBarry Smith } 133448a46eb9SPierre Jolivet if (N >= i) PetscCallVoid(PetscArrayzero(ap + 16 * i, 16)); 1335af674e45SBarry Smith rp[i] = col; 1336af674e45SBarry Smith bap = ap + 16 * i; 1337af674e45SBarry Smith for (ii = 0; ii < 4; ii++, value += stepval) { 1338ad540459SPierre Jolivet for (jj = ii; jj < 16; jj += 4) bap[jj] = *value++; 1339af674e45SBarry Smith } 1340af674e45SBarry Smith noinsert2:; 1341af674e45SBarry Smith low = i; 1342af674e45SBarry Smith } 1343af674e45SBarry Smith ailen[row] = nrow; 1344af674e45SBarry Smith } 1345be1d678aSKris Buschelman PetscFunctionReturnVoid(); 1346af674e45SBarry Smith } 1347af674e45SBarry Smith 1348af674e45SBarry Smith #if defined(PETSC_HAVE_FORTRAN_CAPS) 1349af674e45SBarry Smith #define matsetvalues4_ MATSETVALUES4 1350af674e45SBarry Smith #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 1351af674e45SBarry Smith #define matsetvalues4_ matsetvalues4 1352af674e45SBarry Smith #endif 1353af674e45SBarry Smith 1354d71ae5a4SJacob Faibussowitsch PETSC_EXTERN void matsetvalues4_(Mat *AA, PetscInt *mm, PetscInt *im, PetscInt *nn, PetscInt *in, PetscScalar *v) 1355d71ae5a4SJacob Faibussowitsch { 1356af674e45SBarry Smith Mat A = *AA; 1357af674e45SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 1358580bdb30SBarry Smith PetscInt *rp, k, low, high, t, row, nrow, i, col, l, N, n = *nn, m = *mm; 1359c1ac3661SBarry Smith PetscInt *ai = a->i, *ailen = a->ilen; 1360c1ac3661SBarry Smith PetscInt *aj = a->j, brow, bcol; 136117ec6a02SBarry Smith PetscInt ridx, cidx, lastcol = -1; 1362af674e45SBarry Smith MatScalar *ap, value, *aa = a->a, *bap; 1363af674e45SBarry Smith 1364af674e45SBarry Smith PetscFunctionBegin; 1365af674e45SBarry Smith for (k = 0; k < m; k++) { /* loop over added rows */ 13669371c9d4SSatish Balay row = im[k]; 13679371c9d4SSatish Balay brow = row / 4; 1368af674e45SBarry Smith rp = aj + ai[brow]; 1369af674e45SBarry Smith ap = aa + 16 * ai[brow]; 1370af674e45SBarry Smith nrow = ailen[brow]; 1371af674e45SBarry Smith low = 0; 137217ec6a02SBarry Smith high = nrow; 1373af674e45SBarry Smith for (l = 0; l < n; l++) { /* loop over added columns */ 13749371c9d4SSatish Balay col = in[l]; 13759371c9d4SSatish Balay bcol = col / 4; 13769371c9d4SSatish Balay ridx = row % 4; 13779371c9d4SSatish Balay cidx = col % 4; 1378af674e45SBarry Smith value = v[l + k * n]; 1379db4deed7SKarl Rupp if (col <= lastcol) low = 0; 1380db4deed7SKarl Rupp else high = nrow; 138117ec6a02SBarry Smith lastcol = col; 1382af674e45SBarry Smith while (high - low > 7) { 1383af674e45SBarry Smith t = (low + high) / 2; 1384af674e45SBarry Smith if (rp[t] > bcol) high = t; 1385af674e45SBarry Smith else low = t; 1386af674e45SBarry Smith } 1387af674e45SBarry Smith for (i = low; i < high; i++) { 1388af674e45SBarry Smith if (rp[i] > bcol) break; 1389af674e45SBarry Smith if (rp[i] == bcol) { 1390af674e45SBarry Smith bap = ap + 16 * i + 4 * cidx + ridx; 1391af674e45SBarry Smith *bap += value; 1392af674e45SBarry Smith goto noinsert1; 1393af674e45SBarry Smith } 1394af674e45SBarry Smith } 1395af674e45SBarry Smith N = nrow++ - 1; 139617ec6a02SBarry Smith high++; /* added new column thus must search to one higher than before */ 1397af674e45SBarry Smith /* shift up all the later entries in this row */ 13989566063dSJacob Faibussowitsch PetscCallVoid(PetscArraymove(rp + i + 1, rp + i, N - i + 1)); 13999566063dSJacob Faibussowitsch PetscCallVoid(PetscArraymove(ap + 16 * i + 16, ap + 16 * i, 16 * (N - i + 1))); 14009566063dSJacob Faibussowitsch PetscCallVoid(PetscArrayzero(ap + 16 * i, 16)); 1401af674e45SBarry Smith rp[i] = bcol; 1402af674e45SBarry Smith ap[16 * i + 4 * cidx + ridx] = value; 1403af674e45SBarry Smith noinsert1:; 1404af674e45SBarry Smith low = i; 1405af674e45SBarry Smith } 1406af674e45SBarry Smith ailen[brow] = nrow; 1407af674e45SBarry Smith } 1408be1d678aSKris Buschelman PetscFunctionReturnVoid(); 1409af674e45SBarry Smith } 1410af674e45SBarry Smith 1411be5855fcSBarry Smith /* 1412be5855fcSBarry Smith Checks for missing diagonals 1413be5855fcSBarry Smith */ 1414d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A, PetscBool *missing, PetscInt *d) 1415d71ae5a4SJacob Faibussowitsch { 1416be5855fcSBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 14177734d3b5SMatthew G. Knepley PetscInt *diag, *ii = a->i, i; 1418be5855fcSBarry Smith 1419be5855fcSBarry Smith PetscFunctionBegin; 14209566063dSJacob Faibussowitsch PetscCall(MatMarkDiagonal_SeqBAIJ(A)); 14212af78befSBarry Smith *missing = PETSC_FALSE; 14227734d3b5SMatthew G. Knepley if (A->rmap->n > 0 && !ii) { 14232efa7f71SHong Zhang *missing = PETSC_TRUE; 14242efa7f71SHong Zhang if (d) *d = 0; 14259566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "Matrix has no entries therefore is missing diagonal\n")); 14262efa7f71SHong Zhang } else { 142701445905SHong Zhang PetscInt n; 142801445905SHong Zhang n = PetscMin(a->mbs, a->nbs); 1429883fce79SBarry Smith diag = a->diag; 143001445905SHong Zhang for (i = 0; i < n; i++) { 14317734d3b5SMatthew G. Knepley if (diag[i] >= ii[i + 1]) { 14322af78befSBarry Smith *missing = PETSC_TRUE; 14332af78befSBarry Smith if (d) *d = i; 14349566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "Matrix is missing block diagonal number %" PetscInt_FMT "\n", i)); 1435358d2f5dSShri Abhyankar break; 14362efa7f71SHong Zhang } 1437be5855fcSBarry Smith } 1438be5855fcSBarry Smith } 14393ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1440be5855fcSBarry Smith } 1441be5855fcSBarry Smith 1442d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A) 1443d71ae5a4SJacob Faibussowitsch { 1444de6a44a3SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 144509f38230SBarry Smith PetscInt i, j, m = a->mbs; 1446de6a44a3SBarry Smith 14473a40ed3dSBarry Smith PetscFunctionBegin; 144809f38230SBarry Smith if (!a->diag) { 14499566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(m, &a->diag)); 14504fd072dbSBarry Smith a->free_diag = PETSC_TRUE; 145109f38230SBarry Smith } 14527fc0212eSBarry Smith for (i = 0; i < m; i++) { 145309f38230SBarry Smith a->diag[i] = a->i[i + 1]; 1454de6a44a3SBarry Smith for (j = a->i[i]; j < a->i[i + 1]; j++) { 1455de6a44a3SBarry Smith if (a->j[j] == i) { 145609f38230SBarry Smith a->diag[i] = j; 1457de6a44a3SBarry Smith break; 1458de6a44a3SBarry Smith } 1459de6a44a3SBarry Smith } 1460de6a44a3SBarry Smith } 14613ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1462de6a44a3SBarry Smith } 14632593348eSBarry Smith 1464d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool blockcompressed, PetscInt *nn, const PetscInt *inia[], const PetscInt *inja[], PetscBool *done) 1465d71ae5a4SJacob Faibussowitsch { 14663b2fbd54SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 14671a83f524SJed Brown PetscInt i, j, n = a->mbs, nz = a->i[n], *tia, *tja, bs = A->rmap->bs, k, l, cnt; 14681a83f524SJed Brown PetscInt **ia = (PetscInt **)inia, **ja = (PetscInt **)inja; 14693b2fbd54SBarry Smith 14703a40ed3dSBarry Smith PetscFunctionBegin; 14713b2fbd54SBarry Smith *nn = n; 14723ba16761SJacob Faibussowitsch if (!ia) PetscFunctionReturn(PETSC_SUCCESS); 14733b2fbd54SBarry Smith if (symmetric) { 14749566063dSJacob Faibussowitsch PetscCall(MatToSymmetricIJ_SeqAIJ(n, a->i, a->j, PETSC_TRUE, 0, 0, &tia, &tja)); 1475553b3c51SBarry Smith nz = tia[n]; 14763b2fbd54SBarry Smith } else { 14779371c9d4SSatish Balay tia = a->i; 14789371c9d4SSatish Balay tja = a->j; 14793b2fbd54SBarry Smith } 14803b2fbd54SBarry Smith 1481ecc77c7aSBarry Smith if (!blockcompressed && bs > 1) { 1482ecc77c7aSBarry Smith (*nn) *= bs; 14838f7157efSSatish Balay /* malloc & create the natural set of indices */ 14849566063dSJacob Faibussowitsch PetscCall(PetscMalloc1((n + 1) * bs, ia)); 14859985e31cSBarry Smith if (n) { 14862462f5fdSStefano Zampini (*ia)[0] = oshift; 1487ad540459SPierre Jolivet for (j = 1; j < bs; j++) (*ia)[j] = (tia[1] - tia[0]) * bs + (*ia)[j - 1]; 14889985e31cSBarry Smith } 1489ecc77c7aSBarry Smith 1490ecc77c7aSBarry Smith for (i = 1; i < n; i++) { 1491ecc77c7aSBarry Smith (*ia)[i * bs] = (tia[i] - tia[i - 1]) * bs + (*ia)[i * bs - 1]; 1492ad540459SPierre Jolivet for (j = 1; j < bs; j++) (*ia)[i * bs + j] = (tia[i + 1] - tia[i]) * bs + (*ia)[i * bs + j - 1]; 14938f7157efSSatish Balay } 1494ad540459SPierre Jolivet if (n) (*ia)[n * bs] = (tia[n] - tia[n - 1]) * bs + (*ia)[n * bs - 1]; 1495ecc77c7aSBarry Smith 14961a83f524SJed Brown if (inja) { 14979566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(nz * bs * bs, ja)); 14989985e31cSBarry Smith cnt = 0; 14999985e31cSBarry Smith for (i = 0; i < n; i++) { 15009985e31cSBarry Smith for (j = 0; j < bs; j++) { 15019985e31cSBarry Smith for (k = tia[i]; k < tia[i + 1]; k++) { 1502ad540459SPierre Jolivet for (l = 0; l < bs; l++) (*ja)[cnt++] = bs * tja[k] + l; 15039985e31cSBarry Smith } 15049985e31cSBarry Smith } 15059985e31cSBarry Smith } 15069985e31cSBarry Smith } 15079985e31cSBarry Smith 15088f7157efSSatish Balay if (symmetric) { /* deallocate memory allocated in MatToSymmetricIJ_SeqAIJ() */ 15099566063dSJacob Faibussowitsch PetscCall(PetscFree(tia)); 15109566063dSJacob Faibussowitsch PetscCall(PetscFree(tja)); 15118f7157efSSatish Balay } 1512f6d58c54SBarry Smith } else if (oshift == 1) { 1513715a17b5SBarry Smith if (symmetric) { 1514a2ea699eSBarry Smith nz = tia[A->rmap->n / bs]; 1515715a17b5SBarry Smith /* add 1 to i and j indices */ 1516715a17b5SBarry Smith for (i = 0; i < A->rmap->n / bs + 1; i++) tia[i] = tia[i] + 1; 1517715a17b5SBarry Smith *ia = tia; 1518715a17b5SBarry Smith if (ja) { 1519715a17b5SBarry Smith for (i = 0; i < nz; i++) tja[i] = tja[i] + 1; 1520715a17b5SBarry Smith *ja = tja; 1521715a17b5SBarry Smith } 1522715a17b5SBarry Smith } else { 1523a2ea699eSBarry Smith nz = a->i[A->rmap->n / bs]; 1524f6d58c54SBarry Smith /* malloc space and add 1 to i and j indices */ 15259566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(A->rmap->n / bs + 1, ia)); 1526f6d58c54SBarry Smith for (i = 0; i < A->rmap->n / bs + 1; i++) (*ia)[i] = a->i[i] + 1; 1527f6d58c54SBarry Smith if (ja) { 15289566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(nz, ja)); 1529f6d58c54SBarry Smith for (i = 0; i < nz; i++) (*ja)[i] = a->j[i] + 1; 1530f6d58c54SBarry Smith } 1531715a17b5SBarry Smith } 15328f7157efSSatish Balay } else { 15338f7157efSSatish Balay *ia = tia; 1534ecc77c7aSBarry Smith if (ja) *ja = tja; 15358f7157efSSatish Balay } 15363ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 15373b2fbd54SBarry Smith } 15383b2fbd54SBarry Smith 1539d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool blockcompressed, PetscInt *nn, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done) 1540d71ae5a4SJacob Faibussowitsch { 15413a40ed3dSBarry Smith PetscFunctionBegin; 15423ba16761SJacob Faibussowitsch if (!ia) PetscFunctionReturn(PETSC_SUCCESS); 1543715a17b5SBarry Smith if ((!blockcompressed && A->rmap->bs > 1) || (symmetric || oshift == 1)) { 15449566063dSJacob Faibussowitsch PetscCall(PetscFree(*ia)); 15459566063dSJacob Faibussowitsch if (ja) PetscCall(PetscFree(*ja)); 15463b2fbd54SBarry Smith } 15473ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 15483b2fbd54SBarry Smith } 15493b2fbd54SBarry Smith 1550d71ae5a4SJacob Faibussowitsch PetscErrorCode MatDestroy_SeqBAIJ(Mat A) 1551d71ae5a4SJacob Faibussowitsch { 15522d61bbb3SSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 15532d61bbb3SSatish Balay 1554433994e6SBarry Smith PetscFunctionBegin; 1555b4e2f619SBarry Smith if (A->hash_active) { 1556b4e2f619SBarry Smith PetscInt bs; 1557e3c72094SPierre Jolivet A->ops[0] = a->cops; 1558b4e2f619SBarry Smith PetscCall(PetscHMapIJVDestroy(&a->ht)); 1559b4e2f619SBarry Smith PetscCall(MatGetBlockSize(A, &bs)); 1560b4e2f619SBarry Smith if (bs > 1) PetscCall(PetscHSetIJDestroy(&a->bht)); 1561b4e2f619SBarry Smith PetscCall(PetscFree(a->dnz)); 1562b4e2f619SBarry Smith PetscCall(PetscFree(a->bdnz)); 1563b4e2f619SBarry Smith A->hash_active = PETSC_FALSE; 1564b4e2f619SBarry Smith } 15653ba16761SJacob Faibussowitsch PetscCall(PetscLogObjectState((PetscObject)A, "Rows=%" PetscInt_FMT ", Cols=%" PetscInt_FMT ", NZ=%" PetscInt_FMT, A->rmap->N, A->cmap->n, a->nz)); 15669566063dSJacob Faibussowitsch PetscCall(MatSeqXAIJFreeAIJ(A, &a->a, &a->j, &a->i)); 15679566063dSJacob Faibussowitsch PetscCall(ISDestroy(&a->row)); 15689566063dSJacob Faibussowitsch PetscCall(ISDestroy(&a->col)); 15699566063dSJacob Faibussowitsch if (a->free_diag) PetscCall(PetscFree(a->diag)); 15709566063dSJacob Faibussowitsch PetscCall(PetscFree(a->idiag)); 15719566063dSJacob Faibussowitsch if (a->free_imax_ilen) PetscCall(PetscFree2(a->imax, a->ilen)); 15729566063dSJacob Faibussowitsch PetscCall(PetscFree(a->solve_work)); 15739566063dSJacob Faibussowitsch PetscCall(PetscFree(a->mult_work)); 15749566063dSJacob Faibussowitsch PetscCall(PetscFree(a->sor_workt)); 15759566063dSJacob Faibussowitsch PetscCall(PetscFree(a->sor_work)); 15769566063dSJacob Faibussowitsch PetscCall(ISDestroy(&a->icol)); 15779566063dSJacob Faibussowitsch PetscCall(PetscFree(a->saved_values)); 15789566063dSJacob Faibussowitsch PetscCall(PetscFree2(a->compressedrow.i, a->compressedrow.rindex)); 1579c4319e64SHong Zhang 15809566063dSJacob Faibussowitsch PetscCall(MatDestroy(&a->sbaijMat)); 15819566063dSJacob Faibussowitsch PetscCall(MatDestroy(&a->parent)); 15829566063dSJacob Faibussowitsch PetscCall(PetscFree(A->data)); 1583901853e0SKris Buschelman 15849566063dSJacob Faibussowitsch PetscCall(PetscObjectChangeTypeName((PetscObject)A, NULL)); 15859566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatSeqBAIJGetArray_C", NULL)); 15869566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatSeqBAIJRestoreArray_C", NULL)); 15879566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatStoreValues_C", NULL)); 15889566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatRetrieveValues_C", NULL)); 15899566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatSeqBAIJSetColumnIndices_C", NULL)); 15909566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqbaij_seqaij_C", NULL)); 15919566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqbaij_seqsbaij_C", NULL)); 15929566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatSeqBAIJSetPreallocation_C", NULL)); 15939566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatSeqBAIJSetPreallocationCSR_C", NULL)); 15949566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqbaij_seqbstrm_C", NULL)); 15959566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatIsTranspose_C", NULL)); 15967ea3e4caSstefano_zampini #if defined(PETSC_HAVE_HYPRE) 15979566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqbaij_hypre_C", NULL)); 15987ea3e4caSstefano_zampini #endif 15999566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqbaij_is_C", NULL)); 16002e956fe4SStefano Zampini PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatFactorGetSolverType_C", NULL)); 16013ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 16022d61bbb3SSatish Balay } 16032d61bbb3SSatish Balay 160466976f2fSJacob Faibussowitsch static PetscErrorCode MatSetOption_SeqBAIJ(Mat A, MatOption op, PetscBool flg) 1605d71ae5a4SJacob Faibussowitsch { 16062d61bbb3SSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 16072d61bbb3SSatish Balay 16082d61bbb3SSatish Balay PetscFunctionBegin; 1609aa275fccSKris Buschelman switch (op) { 1610d71ae5a4SJacob Faibussowitsch case MAT_ROW_ORIENTED: 1611d71ae5a4SJacob Faibussowitsch a->roworiented = flg; 1612d71ae5a4SJacob Faibussowitsch break; 1613d71ae5a4SJacob Faibussowitsch case MAT_KEEP_NONZERO_PATTERN: 1614d71ae5a4SJacob Faibussowitsch a->keepnonzeropattern = flg; 1615d71ae5a4SJacob Faibussowitsch break; 1616d71ae5a4SJacob Faibussowitsch case MAT_NEW_NONZERO_LOCATIONS: 1617d71ae5a4SJacob Faibussowitsch a->nonew = (flg ? 0 : 1); 1618d71ae5a4SJacob Faibussowitsch break; 1619d71ae5a4SJacob Faibussowitsch case MAT_NEW_NONZERO_LOCATION_ERR: 1620d71ae5a4SJacob Faibussowitsch a->nonew = (flg ? -1 : 0); 1621d71ae5a4SJacob Faibussowitsch break; 1622d71ae5a4SJacob Faibussowitsch case MAT_NEW_NONZERO_ALLOCATION_ERR: 1623d71ae5a4SJacob Faibussowitsch a->nonew = (flg ? -2 : 0); 1624d71ae5a4SJacob Faibussowitsch break; 1625d71ae5a4SJacob Faibussowitsch case MAT_UNUSED_NONZERO_LOCATION_ERR: 1626d71ae5a4SJacob Faibussowitsch a->nounused = (flg ? -1 : 0); 1627d71ae5a4SJacob Faibussowitsch break; 1628d71ae5a4SJacob Faibussowitsch default: 1629888c827cSStefano Zampini break; 16302d61bbb3SSatish Balay } 16313ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 16322d61bbb3SSatish Balay } 16332d61bbb3SSatish Balay 163452768537SHong Zhang /* used for both SeqBAIJ and SeqSBAIJ matrices */ 1635d71ae5a4SJacob Faibussowitsch PetscErrorCode MatGetRow_SeqBAIJ_private(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v, PetscInt *ai, PetscInt *aj, PetscScalar *aa) 1636d71ae5a4SJacob Faibussowitsch { 163752768537SHong Zhang PetscInt itmp, i, j, k, M, bn, bp, *idx_i, bs, bs2; 163852768537SHong Zhang MatScalar *aa_i; 163987828ca2SBarry Smith PetscScalar *v_i; 16402d61bbb3SSatish Balay 16412d61bbb3SSatish Balay PetscFunctionBegin; 1642d0f46423SBarry Smith bs = A->rmap->bs; 164352768537SHong Zhang bs2 = bs * bs; 16445f80ce2aSJacob Faibussowitsch PetscCheck(row >= 0 && row < A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " out of range", row); 16452d61bbb3SSatish Balay 16462d61bbb3SSatish Balay bn = row / bs; /* Block number */ 16472d61bbb3SSatish Balay bp = row % bs; /* Block Position */ 16482d61bbb3SSatish Balay M = ai[bn + 1] - ai[bn]; 16492d61bbb3SSatish Balay *nz = bs * M; 16502d61bbb3SSatish Balay 16512d61bbb3SSatish Balay if (v) { 1652f4259b30SLisandro Dalcin *v = NULL; 16532d61bbb3SSatish Balay if (*nz) { 16549566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(*nz, v)); 16552d61bbb3SSatish Balay for (i = 0; i < M; i++) { /* for each block in the block row */ 16562d61bbb3SSatish Balay v_i = *v + i * bs; 16572d61bbb3SSatish Balay aa_i = aa + bs2 * (ai[bn] + i); 165826fbe8dcSKarl Rupp for (j = bp, k = 0; j < bs2; j += bs, k++) v_i[k] = aa_i[j]; 16592d61bbb3SSatish Balay } 16602d61bbb3SSatish Balay } 16612d61bbb3SSatish Balay } 16622d61bbb3SSatish Balay 16632d61bbb3SSatish Balay if (idx) { 1664f4259b30SLisandro Dalcin *idx = NULL; 16652d61bbb3SSatish Balay if (*nz) { 16669566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(*nz, idx)); 16672d61bbb3SSatish Balay for (i = 0; i < M; i++) { /* for each block in the block row */ 16682d61bbb3SSatish Balay idx_i = *idx + i * bs; 16692d61bbb3SSatish Balay itmp = bs * aj[ai[bn] + i]; 167026fbe8dcSKarl Rupp for (j = 0; j < bs; j++) idx_i[j] = itmp++; 16712d61bbb3SSatish Balay } 16722d61bbb3SSatish Balay } 16732d61bbb3SSatish Balay } 16743ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 16752d61bbb3SSatish Balay } 16762d61bbb3SSatish Balay 1677d71ae5a4SJacob Faibussowitsch PetscErrorCode MatGetRow_SeqBAIJ(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v) 1678d71ae5a4SJacob Faibussowitsch { 167952768537SHong Zhang Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 168052768537SHong Zhang 168152768537SHong Zhang PetscFunctionBegin; 16829566063dSJacob Faibussowitsch PetscCall(MatGetRow_SeqBAIJ_private(A, row, nz, idx, v, a->i, a->j, a->a)); 16833ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 168452768537SHong Zhang } 168552768537SHong Zhang 1686d71ae5a4SJacob Faibussowitsch PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v) 1687d71ae5a4SJacob Faibussowitsch { 16882d61bbb3SSatish Balay PetscFunctionBegin; 16899566063dSJacob Faibussowitsch if (idx) PetscCall(PetscFree(*idx)); 16909566063dSJacob Faibussowitsch if (v) PetscCall(PetscFree(*v)); 16913ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 16922d61bbb3SSatish Balay } 16932d61bbb3SSatish Balay 169466976f2fSJacob Faibussowitsch static PetscErrorCode MatTranspose_SeqBAIJ(Mat A, MatReuse reuse, Mat *B) 1695d71ae5a4SJacob Faibussowitsch { 169620e84f26SHong Zhang Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data, *at; 16972d61bbb3SSatish Balay Mat C; 169820e84f26SHong Zhang PetscInt i, j, k, *aj = a->j, *ai = a->i, bs = A->rmap->bs, mbs = a->mbs, nbs = a->nbs, *atfill; 169920e84f26SHong Zhang PetscInt bs2 = a->bs2, *ati, *atj, anzj, kr; 170020e84f26SHong Zhang MatScalar *ata, *aa = a->a; 17012d61bbb3SSatish Balay 17022d61bbb3SSatish Balay PetscFunctionBegin; 17037fb60732SBarry Smith if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *B)); 17049566063dSJacob Faibussowitsch PetscCall(PetscCalloc1(1 + nbs, &atfill)); 1705cf37664fSBarry Smith if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) { 170620e84f26SHong Zhang for (i = 0; i < ai[mbs]; i++) atfill[aj[i]] += 1; /* count num of non-zeros in row aj[i] */ 17072d61bbb3SSatish Balay 17089566063dSJacob Faibussowitsch PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &C)); 17099566063dSJacob Faibussowitsch PetscCall(MatSetSizes(C, A->cmap->n, A->rmap->N, A->cmap->n, A->rmap->N)); 17109566063dSJacob Faibussowitsch PetscCall(MatSetType(C, ((PetscObject)A)->type_name)); 17119566063dSJacob Faibussowitsch PetscCall(MatSeqBAIJSetPreallocation(C, bs, 0, atfill)); 171220e84f26SHong Zhang 171320e84f26SHong Zhang at = (Mat_SeqBAIJ *)C->data; 171420e84f26SHong Zhang ati = at->i; 171520e84f26SHong Zhang for (i = 0; i < nbs; i++) at->ilen[i] = at->imax[i] = ati[i + 1] - ati[i]; 1716fc4dec0aSBarry Smith } else { 1717fc4dec0aSBarry Smith C = *B; 171820e84f26SHong Zhang at = (Mat_SeqBAIJ *)C->data; 171920e84f26SHong Zhang ati = at->i; 1720fc4dec0aSBarry Smith } 1721fc4dec0aSBarry Smith 172220e84f26SHong Zhang atj = at->j; 172320e84f26SHong Zhang ata = at->a; 172420e84f26SHong Zhang 172520e84f26SHong Zhang /* Copy ati into atfill so we have locations of the next free space in atj */ 17269566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(atfill, ati, nbs)); 172720e84f26SHong Zhang 172820e84f26SHong Zhang /* Walk through A row-wise and mark nonzero entries of A^T. */ 17292d61bbb3SSatish Balay for (i = 0; i < mbs; i++) { 173020e84f26SHong Zhang anzj = ai[i + 1] - ai[i]; 173120e84f26SHong Zhang for (j = 0; j < anzj; j++) { 173220e84f26SHong Zhang atj[atfill[*aj]] = i; 173320e84f26SHong Zhang for (kr = 0; kr < bs; kr++) { 1734ad540459SPierre Jolivet for (k = 0; k < bs; k++) ata[bs2 * atfill[*aj] + k * bs + kr] = *aa++; 17352d61bbb3SSatish Balay } 173620e84f26SHong Zhang atfill[*aj++] += 1; 173720e84f26SHong Zhang } 173820e84f26SHong Zhang } 17399566063dSJacob Faibussowitsch PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY)); 17409566063dSJacob Faibussowitsch PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY)); 17412d61bbb3SSatish Balay 174220e84f26SHong Zhang /* Clean up temporary space and complete requests. */ 17439566063dSJacob Faibussowitsch PetscCall(PetscFree(atfill)); 174420e84f26SHong Zhang 1745cf37664fSBarry Smith if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) { 174658b7e2c1SStefano Zampini PetscCall(MatSetBlockSizes(C, A->cmap->bs, A->rmap->bs)); 17472d61bbb3SSatish Balay *B = C; 17482d61bbb3SSatish Balay } else { 17499566063dSJacob Faibussowitsch PetscCall(MatHeaderMerge(A, &C)); 17502d61bbb3SSatish Balay } 17513ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 17522d61bbb3SSatish Balay } 17532d61bbb3SSatish Balay 1754ff6a9541SJacob Faibussowitsch static PetscErrorCode MatIsTranspose_SeqBAIJ(Mat A, Mat B, PetscReal tol, PetscBool *f) 1755d71ae5a4SJacob Faibussowitsch { 1756453d3561SHong Zhang Mat Btrans; 1757453d3561SHong Zhang 1758453d3561SHong Zhang PetscFunctionBegin; 1759453d3561SHong Zhang *f = PETSC_FALSE; 1760acd337a6SBarry Smith PetscCall(MatTranspose(A, MAT_INITIAL_MATRIX, &Btrans)); 17619566063dSJacob Faibussowitsch PetscCall(MatEqual_SeqBAIJ(B, Btrans, f)); 17629566063dSJacob Faibussowitsch PetscCall(MatDestroy(&Btrans)); 17633ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1764453d3561SHong Zhang } 1765453d3561SHong Zhang 1766618cc2edSLisandro Dalcin /* Used for both SeqBAIJ and SeqSBAIJ matrices */ 1767d71ae5a4SJacob Faibussowitsch PetscErrorCode MatView_SeqBAIJ_Binary(Mat mat, PetscViewer viewer) 1768d71ae5a4SJacob Faibussowitsch { 1769b51a4376SLisandro Dalcin Mat_SeqBAIJ *A = (Mat_SeqBAIJ *)mat->data; 1770b51a4376SLisandro Dalcin PetscInt header[4], M, N, m, bs, nz, cnt, i, j, k, l; 1771b51a4376SLisandro Dalcin PetscInt *rowlens, *colidxs; 1772b51a4376SLisandro Dalcin PetscScalar *matvals; 17732593348eSBarry Smith 17743a40ed3dSBarry Smith PetscFunctionBegin; 17759566063dSJacob Faibussowitsch PetscCall(PetscViewerSetUp(viewer)); 17763b2fbd54SBarry Smith 1777b51a4376SLisandro Dalcin M = mat->rmap->N; 1778b51a4376SLisandro Dalcin N = mat->cmap->N; 1779b51a4376SLisandro Dalcin m = mat->rmap->n; 1780b51a4376SLisandro Dalcin bs = mat->rmap->bs; 1781b51a4376SLisandro Dalcin nz = bs * bs * A->nz; 17822593348eSBarry Smith 1783b51a4376SLisandro Dalcin /* write matrix header */ 1784b51a4376SLisandro Dalcin header[0] = MAT_FILE_CLASSID; 17859371c9d4SSatish Balay header[1] = M; 17869371c9d4SSatish Balay header[2] = N; 17879371c9d4SSatish Balay header[3] = nz; 17889566063dSJacob Faibussowitsch PetscCall(PetscViewerBinaryWrite(viewer, header, 4, PETSC_INT)); 17892593348eSBarry Smith 1790b51a4376SLisandro Dalcin /* store row lengths */ 17919566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(m, &rowlens)); 1792b51a4376SLisandro Dalcin for (cnt = 0, i = 0; i < A->mbs; i++) 17939371c9d4SSatish Balay for (j = 0; j < bs; j++) rowlens[cnt++] = bs * (A->i[i + 1] - A->i[i]); 17949566063dSJacob Faibussowitsch PetscCall(PetscViewerBinaryWrite(viewer, rowlens, m, PETSC_INT)); 17959566063dSJacob Faibussowitsch PetscCall(PetscFree(rowlens)); 1796b51a4376SLisandro Dalcin 1797b51a4376SLisandro Dalcin /* store column indices */ 17989566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(nz, &colidxs)); 1799b51a4376SLisandro Dalcin for (cnt = 0, i = 0; i < A->mbs; i++) 1800b51a4376SLisandro Dalcin for (k = 0; k < bs; k++) 1801b51a4376SLisandro Dalcin for (j = A->i[i]; j < A->i[i + 1]; j++) 18029371c9d4SSatish Balay for (l = 0; l < bs; l++) colidxs[cnt++] = bs * A->j[j] + l; 18035f80ce2aSJacob Faibussowitsch PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_LIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt_FMT, cnt, nz); 18049566063dSJacob Faibussowitsch PetscCall(PetscViewerBinaryWrite(viewer, colidxs, nz, PETSC_INT)); 18059566063dSJacob Faibussowitsch PetscCall(PetscFree(colidxs)); 18062593348eSBarry Smith 18072593348eSBarry Smith /* store nonzero values */ 18089566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(nz, &matvals)); 1809b51a4376SLisandro Dalcin for (cnt = 0, i = 0; i < A->mbs; i++) 1810b51a4376SLisandro Dalcin for (k = 0; k < bs; k++) 1811b51a4376SLisandro Dalcin for (j = A->i[i]; j < A->i[i + 1]; j++) 18129371c9d4SSatish Balay for (l = 0; l < bs; l++) matvals[cnt++] = A->a[bs * (bs * j + l) + k]; 18135f80ce2aSJacob Faibussowitsch PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_LIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt_FMT, cnt, nz); 18149566063dSJacob Faibussowitsch PetscCall(PetscViewerBinaryWrite(viewer, matvals, nz, PETSC_SCALAR)); 18159566063dSJacob Faibussowitsch PetscCall(PetscFree(matvals)); 1816ce6f0cecSBarry Smith 1817b51a4376SLisandro Dalcin /* write block size option to the viewer's .info file */ 18189566063dSJacob Faibussowitsch PetscCall(MatView_Binary_BlockSizes(mat, viewer)); 18193ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 18202593348eSBarry Smith } 18212593348eSBarry Smith 1822d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatView_SeqBAIJ_ASCII_structonly(Mat A, PetscViewer viewer) 1823d71ae5a4SJacob Faibussowitsch { 18247dc0baabSHong Zhang Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 18257dc0baabSHong Zhang PetscInt i, bs = A->rmap->bs, k; 18267dc0baabSHong Zhang 18277dc0baabSHong Zhang PetscFunctionBegin; 18289566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_FALSE)); 18297dc0baabSHong Zhang for (i = 0; i < a->mbs; i++) { 18309566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, "row %" PetscInt_FMT "-%" PetscInt_FMT ":", i * bs, i * bs + bs - 1)); 183148a46eb9SPierre Jolivet for (k = a->i[i]; k < a->i[i + 1]; k++) PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT "-%" PetscInt_FMT ") ", bs * a->j[k], bs * a->j[k] + bs - 1)); 18329566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, "\n")); 18337dc0baabSHong Zhang } 18349566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_TRUE)); 18353ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 18367dc0baabSHong Zhang } 18377dc0baabSHong Zhang 1838d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A, PetscViewer viewer) 1839d71ae5a4SJacob Faibussowitsch { 1840b6490206SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 1841d0f46423SBarry Smith PetscInt i, j, bs = A->rmap->bs, k, l, bs2 = a->bs2; 1842f3ef73ceSBarry Smith PetscViewerFormat format; 18432593348eSBarry Smith 18443a40ed3dSBarry Smith PetscFunctionBegin; 18457dc0baabSHong Zhang if (A->structure_only) { 18469566063dSJacob Faibussowitsch PetscCall(MatView_SeqBAIJ_ASCII_structonly(A, viewer)); 18473ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 18487dc0baabSHong Zhang } 18497dc0baabSHong Zhang 18509566063dSJacob Faibussowitsch PetscCall(PetscViewerGetFormat(viewer, &format)); 1851456192e2SBarry Smith if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 18529566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " block size is %" PetscInt_FMT "\n", bs)); 1853fb9695e5SSatish Balay } else if (format == PETSC_VIEWER_ASCII_MATLAB) { 1854ade3a672SBarry Smith const char *matname; 1855bcd9e38bSBarry Smith Mat aij; 18569566063dSJacob Faibussowitsch PetscCall(MatConvert(A, MATSEQAIJ, MAT_INITIAL_MATRIX, &aij)); 18579566063dSJacob Faibussowitsch PetscCall(PetscObjectGetName((PetscObject)A, &matname)); 18589566063dSJacob Faibussowitsch PetscCall(PetscObjectSetName((PetscObject)aij, matname)); 18599566063dSJacob Faibussowitsch PetscCall(MatView(aij, viewer)); 18609566063dSJacob Faibussowitsch PetscCall(MatDestroy(&aij)); 186104929863SHong Zhang } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 18623ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1863fb9695e5SSatish Balay } else if (format == PETSC_VIEWER_ASCII_COMMON) { 18649566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_FALSE)); 186544cd7ae7SLois Curfman McInnes for (i = 0; i < a->mbs; i++) { 186644cd7ae7SLois Curfman McInnes for (j = 0; j < bs; j++) { 18679566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, "row %" PetscInt_FMT ":", i * bs + j)); 186844cd7ae7SLois Curfman McInnes for (k = a->i[i]; k < a->i[i + 1]; k++) { 186944cd7ae7SLois Curfman McInnes for (l = 0; l < bs; l++) { 1870aa482453SBarry Smith #if defined(PETSC_USE_COMPLEX) 18710e6d2581SBarry Smith if (PetscImaginaryPart(a->a[bs2 * k + l * bs + j]) > 0.0 && PetscRealPart(a->a[bs2 * k + l * bs + j]) != 0.0) { 18729371c9d4SSatish Balay PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g + %gi) ", bs * a->j[k] + l, (double)PetscRealPart(a->a[bs2 * k + l * bs + j]), (double)PetscImaginaryPart(a->a[bs2 * k + l * bs + j]))); 18730e6d2581SBarry Smith } else if (PetscImaginaryPart(a->a[bs2 * k + l * bs + j]) < 0.0 && PetscRealPart(a->a[bs2 * k + l * bs + j]) != 0.0) { 18749371c9d4SSatish Balay PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g - %gi) ", bs * a->j[k] + l, (double)PetscRealPart(a->a[bs2 * k + l * bs + j]), -(double)PetscImaginaryPart(a->a[bs2 * k + l * bs + j]))); 18750e6d2581SBarry Smith } else if (PetscRealPart(a->a[bs2 * k + l * bs + j]) != 0.0) { 18769566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g) ", bs * a->j[k] + l, (double)PetscRealPart(a->a[bs2 * k + l * bs + j]))); 18770ef38995SBarry Smith } 187844cd7ae7SLois Curfman McInnes #else 187948a46eb9SPierre Jolivet if (a->a[bs2 * k + l * bs + j] != 0.0) PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g) ", bs * a->j[k] + l, (double)a->a[bs2 * k + l * bs + j])); 188044cd7ae7SLois Curfman McInnes #endif 188144cd7ae7SLois Curfman McInnes } 188244cd7ae7SLois Curfman McInnes } 18839566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, "\n")); 188444cd7ae7SLois Curfman McInnes } 188544cd7ae7SLois Curfman McInnes } 18869566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_TRUE)); 18870ef38995SBarry Smith } else { 18889566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_FALSE)); 1889b6490206SBarry Smith for (i = 0; i < a->mbs; i++) { 1890b6490206SBarry Smith for (j = 0; j < bs; j++) { 18919566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, "row %" PetscInt_FMT ":", i * bs + j)); 1892b6490206SBarry Smith for (k = a->i[i]; k < a->i[i + 1]; k++) { 1893b6490206SBarry Smith for (l = 0; l < bs; l++) { 1894aa482453SBarry Smith #if defined(PETSC_USE_COMPLEX) 18950e6d2581SBarry Smith if (PetscImaginaryPart(a->a[bs2 * k + l * bs + j]) > 0.0) { 18969371c9d4SSatish Balay PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g + %g i) ", bs * a->j[k] + l, (double)PetscRealPart(a->a[bs2 * k + l * bs + j]), (double)PetscImaginaryPart(a->a[bs2 * k + l * bs + j]))); 18970e6d2581SBarry Smith } else if (PetscImaginaryPart(a->a[bs2 * k + l * bs + j]) < 0.0) { 18989371c9d4SSatish Balay PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g - %g i) ", bs * a->j[k] + l, (double)PetscRealPart(a->a[bs2 * k + l * bs + j]), -(double)PetscImaginaryPart(a->a[bs2 * k + l * bs + j]))); 18990ef38995SBarry Smith } else { 19009566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g) ", bs * a->j[k] + l, (double)PetscRealPart(a->a[bs2 * k + l * bs + j]))); 190188685aaeSLois Curfman McInnes } 190288685aaeSLois Curfman McInnes #else 19039566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g) ", bs * a->j[k] + l, (double)a->a[bs2 * k + l * bs + j])); 190488685aaeSLois Curfman McInnes #endif 19052593348eSBarry Smith } 19062593348eSBarry Smith } 19079566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, "\n")); 19082593348eSBarry Smith } 19092593348eSBarry Smith } 19109566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_TRUE)); 1911b6490206SBarry Smith } 19129566063dSJacob Faibussowitsch PetscCall(PetscViewerFlush(viewer)); 19133ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 19142593348eSBarry Smith } 19152593348eSBarry Smith 19169804daf3SBarry Smith #include <petscdraw.h> 1917d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw, void *Aa) 1918d71ae5a4SJacob Faibussowitsch { 191977ed5343SBarry Smith Mat A = (Mat)Aa; 19203270192aSSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 19216497c311SBarry Smith PetscInt row, i, j, k, l, mbs = a->mbs, bs = A->rmap->bs, bs2 = a->bs2; 19220e6d2581SBarry Smith PetscReal xl, yl, xr, yr, x_l, x_r, y_l, y_r; 19233f1db9ecSBarry Smith MatScalar *aa; 1924b0a32e0cSBarry Smith PetscViewer viewer; 1925b3e7f47fSJed Brown PetscViewerFormat format; 19266497c311SBarry Smith int color; 19273270192aSSatish Balay 19283a40ed3dSBarry Smith PetscFunctionBegin; 19299566063dSJacob Faibussowitsch PetscCall(PetscObjectQuery((PetscObject)A, "Zoomviewer", (PetscObject *)&viewer)); 19309566063dSJacob Faibussowitsch PetscCall(PetscViewerGetFormat(viewer, &format)); 19319566063dSJacob Faibussowitsch PetscCall(PetscDrawGetCoordinates(draw, &xl, &yl, &xr, &yr)); 193277ed5343SBarry Smith 19333270192aSSatish Balay /* loop over matrix elements drawing boxes */ 1934b3e7f47fSJed Brown 1935b3e7f47fSJed Brown if (format != PETSC_VIEWER_DRAW_CONTOUR) { 1936d0609cedSBarry Smith PetscDrawCollectiveBegin(draw); 1937383922c3SLisandro Dalcin /* Blue for negative, Cyan for zero and Red for positive */ 1938b0a32e0cSBarry Smith color = PETSC_DRAW_BLUE; 19393270192aSSatish Balay for (i = 0, row = 0; i < mbs; i++, row += bs) { 19403270192aSSatish Balay for (j = a->i[i]; j < a->i[i + 1]; j++) { 19419371c9d4SSatish Balay y_l = A->rmap->N - row - 1.0; 19429371c9d4SSatish Balay y_r = y_l + 1.0; 19439371c9d4SSatish Balay x_l = a->j[j] * bs; 19449371c9d4SSatish Balay x_r = x_l + 1.0; 19453270192aSSatish Balay aa = a->a + j * bs2; 19463270192aSSatish Balay for (k = 0; k < bs; k++) { 19473270192aSSatish Balay for (l = 0; l < bs; l++) { 19480e6d2581SBarry Smith if (PetscRealPart(*aa++) >= 0.) continue; 19499566063dSJacob Faibussowitsch PetscCall(PetscDrawRectangle(draw, x_l + k, y_l - l, x_r + k, y_r - l, color, color, color, color)); 19503270192aSSatish Balay } 19513270192aSSatish Balay } 19523270192aSSatish Balay } 19533270192aSSatish Balay } 1954b0a32e0cSBarry Smith color = PETSC_DRAW_CYAN; 19553270192aSSatish Balay for (i = 0, row = 0; i < mbs; i++, row += bs) { 19563270192aSSatish Balay for (j = a->i[i]; j < a->i[i + 1]; j++) { 19579371c9d4SSatish Balay y_l = A->rmap->N - row - 1.0; 19589371c9d4SSatish Balay y_r = y_l + 1.0; 19599371c9d4SSatish Balay x_l = a->j[j] * bs; 19609371c9d4SSatish Balay x_r = x_l + 1.0; 19613270192aSSatish Balay aa = a->a + j * bs2; 19623270192aSSatish Balay for (k = 0; k < bs; k++) { 19633270192aSSatish Balay for (l = 0; l < bs; l++) { 19640e6d2581SBarry Smith if (PetscRealPart(*aa++) != 0.) continue; 19659566063dSJacob Faibussowitsch PetscCall(PetscDrawRectangle(draw, x_l + k, y_l - l, x_r + k, y_r - l, color, color, color, color)); 19663270192aSSatish Balay } 19673270192aSSatish Balay } 19683270192aSSatish Balay } 19693270192aSSatish Balay } 1970b0a32e0cSBarry Smith color = PETSC_DRAW_RED; 19713270192aSSatish Balay for (i = 0, row = 0; i < mbs; i++, row += bs) { 19723270192aSSatish Balay for (j = a->i[i]; j < a->i[i + 1]; j++) { 19739371c9d4SSatish Balay y_l = A->rmap->N - row - 1.0; 19749371c9d4SSatish Balay y_r = y_l + 1.0; 19759371c9d4SSatish Balay x_l = a->j[j] * bs; 19769371c9d4SSatish Balay x_r = x_l + 1.0; 19773270192aSSatish Balay aa = a->a + j * bs2; 19783270192aSSatish Balay for (k = 0; k < bs; k++) { 19793270192aSSatish Balay for (l = 0; l < bs; l++) { 19800e6d2581SBarry Smith if (PetscRealPart(*aa++) <= 0.) continue; 19819566063dSJacob Faibussowitsch PetscCall(PetscDrawRectangle(draw, x_l + k, y_l - l, x_r + k, y_r - l, color, color, color, color)); 19823270192aSSatish Balay } 19833270192aSSatish Balay } 19843270192aSSatish Balay } 19853270192aSSatish Balay } 1986d0609cedSBarry Smith PetscDrawCollectiveEnd(draw); 1987b3e7f47fSJed Brown } else { 1988b3e7f47fSJed Brown /* use contour shading to indicate magnitude of values */ 1989b3e7f47fSJed Brown /* first determine max of all nonzero values */ 1990b05fc000SLisandro Dalcin PetscReal minv = 0.0, maxv = 0.0; 1991b3e7f47fSJed Brown PetscDraw popup; 1992b3e7f47fSJed Brown 1993b3e7f47fSJed Brown for (i = 0; i < a->nz * a->bs2; i++) { 1994b3e7f47fSJed Brown if (PetscAbsScalar(a->a[i]) > maxv) maxv = PetscAbsScalar(a->a[i]); 1995b3e7f47fSJed Brown } 1996383922c3SLisandro Dalcin if (minv >= maxv) maxv = minv + PETSC_SMALL; 19979566063dSJacob Faibussowitsch PetscCall(PetscDrawGetPopup(draw, &popup)); 19989566063dSJacob Faibussowitsch PetscCall(PetscDrawScalePopup(popup, 0.0, maxv)); 1999383922c3SLisandro Dalcin 2000d0609cedSBarry Smith PetscDrawCollectiveBegin(draw); 2001b3e7f47fSJed Brown for (i = 0, row = 0; i < mbs; i++, row += bs) { 2002b3e7f47fSJed Brown for (j = a->i[i]; j < a->i[i + 1]; j++) { 20039371c9d4SSatish Balay y_l = A->rmap->N - row - 1.0; 20049371c9d4SSatish Balay y_r = y_l + 1.0; 20059371c9d4SSatish Balay x_l = a->j[j] * bs; 20069371c9d4SSatish Balay x_r = x_l + 1.0; 2007b3e7f47fSJed Brown aa = a->a + j * bs2; 2008b3e7f47fSJed Brown for (k = 0; k < bs; k++) { 2009b3e7f47fSJed Brown for (l = 0; l < bs; l++) { 2010383922c3SLisandro Dalcin MatScalar v = *aa++; 2011383922c3SLisandro Dalcin color = PetscDrawRealToColor(PetscAbsScalar(v), minv, maxv); 20129566063dSJacob Faibussowitsch PetscCall(PetscDrawRectangle(draw, x_l + k, y_l - l, x_r + k, y_r - l, color, color, color, color)); 2013b3e7f47fSJed Brown } 2014b3e7f47fSJed Brown } 2015b3e7f47fSJed Brown } 2016b3e7f47fSJed Brown } 2017d0609cedSBarry Smith PetscDrawCollectiveEnd(draw); 2018b3e7f47fSJed Brown } 20193ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 202077ed5343SBarry Smith } 20213270192aSSatish Balay 2022d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A, PetscViewer viewer) 2023d71ae5a4SJacob Faibussowitsch { 20240e6d2581SBarry Smith PetscReal xl, yl, xr, yr, w, h; 2025b0a32e0cSBarry Smith PetscDraw draw; 2026ace3abfcSBarry Smith PetscBool isnull; 20273270192aSSatish Balay 202877ed5343SBarry Smith PetscFunctionBegin; 20299566063dSJacob Faibussowitsch PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw)); 20309566063dSJacob Faibussowitsch PetscCall(PetscDrawIsNull(draw, &isnull)); 20313ba16761SJacob Faibussowitsch if (isnull) PetscFunctionReturn(PETSC_SUCCESS); 203277ed5343SBarry Smith 20339371c9d4SSatish Balay xr = A->cmap->n; 20349371c9d4SSatish Balay yr = A->rmap->N; 20359371c9d4SSatish Balay h = yr / 10.0; 20369371c9d4SSatish Balay w = xr / 10.0; 20379371c9d4SSatish Balay xr += w; 20389371c9d4SSatish Balay yr += h; 20399371c9d4SSatish Balay xl = -w; 20409371c9d4SSatish Balay yl = -h; 20419566063dSJacob Faibussowitsch PetscCall(PetscDrawSetCoordinates(draw, xl, yl, xr, yr)); 20429566063dSJacob Faibussowitsch PetscCall(PetscObjectCompose((PetscObject)A, "Zoomviewer", (PetscObject)viewer)); 20439566063dSJacob Faibussowitsch PetscCall(PetscDrawZoom(draw, MatView_SeqBAIJ_Draw_Zoom, A)); 20449566063dSJacob Faibussowitsch PetscCall(PetscObjectCompose((PetscObject)A, "Zoomviewer", NULL)); 20459566063dSJacob Faibussowitsch PetscCall(PetscDrawSave(draw)); 20463ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 20473270192aSSatish Balay } 20483270192aSSatish Balay 2049d71ae5a4SJacob Faibussowitsch PetscErrorCode MatView_SeqBAIJ(Mat A, PetscViewer viewer) 2050d71ae5a4SJacob Faibussowitsch { 20519f196a02SMartin Diehl PetscBool isascii, isbinary, isdraw; 20522593348eSBarry Smith 20533a40ed3dSBarry Smith PetscFunctionBegin; 20549f196a02SMartin Diehl PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &isascii)); 20559566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary)); 20569566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw)); 20579f196a02SMartin Diehl if (isascii) { 20589566063dSJacob Faibussowitsch PetscCall(MatView_SeqBAIJ_ASCII(A, viewer)); 20590f5bd95cSBarry Smith } else if (isbinary) { 20609566063dSJacob Faibussowitsch PetscCall(MatView_SeqBAIJ_Binary(A, viewer)); 20610f5bd95cSBarry Smith } else if (isdraw) { 20629566063dSJacob Faibussowitsch PetscCall(MatView_SeqBAIJ_Draw(A, viewer)); 20635cd90555SBarry Smith } else { 2064a5e6ed63SBarry Smith Mat B; 20659566063dSJacob Faibussowitsch PetscCall(MatConvert(A, MATSEQAIJ, MAT_INITIAL_MATRIX, &B)); 20669566063dSJacob Faibussowitsch PetscCall(MatView(B, viewer)); 20679566063dSJacob Faibussowitsch PetscCall(MatDestroy(&B)); 20682593348eSBarry Smith } 20693ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 20702593348eSBarry Smith } 2071b6490206SBarry Smith 2072d71ae5a4SJacob Faibussowitsch PetscErrorCode MatGetValues_SeqBAIJ(Mat A, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], PetscScalar v[]) 2073d71ae5a4SJacob Faibussowitsch { 2074cd0e1443SSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 2075c1ac3661SBarry Smith PetscInt *rp, k, low, high, t, row, nrow, i, col, l, *aj = a->j; 2076c1ac3661SBarry Smith PetscInt *ai = a->i, *ailen = a->ilen; 2077d0f46423SBarry Smith PetscInt brow, bcol, ridx, cidx, bs = A->rmap->bs, bs2 = a->bs2; 207897e567efSBarry Smith MatScalar *ap, *aa = a->a; 2079cd0e1443SSatish Balay 20803a40ed3dSBarry Smith PetscFunctionBegin; 20812d61bbb3SSatish Balay for (k = 0; k < m; k++) { /* loop over rows */ 20829371c9d4SSatish Balay row = im[k]; 20839371c9d4SSatish Balay brow = row / bs; 20849371c9d4SSatish Balay if (row < 0) { 20859371c9d4SSatish Balay v += n; 20869371c9d4SSatish Balay continue; 20879371c9d4SSatish Balay } /* negative row */ 208854c59aa7SJacob Faibussowitsch PetscCheck(row < A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " too large", row); 20898e3a54c0SPierre Jolivet rp = PetscSafePointerPlusOffset(aj, ai[brow]); 20908e3a54c0SPierre Jolivet ap = PetscSafePointerPlusOffset(aa, bs2 * ai[brow]); 20912c3acbe9SBarry Smith nrow = ailen[brow]; 20922d61bbb3SSatish Balay for (l = 0; l < n; l++) { /* loop over columns */ 20939371c9d4SSatish Balay if (in[l] < 0) { 20949371c9d4SSatish Balay v++; 20959371c9d4SSatish Balay continue; 20969371c9d4SSatish Balay } /* negative column */ 209754c59aa7SJacob Faibussowitsch PetscCheck(in[l] < A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column %" PetscInt_FMT " too large", in[l]); 20982d61bbb3SSatish Balay col = in[l]; 20992d61bbb3SSatish Balay bcol = col / bs; 21002d61bbb3SSatish Balay cidx = col % bs; 21012d61bbb3SSatish Balay ridx = row % bs; 21022d61bbb3SSatish Balay high = nrow; 21032d61bbb3SSatish Balay low = 0; /* assume unsorted */ 21042d61bbb3SSatish Balay while (high - low > 5) { 2105cd0e1443SSatish Balay t = (low + high) / 2; 2106cd0e1443SSatish Balay if (rp[t] > bcol) high = t; 2107cd0e1443SSatish Balay else low = t; 2108cd0e1443SSatish Balay } 2109cd0e1443SSatish Balay for (i = low; i < high; i++) { 2110cd0e1443SSatish Balay if (rp[i] > bcol) break; 2111cd0e1443SSatish Balay if (rp[i] == bcol) { 21122d61bbb3SSatish Balay *v++ = ap[bs2 * i + bs * cidx + ridx]; 21132d61bbb3SSatish Balay goto finished; 2114cd0e1443SSatish Balay } 2115cd0e1443SSatish Balay } 211697e567efSBarry Smith *v++ = 0.0; 21172d61bbb3SSatish Balay finished:; 2118cd0e1443SSatish Balay } 2119cd0e1443SSatish Balay } 21203ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 2121cd0e1443SSatish Balay } 2122cd0e1443SSatish Balay 2123d71ae5a4SJacob Faibussowitsch PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat A, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode is) 2124d71ae5a4SJacob Faibussowitsch { 212592c4ed94SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 2126e2ee6c50SBarry Smith PetscInt *rp, k, low, high, t, ii, jj, row, nrow, i, col, l, rmax, N, lastcol = -1; 2127c1ac3661SBarry Smith PetscInt *imax = a->imax, *ai = a->i, *ailen = a->ilen; 2128d0f46423SBarry Smith PetscInt *aj = a->j, nonew = a->nonew, bs2 = a->bs2, bs = A->rmap->bs, stepval; 2129ace3abfcSBarry Smith PetscBool roworiented = a->roworiented; 2130dd6ea824SBarry Smith const PetscScalar *value = v; 21319d243f67SHong Zhang MatScalar *ap = NULL, *aa = a->a, *bap; 213292c4ed94SBarry Smith 21333a40ed3dSBarry Smith PetscFunctionBegin; 21340e324ae4SSatish Balay if (roworiented) { 21350e324ae4SSatish Balay stepval = (n - 1) * bs; 21360e324ae4SSatish Balay } else { 21370e324ae4SSatish Balay stepval = (m - 1) * bs; 21380e324ae4SSatish Balay } 213992c4ed94SBarry Smith for (k = 0; k < m; k++) { /* loop over added rows */ 214092c4ed94SBarry Smith row = im[k]; 21415ef9f2a5SBarry Smith if (row < 0) continue; 21426bdcaf15SBarry Smith PetscCheck(row < a->mbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Block row index too large %" PetscInt_FMT " max %" PetscInt_FMT, row, a->mbs - 1); 214392c4ed94SBarry Smith rp = aj + ai[row]; 21447dc0baabSHong Zhang if (!A->structure_only) ap = aa + bs2 * ai[row]; 214592c4ed94SBarry Smith rmax = imax[row]; 214692c4ed94SBarry Smith nrow = ailen[row]; 214792c4ed94SBarry Smith low = 0; 2148c71e6ed7SBarry Smith high = nrow; 214992c4ed94SBarry Smith for (l = 0; l < n; l++) { /* loop over added columns */ 21505ef9f2a5SBarry Smith if (in[l] < 0) continue; 21516bdcaf15SBarry Smith PetscCheck(in[l] < a->nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Block column index too large %" PetscInt_FMT " max %" PetscInt_FMT, in[l], a->nbs - 1); 215292c4ed94SBarry Smith col = in[l]; 21537dc0baabSHong Zhang if (!A->structure_only) { 215492c4ed94SBarry Smith if (roworiented) { 215553ef36baSBarry Smith value = v + (k * (stepval + bs) + l) * bs; 21560e324ae4SSatish Balay } else { 215753ef36baSBarry Smith value = v + (l * (stepval + bs) + k) * bs; 215892c4ed94SBarry Smith } 21597dc0baabSHong Zhang } 216026fbe8dcSKarl Rupp if (col <= lastcol) low = 0; 216126fbe8dcSKarl Rupp else high = nrow; 2162e2ee6c50SBarry Smith lastcol = col; 216392c4ed94SBarry Smith while (high - low > 7) { 216492c4ed94SBarry Smith t = (low + high) / 2; 216592c4ed94SBarry Smith if (rp[t] > col) high = t; 216692c4ed94SBarry Smith else low = t; 216792c4ed94SBarry Smith } 216892c4ed94SBarry Smith for (i = low; i < high; i++) { 216992c4ed94SBarry Smith if (rp[i] > col) break; 217092c4ed94SBarry Smith if (rp[i] == col) { 21717dc0baabSHong Zhang if (A->structure_only) goto noinsert2; 21728a84c255SSatish Balay bap = ap + bs2 * i; 21730e324ae4SSatish Balay if (roworiented) { 21748a84c255SSatish Balay if (is == ADD_VALUES) { 2175dd9472c6SBarry Smith for (ii = 0; ii < bs; ii++, value += stepval) { 2176ad540459SPierre Jolivet for (jj = ii; jj < bs2; jj += bs) bap[jj] += *value++; 2177dd9472c6SBarry Smith } 21780e324ae4SSatish Balay } else { 2179dd9472c6SBarry Smith for (ii = 0; ii < bs; ii++, value += stepval) { 2180ad540459SPierre Jolivet for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++; 2181dd9472c6SBarry Smith } 2182dd9472c6SBarry Smith } 21830e324ae4SSatish Balay } else { 21840e324ae4SSatish Balay if (is == ADD_VALUES) { 218553ef36baSBarry Smith for (ii = 0; ii < bs; ii++, value += bs + stepval) { 2186ad540459SPierre Jolivet for (jj = 0; jj < bs; jj++) bap[jj] += value[jj]; 218753ef36baSBarry Smith bap += bs; 2188dd9472c6SBarry Smith } 21890e324ae4SSatish Balay } else { 219053ef36baSBarry Smith for (ii = 0; ii < bs; ii++, value += bs + stepval) { 2191ad540459SPierre Jolivet for (jj = 0; jj < bs; jj++) bap[jj] = value[jj]; 219253ef36baSBarry Smith bap += bs; 21938a84c255SSatish Balay } 2194dd9472c6SBarry Smith } 2195dd9472c6SBarry Smith } 2196f1241b54SBarry Smith goto noinsert2; 219792c4ed94SBarry Smith } 219892c4ed94SBarry Smith } 219989280ab3SLois Curfman McInnes if (nonew == 1) goto noinsert2; 22005f80ce2aSJacob Faibussowitsch PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new blocked index new nonzero block (%" PetscInt_FMT ", %" PetscInt_FMT ") in the matrix", row, col); 22017dc0baabSHong Zhang if (A->structure_only) { 22027dc0baabSHong Zhang MatSeqXAIJReallocateAIJ_structure_only(A, a->mbs, bs2, nrow, row, col, rmax, ai, aj, rp, imax, nonew, MatScalar); 22037dc0baabSHong Zhang } else { 2204fef13f97SBarry Smith MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, row, col, rmax, aa, ai, aj, rp, ap, imax, nonew, MatScalar); 22057dc0baabSHong Zhang } 22069371c9d4SSatish Balay N = nrow++ - 1; 22079371c9d4SSatish Balay high++; 220892c4ed94SBarry Smith /* shift up all the later entries in this row */ 22099566063dSJacob Faibussowitsch PetscCall(PetscArraymove(rp + i + 1, rp + i, N - i + 1)); 221092c4ed94SBarry Smith rp[i] = col; 22117dc0baabSHong Zhang if (!A->structure_only) { 22129566063dSJacob Faibussowitsch PetscCall(PetscArraymove(ap + bs2 * (i + 1), ap + bs2 * i, bs2 * (N - i + 1))); 22138a84c255SSatish Balay bap = ap + bs2 * i; 22140e324ae4SSatish Balay if (roworiented) { 2215dd9472c6SBarry Smith for (ii = 0; ii < bs; ii++, value += stepval) { 2216ad540459SPierre Jolivet for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++; 2217dd9472c6SBarry Smith } 22180e324ae4SSatish Balay } else { 2219dd9472c6SBarry Smith for (ii = 0; ii < bs; ii++, value += stepval) { 2220ad540459SPierre Jolivet for (jj = 0; jj < bs; jj++) *bap++ = *value++; 2221dd9472c6SBarry Smith } 2222dd9472c6SBarry Smith } 22237dc0baabSHong Zhang } 2224f1241b54SBarry Smith noinsert2:; 222592c4ed94SBarry Smith low = i; 222692c4ed94SBarry Smith } 222792c4ed94SBarry Smith ailen[row] = nrow; 222892c4ed94SBarry Smith } 22293ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 223092c4ed94SBarry Smith } 223126e093fcSHong Zhang 2232d71ae5a4SJacob Faibussowitsch PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A, MatAssemblyType mode) 2233d71ae5a4SJacob Faibussowitsch { 2234584200bdSSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 2235580bdb30SBarry Smith PetscInt fshift = 0, i, *ai = a->i, *aj = a->j, *imax = a->imax; 2236d0f46423SBarry Smith PetscInt m = A->rmap->N, *ip, N, *ailen = a->ilen; 2237c1ac3661SBarry Smith PetscInt mbs = a->mbs, bs2 = a->bs2, rmax = 0; 22383f1db9ecSBarry Smith MatScalar *aa = a->a, *ap; 22393447b6efSHong Zhang PetscReal ratio = 0.6; 2240584200bdSSatish Balay 22413a40ed3dSBarry Smith PetscFunctionBegin; 2242d32568d8SPierre Jolivet if (mode == MAT_FLUSH_ASSEMBLY || (A->was_assembled && A->ass_nonzerostate == A->nonzerostate)) PetscFunctionReturn(PETSC_SUCCESS); 2243584200bdSSatish Balay 224443ee02c3SBarry Smith if (m) rmax = ailen[0]; 2245584200bdSSatish Balay for (i = 1; i < mbs; i++) { 2246584200bdSSatish Balay /* move each row back by the amount of empty slots (fshift) before it*/ 2247584200bdSSatish Balay fshift += imax[i - 1] - ailen[i - 1]; 2248d402145bSBarry Smith rmax = PetscMax(rmax, ailen[i]); 2249584200bdSSatish Balay if (fshift) { 2250580bdb30SBarry Smith ip = aj + ai[i]; 2251580bdb30SBarry Smith ap = aa + bs2 * ai[i]; 2252584200bdSSatish Balay N = ailen[i]; 22539566063dSJacob Faibussowitsch PetscCall(PetscArraymove(ip - fshift, ip, N)); 225448a46eb9SPierre Jolivet if (!A->structure_only) PetscCall(PetscArraymove(ap - bs2 * fshift, ap, bs2 * N)); 2255672ba085SHong Zhang } 2256584200bdSSatish Balay ai[i] = ai[i - 1] + ailen[i - 1]; 2257584200bdSSatish Balay } 2258584200bdSSatish Balay if (mbs) { 2259584200bdSSatish Balay fshift += imax[mbs - 1] - ailen[mbs - 1]; 2260584200bdSSatish Balay ai[mbs] = ai[mbs - 1] + ailen[mbs - 1]; 2261584200bdSSatish Balay } 22627c565772SBarry Smith 2263584200bdSSatish Balay /* reset ilen and imax for each row */ 22647c565772SBarry Smith a->nonzerorowcnt = 0; 2265672ba085SHong Zhang if (A->structure_only) { 22669566063dSJacob Faibussowitsch PetscCall(PetscFree2(a->imax, a->ilen)); 2267672ba085SHong Zhang } else { /* !A->structure_only */ 2268584200bdSSatish Balay for (i = 0; i < mbs; i++) { 2269584200bdSSatish Balay ailen[i] = imax[i] = ai[i + 1] - ai[i]; 22707c565772SBarry Smith a->nonzerorowcnt += ((ai[i + 1] - ai[i]) > 0); 2271584200bdSSatish Balay } 2272672ba085SHong Zhang } 2273a7c10996SSatish Balay a->nz = ai[mbs]; 2274584200bdSSatish Balay 2275584200bdSSatish Balay /* diagonals may have moved, so kill the diagonal pointers */ 2276b01c7715SBarry Smith a->idiagvalid = PETSC_FALSE; 2277ff6a9541SJacob Faibussowitsch if (fshift && a->diag) PetscCall(PetscFree(a->diag)); 22785f80ce2aSJacob Faibussowitsch if (fshift) PetscCheck(a->nounused != -1, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unused space detected in matrix: %" PetscInt_FMT " X %" PetscInt_FMT " block size %" PetscInt_FMT ", %" PetscInt_FMT " unneeded", m, A->cmap->n, A->rmap->bs, fshift * bs2); 22799566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "Matrix size: %" PetscInt_FMT " X %" PetscInt_FMT ", block size %" PetscInt_FMT "; storage space: %" PetscInt_FMT " unneeded, %" PetscInt_FMT " used\n", m, A->cmap->n, A->rmap->bs, fshift * bs2, a->nz * bs2)); 22809566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "Number of mallocs during MatSetValues is %" PetscInt_FMT "\n", a->reallocs)); 22819566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "Most nonzeros blocks in any row is %" PetscInt_FMT "\n", rmax)); 228226fbe8dcSKarl Rupp 22838e58a170SBarry Smith A->info.mallocs += a->reallocs; 2284e2f3b5e9SSatish Balay a->reallocs = 0; 22850e6d2581SBarry Smith A->info.nz_unneeded = (PetscReal)fshift * bs2; 2286647a6520SHong Zhang a->rmax = rmax; 2287cf4441caSHong Zhang 228848a46eb9SPierre Jolivet if (!A->structure_only) PetscCall(MatCheckCompressedRow(A, a->nonzerorowcnt, &a->compressedrow, a->i, mbs, ratio)); 22893ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 2290584200bdSSatish Balay } 2291584200bdSSatish Balay 2292bea157c4SSatish Balay /* 2293bea157c4SSatish Balay This function returns an array of flags which indicate the locations of contiguous 2294bea157c4SSatish Balay blocks that should be zeroed. for eg: if bs = 3 and is = [0,1,2,3,5,6,7,8,9] 2295a5b23f4aSJose E. Roman then the resulting sizes = [3,1,1,3,1] corresponding to sets [(0,1,2),(3),(5),(6,7,8),(9)] 2296bea157c4SSatish Balay Assume: sizes should be long enough to hold all the values. 2297bea157c4SSatish Balay */ 2298d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[], PetscInt n, PetscInt bs, PetscInt sizes[], PetscInt *bs_max) 2299d71ae5a4SJacob Faibussowitsch { 2300ff6a9541SJacob Faibussowitsch PetscInt j = 0; 23013a40ed3dSBarry Smith 2302433994e6SBarry Smith PetscFunctionBegin; 2303ff6a9541SJacob Faibussowitsch for (PetscInt i = 0; i < n; j++) { 2304ff6a9541SJacob Faibussowitsch PetscInt row = idx[i]; 2305a5b23f4aSJose E. Roman if (row % bs != 0) { /* Not the beginning of a block */ 2306bea157c4SSatish Balay sizes[j] = 1; 2307bea157c4SSatish Balay i++; 2308e4fda26cSSatish Balay } else if (i + bs > n) { /* complete block doesn't exist (at idx end) */ 2309bea157c4SSatish Balay sizes[j] = 1; /* Also makes sure at least 'bs' values exist for next else */ 2310bea157c4SSatish Balay i++; 23116aad120cSJose E. Roman } else { /* Beginning of the block, so check if the complete block exists */ 2312ff6a9541SJacob Faibussowitsch PetscBool flg = PETSC_TRUE; 2313ff6a9541SJacob Faibussowitsch for (PetscInt k = 1; k < bs; k++) { 2314bea157c4SSatish Balay if (row + k != idx[i + k]) { /* break in the block */ 2315bea157c4SSatish Balay flg = PETSC_FALSE; 2316bea157c4SSatish Balay break; 2317d9b7c43dSSatish Balay } 2318bea157c4SSatish Balay } 2319abc0a331SBarry Smith if (flg) { /* No break in the bs */ 2320bea157c4SSatish Balay sizes[j] = bs; 2321bea157c4SSatish Balay i += bs; 2322bea157c4SSatish Balay } else { 2323bea157c4SSatish Balay sizes[j] = 1; 2324bea157c4SSatish Balay i++; 2325bea157c4SSatish Balay } 2326bea157c4SSatish Balay } 2327bea157c4SSatish Balay } 2328bea157c4SSatish Balay *bs_max = j; 23293ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 2330d9b7c43dSSatish Balay } 2331d9b7c43dSSatish Balay 2332d71ae5a4SJacob Faibussowitsch PetscErrorCode MatZeroRows_SeqBAIJ(Mat A, PetscInt is_n, const PetscInt is_idx[], PetscScalar diag, Vec x, Vec b) 2333d71ae5a4SJacob Faibussowitsch { 2334d9b7c43dSSatish Balay Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)A->data; 2335f4df32b1SMatthew Knepley PetscInt i, j, k, count, *rows; 2336d0f46423SBarry Smith PetscInt bs = A->rmap->bs, bs2 = baij->bs2, *sizes, row, bs_max; 233787828ca2SBarry Smith PetscScalar zero = 0.0; 23383f1db9ecSBarry Smith MatScalar *aa; 233997b48c8fSBarry Smith const PetscScalar *xx; 234097b48c8fSBarry Smith PetscScalar *bb; 2341d9b7c43dSSatish Balay 23423a40ed3dSBarry Smith PetscFunctionBegin; 2343dd8e379bSPierre Jolivet /* fix right-hand side if needed */ 234497b48c8fSBarry Smith if (x && b) { 23459566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(x, &xx)); 23469566063dSJacob Faibussowitsch PetscCall(VecGetArray(b, &bb)); 2347ad540459SPierre Jolivet for (i = 0; i < is_n; i++) bb[is_idx[i]] = diag * xx[is_idx[i]]; 23489566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(x, &xx)); 23499566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(b, &bb)); 235097b48c8fSBarry Smith } 235197b48c8fSBarry Smith 2352d9b7c43dSSatish Balay /* Make a copy of the IS and sort it */ 2353bea157c4SSatish Balay /* allocate memory for rows,sizes */ 23549566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(is_n, &rows, 2 * is_n, &sizes)); 2355bea157c4SSatish Balay 2356563b5814SBarry Smith /* copy IS values to rows, and sort them */ 235726fbe8dcSKarl Rupp for (i = 0; i < is_n; i++) rows[i] = is_idx[i]; 23589566063dSJacob Faibussowitsch PetscCall(PetscSortInt(is_n, rows)); 235997b48c8fSBarry Smith 2360a9817697SBarry Smith if (baij->keepnonzeropattern) { 236126fbe8dcSKarl Rupp for (i = 0; i < is_n; i++) sizes[i] = 1; 2362dffd3267SBarry Smith bs_max = is_n; 2363dffd3267SBarry Smith } else { 23649566063dSJacob Faibussowitsch PetscCall(MatZeroRows_SeqBAIJ_Check_Blocks(rows, is_n, bs, sizes, &bs_max)); 2365e56f5c9eSBarry Smith A->nonzerostate++; 2366dffd3267SBarry Smith } 2367bea157c4SSatish Balay 2368bea157c4SSatish Balay for (i = 0, j = 0; i < bs_max; j += sizes[i], i++) { 2369bea157c4SSatish Balay row = rows[j]; 23705f80ce2aSJacob Faibussowitsch PetscCheck(row >= 0 && row <= A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "row %" PetscInt_FMT " out of range", row); 2371bea157c4SSatish Balay count = (baij->i[row / bs + 1] - baij->i[row / bs]) * bs; 2372835f2295SStefano Zampini aa = baij->a + baij->i[row / bs] * bs2 + (row % bs); 2373a9817697SBarry Smith if (sizes[i] == bs && !baij->keepnonzeropattern) { 2374d4a378daSJed Brown if (diag != (PetscScalar)0.0) { 2375bea157c4SSatish Balay if (baij->ilen[row / bs] > 0) { 2376bea157c4SSatish Balay baij->ilen[row / bs] = 1; 2377bea157c4SSatish Balay baij->j[baij->i[row / bs]] = row / bs; 237826fbe8dcSKarl Rupp 23799566063dSJacob Faibussowitsch PetscCall(PetscArrayzero(aa, count * bs)); 2380a07cd24cSSatish Balay } 2381563b5814SBarry Smith /* Now insert all the diagonal values for this bs */ 23829927e4dfSBarry Smith for (k = 0; k < bs; k++) PetscUseTypeMethod(A, setvalues, 1, rows + j + k, 1, rows + j + k, &diag, INSERT_VALUES); 2383f4df32b1SMatthew Knepley } else { /* (diag == 0.0) */ 2384bea157c4SSatish Balay baij->ilen[row / bs] = 0; 2385f4df32b1SMatthew Knepley } /* end (diag == 0.0) */ 2386bea157c4SSatish Balay } else { /* (sizes[i] != bs) */ 23876bdcaf15SBarry Smith PetscAssert(sizes[i] == 1, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Internal Error. Value should be 1"); 2388bea157c4SSatish Balay for (k = 0; k < count; k++) { 2389d9b7c43dSSatish Balay aa[0] = zero; 2390d9b7c43dSSatish Balay aa += bs; 2391d9b7c43dSSatish Balay } 23929927e4dfSBarry Smith if (diag != (PetscScalar)0.0) PetscUseTypeMethod(A, setvalues, 1, rows + j, 1, rows + j, &diag, INSERT_VALUES); 2393d9b7c43dSSatish Balay } 2394bea157c4SSatish Balay } 2395bea157c4SSatish Balay 23969566063dSJacob Faibussowitsch PetscCall(PetscFree2(rows, sizes)); 23979566063dSJacob Faibussowitsch PetscCall(MatAssemblyEnd_SeqBAIJ(A, MAT_FINAL_ASSEMBLY)); 23983ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 2399d9b7c43dSSatish Balay } 24001c351548SSatish Balay 2401ff6a9541SJacob Faibussowitsch static PetscErrorCode MatZeroRowsColumns_SeqBAIJ(Mat A, PetscInt is_n, const PetscInt is_idx[], PetscScalar diag, Vec x, Vec b) 2402d71ae5a4SJacob Faibussowitsch { 240397b48c8fSBarry Smith Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)A->data; 240497b48c8fSBarry Smith PetscInt i, j, k, count; 240597b48c8fSBarry Smith PetscInt bs = A->rmap->bs, bs2 = baij->bs2, row, col; 240697b48c8fSBarry Smith PetscScalar zero = 0.0; 240797b48c8fSBarry Smith MatScalar *aa; 240897b48c8fSBarry Smith const PetscScalar *xx; 240997b48c8fSBarry Smith PetscScalar *bb; 241056777dd2SBarry Smith PetscBool *zeroed, vecs = PETSC_FALSE; 241197b48c8fSBarry Smith 241297b48c8fSBarry Smith PetscFunctionBegin; 2413dd8e379bSPierre Jolivet /* fix right-hand side if needed */ 241497b48c8fSBarry Smith if (x && b) { 24159566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(x, &xx)); 24169566063dSJacob Faibussowitsch PetscCall(VecGetArray(b, &bb)); 241756777dd2SBarry Smith vecs = PETSC_TRUE; 241897b48c8fSBarry Smith } 241997b48c8fSBarry Smith 242097b48c8fSBarry Smith /* zero the columns */ 24219566063dSJacob Faibussowitsch PetscCall(PetscCalloc1(A->rmap->n, &zeroed)); 242297b48c8fSBarry Smith for (i = 0; i < is_n; i++) { 24235f80ce2aSJacob Faibussowitsch PetscCheck(is_idx[i] >= 0 && is_idx[i] < A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "row %" PetscInt_FMT " out of range", is_idx[i]); 242497b48c8fSBarry Smith zeroed[is_idx[i]] = PETSC_TRUE; 242597b48c8fSBarry Smith } 242697b48c8fSBarry Smith for (i = 0; i < A->rmap->N; i++) { 242797b48c8fSBarry Smith if (!zeroed[i]) { 242897b48c8fSBarry Smith row = i / bs; 242997b48c8fSBarry Smith for (j = baij->i[row]; j < baij->i[row + 1]; j++) { 243097b48c8fSBarry Smith for (k = 0; k < bs; k++) { 243197b48c8fSBarry Smith col = bs * baij->j[j] + k; 243297b48c8fSBarry Smith if (zeroed[col]) { 2433835f2295SStefano Zampini aa = baij->a + j * bs2 + (i % bs) + bs * k; 243456777dd2SBarry Smith if (vecs) bb[i] -= aa[0] * xx[col]; 243597b48c8fSBarry Smith aa[0] = 0.0; 243697b48c8fSBarry Smith } 243797b48c8fSBarry Smith } 243897b48c8fSBarry Smith } 243956777dd2SBarry Smith } else if (vecs) bb[i] = diag * xx[i]; 244097b48c8fSBarry Smith } 24419566063dSJacob Faibussowitsch PetscCall(PetscFree(zeroed)); 244256777dd2SBarry Smith if (vecs) { 24439566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(x, &xx)); 24449566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(b, &bb)); 244556777dd2SBarry Smith } 244697b48c8fSBarry Smith 244797b48c8fSBarry Smith /* zero the rows */ 244897b48c8fSBarry Smith for (i = 0; i < is_n; i++) { 244997b48c8fSBarry Smith row = is_idx[i]; 245097b48c8fSBarry Smith count = (baij->i[row / bs + 1] - baij->i[row / bs]) * bs; 2451835f2295SStefano Zampini aa = baij->a + baij->i[row / bs] * bs2 + (row % bs); 245297b48c8fSBarry Smith for (k = 0; k < count; k++) { 245397b48c8fSBarry Smith aa[0] = zero; 245497b48c8fSBarry Smith aa += bs; 245597b48c8fSBarry Smith } 2456dbbe0bcdSBarry Smith if (diag != (PetscScalar)0.0) PetscUseTypeMethod(A, setvalues, 1, &row, 1, &row, &diag, INSERT_VALUES); 245797b48c8fSBarry Smith } 24589566063dSJacob Faibussowitsch PetscCall(MatAssemblyEnd_SeqBAIJ(A, MAT_FINAL_ASSEMBLY)); 24593ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 246097b48c8fSBarry Smith } 246197b48c8fSBarry Smith 2462d71ae5a4SJacob Faibussowitsch PetscErrorCode MatSetValues_SeqBAIJ(Mat A, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode is) 2463d71ae5a4SJacob Faibussowitsch { 24642d61bbb3SSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 2465e2ee6c50SBarry Smith PetscInt *rp, k, low, high, t, ii, row, nrow, i, col, l, rmax, N, lastcol = -1; 2466c1ac3661SBarry Smith PetscInt *imax = a->imax, *ai = a->i, *ailen = a->ilen; 2467d0f46423SBarry Smith PetscInt *aj = a->j, nonew = a->nonew, bs = A->rmap->bs, brow, bcol; 2468c1ac3661SBarry Smith PetscInt ridx, cidx, bs2 = a->bs2; 2469ace3abfcSBarry Smith PetscBool roworiented = a->roworiented; 2470d8cdefa3SHong Zhang MatScalar *ap = NULL, value = 0.0, *aa = a->a, *bap; 24712d61bbb3SSatish Balay 24722d61bbb3SSatish Balay PetscFunctionBegin; 24732d61bbb3SSatish Balay for (k = 0; k < m; k++) { /* loop over added rows */ 2474085a36d4SBarry Smith row = im[k]; 2475085a36d4SBarry Smith brow = row / bs; 24765ef9f2a5SBarry Smith if (row < 0) continue; 24776bdcaf15SBarry Smith PetscCheck(row < A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, row, A->rmap->N - 1); 24788e3a54c0SPierre Jolivet rp = PetscSafePointerPlusOffset(aj, ai[brow]); 24798e3a54c0SPierre Jolivet if (!A->structure_only) ap = PetscSafePointerPlusOffset(aa, bs2 * ai[brow]); 24802d61bbb3SSatish Balay rmax = imax[brow]; 24812d61bbb3SSatish Balay nrow = ailen[brow]; 24822d61bbb3SSatish Balay low = 0; 2483c71e6ed7SBarry Smith high = nrow; 24842d61bbb3SSatish Balay for (l = 0; l < n; l++) { /* loop over added columns */ 24855ef9f2a5SBarry Smith if (in[l] < 0) continue; 24866bdcaf15SBarry Smith PetscCheck(in[l] < A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[l], A->cmap->n - 1); 24879371c9d4SSatish Balay col = in[l]; 24889371c9d4SSatish Balay bcol = col / bs; 24899371c9d4SSatish Balay ridx = row % bs; 24909371c9d4SSatish Balay cidx = col % bs; 2491672ba085SHong Zhang if (!A->structure_only) { 24922d61bbb3SSatish Balay if (roworiented) { 24935ef9f2a5SBarry Smith value = v[l + k * n]; 24942d61bbb3SSatish Balay } else { 24952d61bbb3SSatish Balay value = v[k + l * m]; 24962d61bbb3SSatish Balay } 2497672ba085SHong Zhang } 24989371c9d4SSatish Balay if (col <= lastcol) low = 0; 24999371c9d4SSatish Balay else high = nrow; 2500e2ee6c50SBarry Smith lastcol = col; 25012d61bbb3SSatish Balay while (high - low > 7) { 25022d61bbb3SSatish Balay t = (low + high) / 2; 25032d61bbb3SSatish Balay if (rp[t] > bcol) high = t; 25042d61bbb3SSatish Balay else low = t; 25052d61bbb3SSatish Balay } 25062d61bbb3SSatish Balay for (i = low; i < high; i++) { 25072d61bbb3SSatish Balay if (rp[i] > bcol) break; 25082d61bbb3SSatish Balay if (rp[i] == bcol) { 25098e3a54c0SPierre Jolivet bap = PetscSafePointerPlusOffset(ap, bs2 * i + bs * cidx + ridx); 2510672ba085SHong Zhang if (!A->structure_only) { 25112d61bbb3SSatish Balay if (is == ADD_VALUES) *bap += value; 25122d61bbb3SSatish Balay else *bap = value; 2513672ba085SHong Zhang } 25142d61bbb3SSatish Balay goto noinsert1; 25152d61bbb3SSatish Balay } 25162d61bbb3SSatish Balay } 25172d61bbb3SSatish Balay if (nonew == 1) goto noinsert1; 25185f80ce2aSJacob Faibussowitsch PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero (%" PetscInt_FMT ", %" PetscInt_FMT ") in the matrix", row, col); 2519672ba085SHong Zhang if (A->structure_only) { 2520672ba085SHong Zhang MatSeqXAIJReallocateAIJ_structure_only(A, a->mbs, bs2, nrow, brow, bcol, rmax, ai, aj, rp, imax, nonew, MatScalar); 2521672ba085SHong Zhang } else { 2522fef13f97SBarry Smith MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, brow, bcol, rmax, aa, ai, aj, rp, ap, imax, nonew, MatScalar); 2523672ba085SHong Zhang } 25249371c9d4SSatish Balay N = nrow++ - 1; 25259371c9d4SSatish Balay high++; 25262d61bbb3SSatish Balay /* shift up all the later entries in this row */ 25279566063dSJacob Faibussowitsch PetscCall(PetscArraymove(rp + i + 1, rp + i, N - i + 1)); 25282d61bbb3SSatish Balay rp[i] = bcol; 2529580bdb30SBarry Smith if (!A->structure_only) { 25309566063dSJacob Faibussowitsch PetscCall(PetscArraymove(ap + bs2 * (i + 1), ap + bs2 * i, bs2 * (N - i + 1))); 25319566063dSJacob Faibussowitsch PetscCall(PetscArrayzero(ap + bs2 * i, bs2)); 2532580bdb30SBarry Smith ap[bs2 * i + bs * cidx + ridx] = value; 2533580bdb30SBarry Smith } 2534085a36d4SBarry Smith a->nz++; 25352d61bbb3SSatish Balay noinsert1:; 25362d61bbb3SSatish Balay low = i; 25372d61bbb3SSatish Balay } 25382d61bbb3SSatish Balay ailen[brow] = nrow; 25392d61bbb3SSatish Balay } 25403ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 25412d61bbb3SSatish Balay } 25422d61bbb3SSatish Balay 2543ff6a9541SJacob Faibussowitsch static PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA, IS row, IS col, const MatFactorInfo *info) 2544d71ae5a4SJacob Faibussowitsch { 25452d61bbb3SSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)inA->data; 25462d61bbb3SSatish Balay Mat outA; 2547ace3abfcSBarry Smith PetscBool row_identity, col_identity; 25482d61bbb3SSatish Balay 25492d61bbb3SSatish Balay PetscFunctionBegin; 25505f80ce2aSJacob Faibussowitsch PetscCheck(info->levels == 0, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only levels = 0 supported for in-place ILU"); 25519566063dSJacob Faibussowitsch PetscCall(ISIdentity(row, &row_identity)); 25529566063dSJacob Faibussowitsch PetscCall(ISIdentity(col, &col_identity)); 25535f80ce2aSJacob Faibussowitsch PetscCheck(row_identity && col_identity, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Row and column permutations must be identity for in-place ILU"); 25542d61bbb3SSatish Balay 25552d61bbb3SSatish Balay outA = inA; 2556d5f3da31SBarry Smith inA->factortype = MAT_FACTOR_LU; 25579566063dSJacob Faibussowitsch PetscCall(PetscFree(inA->solvertype)); 25589566063dSJacob Faibussowitsch PetscCall(PetscStrallocpy(MATSOLVERPETSC, &inA->solvertype)); 25592d61bbb3SSatish Balay 25609566063dSJacob Faibussowitsch PetscCall(MatMarkDiagonal_SeqBAIJ(inA)); 2561cf242676SKris Buschelman 25629566063dSJacob Faibussowitsch PetscCall(PetscObjectReference((PetscObject)row)); 25639566063dSJacob Faibussowitsch PetscCall(ISDestroy(&a->row)); 2564c3122656SLisandro Dalcin a->row = row; 25659566063dSJacob Faibussowitsch PetscCall(PetscObjectReference((PetscObject)col)); 25669566063dSJacob Faibussowitsch PetscCall(ISDestroy(&a->col)); 2567c3122656SLisandro Dalcin a->col = col; 2568c38d4ed2SBarry Smith 2569c38d4ed2SBarry Smith /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */ 25709566063dSJacob Faibussowitsch PetscCall(ISDestroy(&a->icol)); 25719566063dSJacob Faibussowitsch PetscCall(ISInvertPermutation(col, PETSC_DECIDE, &a->icol)); 2572c38d4ed2SBarry Smith 25739566063dSJacob Faibussowitsch PetscCall(MatSeqBAIJSetNumericFactorization_inplace(inA, (PetscBool)(row_identity && col_identity))); 2574aa624791SPierre Jolivet if (!a->solve_work) PetscCall(PetscMalloc1(inA->rmap->N + inA->rmap->bs, &a->solve_work)); 25759566063dSJacob Faibussowitsch PetscCall(MatLUFactorNumeric(outA, inA, info)); 25763ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 25772d61bbb3SSatish Balay } 2578d9b7c43dSSatish Balay 2579ff6a9541SJacob Faibussowitsch static PetscErrorCode MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat, const PetscInt *indices) 2580d71ae5a4SJacob Faibussowitsch { 258127a8da17SBarry Smith Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data; 258227a8da17SBarry Smith 258327a8da17SBarry Smith PetscFunctionBegin; 2584ff6a9541SJacob Faibussowitsch baij->nz = baij->maxnz; 2585ff6a9541SJacob Faibussowitsch PetscCall(PetscArraycpy(baij->j, indices, baij->nz)); 2586ff6a9541SJacob Faibussowitsch PetscCall(PetscArraycpy(baij->ilen, baij->imax, baij->mbs)); 25873ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 258827a8da17SBarry Smith } 258927a8da17SBarry Smith 259027a8da17SBarry Smith /*@ 2591d8a51d2aSBarry Smith MatSeqBAIJSetColumnIndices - Set the column indices for all the block rows in the matrix. 259227a8da17SBarry Smith 259327a8da17SBarry Smith Input Parameters: 259411a5261eSBarry Smith + mat - the `MATSEQBAIJ` matrix 2595d8a51d2aSBarry Smith - indices - the block column indices 259627a8da17SBarry Smith 259715091d37SBarry Smith Level: advanced 259815091d37SBarry Smith 259927a8da17SBarry Smith Notes: 260027a8da17SBarry Smith This can be called if you have precomputed the nonzero structure of the 260127a8da17SBarry Smith matrix and want to provide it to the matrix object to improve the performance 260211a5261eSBarry Smith of the `MatSetValues()` operation. 260327a8da17SBarry Smith 260427a8da17SBarry Smith You MUST have set the correct numbers of nonzeros per row in the call to 260511a5261eSBarry Smith `MatCreateSeqBAIJ()`, and the columns indices MUST be sorted. 260627a8da17SBarry Smith 260711a5261eSBarry Smith MUST be called before any calls to `MatSetValues()` 260827a8da17SBarry Smith 26091cc06b55SBarry Smith .seealso: [](ch_matrices), `Mat`, `MATSEQBAIJ`, `MatSetValues()` 261027a8da17SBarry Smith @*/ 2611d71ae5a4SJacob Faibussowitsch PetscErrorCode MatSeqBAIJSetColumnIndices(Mat mat, PetscInt *indices) 2612d71ae5a4SJacob Faibussowitsch { 261327a8da17SBarry Smith PetscFunctionBegin; 26140700a824SBarry Smith PetscValidHeaderSpecific(mat, MAT_CLASSID, 1); 26154f572ea9SToby Isaac PetscAssertPointer(indices, 2); 2616810441c8SPierre Jolivet PetscUseMethod(mat, "MatSeqBAIJSetColumnIndices_C", (Mat, const PetscInt *), (mat, (const PetscInt *)indices)); 26173ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 261827a8da17SBarry Smith } 261927a8da17SBarry Smith 262066976f2fSJacob Faibussowitsch static PetscErrorCode MatGetRowMaxAbs_SeqBAIJ(Mat A, Vec v, PetscInt idx[]) 2621d71ae5a4SJacob Faibussowitsch { 2622273d9f13SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 2623c1ac3661SBarry Smith PetscInt i, j, n, row, bs, *ai, *aj, mbs; 2624273d9f13SBarry Smith PetscReal atmp; 262587828ca2SBarry Smith PetscScalar *x, zero = 0.0; 2626273d9f13SBarry Smith MatScalar *aa; 2627c1ac3661SBarry Smith PetscInt ncols, brow, krow, kcol; 2628273d9f13SBarry Smith 2629273d9f13SBarry Smith PetscFunctionBegin; 26305f80ce2aSJacob Faibussowitsch PetscCheck(!A->factortype, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Not for factored matrix"); 2631d0f46423SBarry Smith bs = A->rmap->bs; 2632273d9f13SBarry Smith aa = a->a; 2633273d9f13SBarry Smith ai = a->i; 2634273d9f13SBarry Smith aj = a->j; 2635273d9f13SBarry Smith mbs = a->mbs; 2636273d9f13SBarry Smith 26379566063dSJacob Faibussowitsch PetscCall(VecSet(v, zero)); 26389566063dSJacob Faibussowitsch PetscCall(VecGetArray(v, &x)); 26399566063dSJacob Faibussowitsch PetscCall(VecGetLocalSize(v, &n)); 26405f80ce2aSJacob Faibussowitsch PetscCheck(n == A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Nonconforming matrix and vector"); 2641273d9f13SBarry Smith for (i = 0; i < mbs; i++) { 26429371c9d4SSatish Balay ncols = ai[1] - ai[0]; 26439371c9d4SSatish Balay ai++; 2644273d9f13SBarry Smith brow = bs * i; 2645273d9f13SBarry Smith for (j = 0; j < ncols; j++) { 2646273d9f13SBarry Smith for (kcol = 0; kcol < bs; kcol++) { 2647273d9f13SBarry Smith for (krow = 0; krow < bs; krow++) { 26489371c9d4SSatish Balay atmp = PetscAbsScalar(*aa); 26499371c9d4SSatish Balay aa++; 2650273d9f13SBarry Smith row = brow + krow; /* row index */ 26519371c9d4SSatish Balay if (PetscAbsScalar(x[row]) < atmp) { 26529371c9d4SSatish Balay x[row] = atmp; 26539371c9d4SSatish Balay if (idx) idx[row] = bs * (*aj) + kcol; 26549371c9d4SSatish Balay } 2655273d9f13SBarry Smith } 2656273d9f13SBarry Smith } 2657273d9f13SBarry Smith aj++; 2658273d9f13SBarry Smith } 2659273d9f13SBarry Smith } 26609566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(v, &x)); 26613ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 2662273d9f13SBarry Smith } 2663273d9f13SBarry Smith 2664eede4a3fSMark Adams static PetscErrorCode MatGetRowSumAbs_SeqBAIJ(Mat A, Vec v) 2665eede4a3fSMark Adams { 2666eede4a3fSMark Adams Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 2667eede4a3fSMark Adams PetscInt i, j, n, row, bs, *ai, mbs; 2668eede4a3fSMark Adams PetscReal atmp; 2669eede4a3fSMark Adams PetscScalar *x, zero = 0.0; 2670eede4a3fSMark Adams MatScalar *aa; 2671eede4a3fSMark Adams PetscInt ncols, brow, krow, kcol; 2672eede4a3fSMark Adams 2673eede4a3fSMark Adams PetscFunctionBegin; 2674eede4a3fSMark Adams PetscCheck(!A->factortype, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Not for factored matrix"); 2675eede4a3fSMark Adams bs = A->rmap->bs; 2676eede4a3fSMark Adams aa = a->a; 2677eede4a3fSMark Adams ai = a->i; 2678eede4a3fSMark Adams mbs = a->mbs; 2679eede4a3fSMark Adams 2680eede4a3fSMark Adams PetscCall(VecSet(v, zero)); 2681eede4a3fSMark Adams PetscCall(VecGetArrayWrite(v, &x)); 2682eede4a3fSMark Adams PetscCall(VecGetLocalSize(v, &n)); 2683eede4a3fSMark Adams PetscCheck(n == A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Nonconforming matrix and vector"); 2684eede4a3fSMark Adams for (i = 0; i < mbs; i++) { 2685eede4a3fSMark Adams ncols = ai[1] - ai[0]; 2686eede4a3fSMark Adams ai++; 2687eede4a3fSMark Adams brow = bs * i; 2688eede4a3fSMark Adams for (j = 0; j < ncols; j++) { 2689eede4a3fSMark Adams for (kcol = 0; kcol < bs; kcol++) { 2690eede4a3fSMark Adams for (krow = 0; krow < bs; krow++) { 2691eede4a3fSMark Adams atmp = PetscAbsScalar(*aa); 2692eede4a3fSMark Adams aa++; 2693eede4a3fSMark Adams row = brow + krow; /* row index */ 2694eede4a3fSMark Adams x[row] += atmp; 2695eede4a3fSMark Adams } 2696eede4a3fSMark Adams } 2697eede4a3fSMark Adams } 2698eede4a3fSMark Adams } 2699eede4a3fSMark Adams PetscCall(VecRestoreArrayWrite(v, &x)); 2700eede4a3fSMark Adams PetscFunctionReturn(PETSC_SUCCESS); 2701eede4a3fSMark Adams } 2702eede4a3fSMark Adams 270366976f2fSJacob Faibussowitsch static PetscErrorCode MatCopy_SeqBAIJ(Mat A, Mat B, MatStructure str) 2704d71ae5a4SJacob Faibussowitsch { 27053c896bc6SHong Zhang PetscFunctionBegin; 27063c896bc6SHong Zhang /* If the two matrices have the same copy implementation, use fast copy. */ 27073c896bc6SHong Zhang if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) { 27083c896bc6SHong Zhang Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 27093c896bc6SHong Zhang Mat_SeqBAIJ *b = (Mat_SeqBAIJ *)B->data; 2710d88c0aacSHong Zhang PetscInt ambs = a->mbs, bmbs = b->mbs, abs = A->rmap->bs, bbs = B->rmap->bs, bs2 = abs * abs; 27113c896bc6SHong Zhang 27125f80ce2aSJacob Faibussowitsch PetscCheck(a->i[ambs] == b->i[bmbs], PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Number of nonzero blocks in matrices A %" PetscInt_FMT " and B %" PetscInt_FMT " are different", a->i[ambs], b->i[bmbs]); 27135f80ce2aSJacob Faibussowitsch PetscCheck(abs == bbs, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Block size A %" PetscInt_FMT " and B %" PetscInt_FMT " are different", abs, bbs); 27149566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(b->a, a->a, bs2 * a->i[ambs])); 27159566063dSJacob Faibussowitsch PetscCall(PetscObjectStateIncrease((PetscObject)B)); 27163c896bc6SHong Zhang } else { 27179566063dSJacob Faibussowitsch PetscCall(MatCopy_Basic(A, B, str)); 27183c896bc6SHong Zhang } 27193ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 27203c896bc6SHong Zhang } 27213c896bc6SHong Zhang 2722d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatSeqBAIJGetArray_SeqBAIJ(Mat A, PetscScalar *array[]) 2723d71ae5a4SJacob Faibussowitsch { 2724f2a5309cSSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 27256e111a19SKarl Rupp 2726f2a5309cSSatish Balay PetscFunctionBegin; 2727f2a5309cSSatish Balay *array = a->a; 27283ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 2729f2a5309cSSatish Balay } 2730f2a5309cSSatish Balay 2731d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatSeqBAIJRestoreArray_SeqBAIJ(Mat A, PetscScalar *array[]) 2732d71ae5a4SJacob Faibussowitsch { 2733f2a5309cSSatish Balay PetscFunctionBegin; 2734cda14afcSprj- *array = NULL; 27353ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 2736f2a5309cSSatish Balay } 2737f2a5309cSSatish Balay 2738d71ae5a4SJacob Faibussowitsch PetscErrorCode MatAXPYGetPreallocation_SeqBAIJ(Mat Y, Mat X, PetscInt *nnz) 2739d71ae5a4SJacob Faibussowitsch { 2740b264fe52SHong Zhang PetscInt bs = Y->rmap->bs, mbs = Y->rmap->N / bs; 274152768537SHong Zhang Mat_SeqBAIJ *x = (Mat_SeqBAIJ *)X->data; 274252768537SHong Zhang Mat_SeqBAIJ *y = (Mat_SeqBAIJ *)Y->data; 274352768537SHong Zhang 274452768537SHong Zhang PetscFunctionBegin; 274552768537SHong Zhang /* Set the number of nonzeros in the new matrix */ 27469566063dSJacob Faibussowitsch PetscCall(MatAXPYGetPreallocation_SeqX_private(mbs, x->i, x->j, y->i, y->j, nnz)); 27473ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 274852768537SHong Zhang } 274952768537SHong Zhang 2750d71ae5a4SJacob Faibussowitsch PetscErrorCode MatAXPY_SeqBAIJ(Mat Y, PetscScalar a, Mat X, MatStructure str) 2751d71ae5a4SJacob Faibussowitsch { 275242ee4b1aSHong Zhang Mat_SeqBAIJ *x = (Mat_SeqBAIJ *)X->data, *y = (Mat_SeqBAIJ *)Y->data; 275331ce2d13SHong Zhang PetscInt bs = Y->rmap->bs, bs2 = bs * bs; 2754e838b9e7SJed Brown PetscBLASInt one = 1; 275542ee4b1aSHong Zhang 275642ee4b1aSHong Zhang PetscFunctionBegin; 2757134adf20SPierre Jolivet if (str == UNKNOWN_NONZERO_PATTERN || (PetscDefined(USE_DEBUG) && str == SAME_NONZERO_PATTERN)) { 2758134adf20SPierre Jolivet PetscBool e = x->nz == y->nz && x->mbs == y->mbs && bs == X->rmap->bs ? PETSC_TRUE : PETSC_FALSE; 2759134adf20SPierre Jolivet if (e) { 27609566063dSJacob Faibussowitsch PetscCall(PetscArraycmp(x->i, y->i, x->mbs + 1, &e)); 2761134adf20SPierre Jolivet if (e) { 27629566063dSJacob Faibussowitsch PetscCall(PetscArraycmp(x->j, y->j, x->i[x->mbs], &e)); 2763134adf20SPierre Jolivet if (e) str = SAME_NONZERO_PATTERN; 2764134adf20SPierre Jolivet } 2765134adf20SPierre Jolivet } 276654c59aa7SJacob Faibussowitsch if (!e) PetscCheck(str != SAME_NONZERO_PATTERN, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "MatStructure is not SAME_NONZERO_PATTERN"); 2767134adf20SPierre Jolivet } 276842ee4b1aSHong Zhang if (str == SAME_NONZERO_PATTERN) { 2769f4df32b1SMatthew Knepley PetscScalar alpha = a; 2770c5df96a5SBarry Smith PetscBLASInt bnz; 27719566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast(x->nz * bs2, &bnz)); 2772792fecdfSBarry Smith PetscCallBLAS("BLASaxpy", BLASaxpy_(&bnz, &alpha, x->a, &one, y->a, &one)); 27739566063dSJacob Faibussowitsch PetscCall(PetscObjectStateIncrease((PetscObject)Y)); 2774ab784542SHong Zhang } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 27759566063dSJacob Faibussowitsch PetscCall(MatAXPY_Basic(Y, a, X, str)); 277642ee4b1aSHong Zhang } else { 277752768537SHong Zhang Mat B; 277852768537SHong Zhang PetscInt *nnz; 277954c59aa7SJacob Faibussowitsch PetscCheck(bs == X->rmap->bs, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrices must have same block size"); 27809566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(Y->rmap->N, &nnz)); 27819566063dSJacob Faibussowitsch PetscCall(MatCreate(PetscObjectComm((PetscObject)Y), &B)); 27829566063dSJacob Faibussowitsch PetscCall(PetscObjectSetName((PetscObject)B, ((PetscObject)Y)->name)); 27839566063dSJacob Faibussowitsch PetscCall(MatSetSizes(B, Y->rmap->n, Y->cmap->n, Y->rmap->N, Y->cmap->N)); 27849566063dSJacob Faibussowitsch PetscCall(MatSetBlockSizesFromMats(B, Y, Y)); 27859566063dSJacob Faibussowitsch PetscCall(MatSetType(B, (MatType)((PetscObject)Y)->type_name)); 27869566063dSJacob Faibussowitsch PetscCall(MatAXPYGetPreallocation_SeqBAIJ(Y, X, nnz)); 27879566063dSJacob Faibussowitsch PetscCall(MatSeqBAIJSetPreallocation(B, bs, 0, nnz)); 27889566063dSJacob Faibussowitsch PetscCall(MatAXPY_BasicWithPreallocation(B, Y, a, X, str)); 27899566063dSJacob Faibussowitsch PetscCall(MatHeaderMerge(Y, &B)); 27909566063dSJacob Faibussowitsch PetscCall(PetscFree(nnz)); 279142ee4b1aSHong Zhang } 27923ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 279342ee4b1aSHong Zhang } 279442ee4b1aSHong Zhang 2795d71ae5a4SJacob Faibussowitsch PETSC_INTERN PetscErrorCode MatConjugate_SeqBAIJ(Mat A) 2796d71ae5a4SJacob Faibussowitsch { 2797ff6a9541SJacob Faibussowitsch #if PetscDefined(USE_COMPLEX) 27982726fb6dSPierre Jolivet Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 27992726fb6dSPierre Jolivet PetscInt i, nz = a->bs2 * a->i[a->mbs]; 28002726fb6dSPierre Jolivet MatScalar *aa = a->a; 28012726fb6dSPierre Jolivet 28022726fb6dSPierre Jolivet PetscFunctionBegin; 28032726fb6dSPierre Jolivet for (i = 0; i < nz; i++) aa[i] = PetscConj(aa[i]); 28043ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 2805ff6a9541SJacob Faibussowitsch #else 2806ff6a9541SJacob Faibussowitsch (void)A; 2807ff6a9541SJacob Faibussowitsch return PETSC_SUCCESS; 2808ff6a9541SJacob Faibussowitsch #endif 28092726fb6dSPierre Jolivet } 28102726fb6dSPierre Jolivet 2811ff6a9541SJacob Faibussowitsch static PetscErrorCode MatRealPart_SeqBAIJ(Mat A) 2812d71ae5a4SJacob Faibussowitsch { 2813ff6a9541SJacob Faibussowitsch #if PetscDefined(USE_COMPLEX) 281499cafbc1SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 281599cafbc1SBarry Smith PetscInt i, nz = a->bs2 * a->i[a->mbs]; 2816dd6ea824SBarry Smith MatScalar *aa = a->a; 281799cafbc1SBarry Smith 281899cafbc1SBarry Smith PetscFunctionBegin; 281999cafbc1SBarry Smith for (i = 0; i < nz; i++) aa[i] = PetscRealPart(aa[i]); 28203ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 2821ff6a9541SJacob Faibussowitsch #else 2822ff6a9541SJacob Faibussowitsch (void)A; 2823ff6a9541SJacob Faibussowitsch return PETSC_SUCCESS; 2824ff6a9541SJacob Faibussowitsch #endif 282599cafbc1SBarry Smith } 282699cafbc1SBarry Smith 2827ff6a9541SJacob Faibussowitsch static PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A) 2828d71ae5a4SJacob Faibussowitsch { 2829ff6a9541SJacob Faibussowitsch #if PetscDefined(USE_COMPLEX) 283099cafbc1SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 283199cafbc1SBarry Smith PetscInt i, nz = a->bs2 * a->i[a->mbs]; 2832dd6ea824SBarry Smith MatScalar *aa = a->a; 283399cafbc1SBarry Smith 283499cafbc1SBarry Smith PetscFunctionBegin; 283599cafbc1SBarry Smith for (i = 0; i < nz; i++) aa[i] = PetscImaginaryPart(aa[i]); 28363ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 2837ff6a9541SJacob Faibussowitsch #else 2838ff6a9541SJacob Faibussowitsch (void)A; 2839ff6a9541SJacob Faibussowitsch return PETSC_SUCCESS; 2840ff6a9541SJacob Faibussowitsch #endif 284199cafbc1SBarry Smith } 284299cafbc1SBarry Smith 28433acb8795SBarry Smith /* 28442479783cSJose E. Roman Code almost identical to MatGetColumnIJ_SeqAIJ() should share common code 28453acb8795SBarry Smith */ 2846ff6a9541SJacob Faibussowitsch static PetscErrorCode MatGetColumnIJ_SeqBAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *nn, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done) 2847d71ae5a4SJacob Faibussowitsch { 28483acb8795SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 28493acb8795SBarry Smith PetscInt bs = A->rmap->bs, i, *collengths, *cia, *cja, n = A->cmap->n / bs, m = A->rmap->n / bs; 28503acb8795SBarry Smith PetscInt nz = a->i[m], row, *jj, mr, col; 28513acb8795SBarry Smith 28523acb8795SBarry Smith PetscFunctionBegin; 28533acb8795SBarry Smith *nn = n; 28543ba16761SJacob Faibussowitsch if (!ia) PetscFunctionReturn(PETSC_SUCCESS); 28555f80ce2aSJacob Faibussowitsch PetscCheck(!symmetric, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not for BAIJ matrices"); 28569566063dSJacob Faibussowitsch PetscCall(PetscCalloc1(n, &collengths)); 28579566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(n + 1, &cia)); 28589566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(nz, &cja)); 28593acb8795SBarry Smith jj = a->j; 2860ad540459SPierre Jolivet for (i = 0; i < nz; i++) collengths[jj[i]]++; 28613acb8795SBarry Smith cia[0] = oshift; 2862ad540459SPierre Jolivet for (i = 0; i < n; i++) cia[i + 1] = cia[i] + collengths[i]; 28639566063dSJacob Faibussowitsch PetscCall(PetscArrayzero(collengths, n)); 28643acb8795SBarry Smith jj = a->j; 28653acb8795SBarry Smith for (row = 0; row < m; row++) { 28663acb8795SBarry Smith mr = a->i[row + 1] - a->i[row]; 28673acb8795SBarry Smith for (i = 0; i < mr; i++) { 28683acb8795SBarry Smith col = *jj++; 286926fbe8dcSKarl Rupp 28703acb8795SBarry Smith cja[cia[col] + collengths[col]++ - oshift] = row + oshift; 28713acb8795SBarry Smith } 28723acb8795SBarry Smith } 28739566063dSJacob Faibussowitsch PetscCall(PetscFree(collengths)); 28749371c9d4SSatish Balay *ia = cia; 28759371c9d4SSatish Balay *ja = cja; 28763ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 28773acb8795SBarry Smith } 28783acb8795SBarry Smith 2879ff6a9541SJacob Faibussowitsch static PetscErrorCode MatRestoreColumnIJ_SeqBAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *n, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done) 2880d71ae5a4SJacob Faibussowitsch { 28813acb8795SBarry Smith PetscFunctionBegin; 28823ba16761SJacob Faibussowitsch if (!ia) PetscFunctionReturn(PETSC_SUCCESS); 28839566063dSJacob Faibussowitsch PetscCall(PetscFree(*ia)); 28849566063dSJacob Faibussowitsch PetscCall(PetscFree(*ja)); 28853ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 28863acb8795SBarry Smith } 28873acb8795SBarry Smith 2888525d23c0SHong Zhang /* 2889525d23c0SHong Zhang MatGetColumnIJ_SeqBAIJ_Color() and MatRestoreColumnIJ_SeqBAIJ_Color() are customized from 2890525d23c0SHong Zhang MatGetColumnIJ_SeqBAIJ() and MatRestoreColumnIJ_SeqBAIJ() by adding an output 2891040ebd07SHong Zhang spidx[], index of a->a, to be used in MatTransposeColoringCreate() and MatFDColoringCreate() 2892525d23c0SHong Zhang */ 2893d71ae5a4SJacob Faibussowitsch PetscErrorCode MatGetColumnIJ_SeqBAIJ_Color(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *nn, const PetscInt *ia[], const PetscInt *ja[], PetscInt *spidx[], PetscBool *done) 2894d71ae5a4SJacob Faibussowitsch { 2895525d23c0SHong Zhang Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 2896c0349474SHong Zhang PetscInt i, *collengths, *cia, *cja, n = a->nbs, m = a->mbs; 2897525d23c0SHong Zhang PetscInt nz = a->i[m], row, *jj, mr, col; 2898525d23c0SHong Zhang PetscInt *cspidx; 2899f6d58c54SBarry Smith 2900f6d58c54SBarry Smith PetscFunctionBegin; 2901525d23c0SHong Zhang *nn = n; 29023ba16761SJacob Faibussowitsch if (!ia) PetscFunctionReturn(PETSC_SUCCESS); 2903f6d58c54SBarry Smith 29049566063dSJacob Faibussowitsch PetscCall(PetscCalloc1(n, &collengths)); 29059566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(n + 1, &cia)); 29069566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(nz, &cja)); 29079566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(nz, &cspidx)); 2908525d23c0SHong Zhang jj = a->j; 2909ad540459SPierre Jolivet for (i = 0; i < nz; i++) collengths[jj[i]]++; 2910525d23c0SHong Zhang cia[0] = oshift; 2911ad540459SPierre Jolivet for (i = 0; i < n; i++) cia[i + 1] = cia[i] + collengths[i]; 29129566063dSJacob Faibussowitsch PetscCall(PetscArrayzero(collengths, n)); 2913525d23c0SHong Zhang jj = a->j; 2914525d23c0SHong Zhang for (row = 0; row < m; row++) { 2915525d23c0SHong Zhang mr = a->i[row + 1] - a->i[row]; 2916525d23c0SHong Zhang for (i = 0; i < mr; i++) { 2917525d23c0SHong Zhang col = *jj++; 2918525d23c0SHong Zhang cspidx[cia[col] + collengths[col] - oshift] = a->i[row] + i; /* index of a->j */ 2919525d23c0SHong Zhang cja[cia[col] + collengths[col]++ - oshift] = row + oshift; 2920525d23c0SHong Zhang } 2921525d23c0SHong Zhang } 29229566063dSJacob Faibussowitsch PetscCall(PetscFree(collengths)); 2923071fcb05SBarry Smith *ia = cia; 2924071fcb05SBarry Smith *ja = cja; 2925525d23c0SHong Zhang *spidx = cspidx; 29263ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 2927f6d58c54SBarry Smith } 2928f6d58c54SBarry Smith 2929d71ae5a4SJacob Faibussowitsch PetscErrorCode MatRestoreColumnIJ_SeqBAIJ_Color(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *n, const PetscInt *ia[], const PetscInt *ja[], PetscInt *spidx[], PetscBool *done) 2930d71ae5a4SJacob Faibussowitsch { 2931525d23c0SHong Zhang PetscFunctionBegin; 29329566063dSJacob Faibussowitsch PetscCall(MatRestoreColumnIJ_SeqBAIJ(A, oshift, symmetric, inodecompressed, n, ia, ja, done)); 29339566063dSJacob Faibussowitsch PetscCall(PetscFree(*spidx)); 29343ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 2935f6d58c54SBarry Smith } 293699cafbc1SBarry Smith 293766976f2fSJacob Faibussowitsch static PetscErrorCode MatShift_SeqBAIJ(Mat Y, PetscScalar a) 2938d71ae5a4SJacob Faibussowitsch { 29397d68702bSBarry Smith Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)Y->data; 29407d68702bSBarry Smith 29417d68702bSBarry Smith PetscFunctionBegin; 294248a46eb9SPierre Jolivet if (!Y->preallocated || !aij->nz) PetscCall(MatSeqBAIJSetPreallocation(Y, Y->rmap->bs, 1, NULL)); 29439566063dSJacob Faibussowitsch PetscCall(MatShift_Basic(Y, a)); 29443ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 29457d68702bSBarry Smith } 29467d68702bSBarry Smith 294717ea310bSPierre Jolivet PetscErrorCode MatEliminateZeros_SeqBAIJ(Mat A, PetscBool keep) 294817ea310bSPierre Jolivet { 294917ea310bSPierre Jolivet Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 295017ea310bSPierre Jolivet PetscInt fshift = 0, fshift_prev = 0, i, *ai = a->i, *aj = a->j, *imax = a->imax, j, k; 295117ea310bSPierre Jolivet PetscInt m = A->rmap->N, *ailen = a->ilen; 295217ea310bSPierre Jolivet PetscInt mbs = a->mbs, bs2 = a->bs2, rmax = 0; 295317ea310bSPierre Jolivet MatScalar *aa = a->a, *ap; 295417ea310bSPierre Jolivet PetscBool zero; 295517ea310bSPierre Jolivet 295617ea310bSPierre Jolivet PetscFunctionBegin; 295717ea310bSPierre Jolivet PetscCheck(A->assembled, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Cannot eliminate zeros for unassembled matrix"); 295817ea310bSPierre Jolivet if (m) rmax = ailen[0]; 295917ea310bSPierre Jolivet for (i = 1; i <= mbs; i++) { 296017ea310bSPierre Jolivet for (k = ai[i - 1]; k < ai[i]; k++) { 296117ea310bSPierre Jolivet zero = PETSC_TRUE; 296217ea310bSPierre Jolivet ap = aa + bs2 * k; 296317ea310bSPierre Jolivet for (j = 0; j < bs2 && zero; j++) { 296417ea310bSPierre Jolivet if (ap[j] != 0.0) zero = PETSC_FALSE; 296517ea310bSPierre Jolivet } 296617ea310bSPierre Jolivet if (zero && (aj[k] != i - 1 || !keep)) fshift++; 296717ea310bSPierre Jolivet else { 296817ea310bSPierre Jolivet if (zero && aj[k] == i - 1) PetscCall(PetscInfo(A, "Keep the diagonal block at row %" PetscInt_FMT "\n", i - 1)); 296917ea310bSPierre Jolivet aj[k - fshift] = aj[k]; 297017ea310bSPierre Jolivet PetscCall(PetscArraymove(ap - bs2 * fshift, ap, bs2)); 297117ea310bSPierre Jolivet } 297217ea310bSPierre Jolivet } 297317ea310bSPierre Jolivet ai[i - 1] -= fshift_prev; 297417ea310bSPierre Jolivet fshift_prev = fshift; 297517ea310bSPierre Jolivet ailen[i - 1] = imax[i - 1] = ai[i] - fshift - ai[i - 1]; 297617ea310bSPierre Jolivet a->nonzerorowcnt += ((ai[i] - fshift - ai[i - 1]) > 0); 297717ea310bSPierre Jolivet rmax = PetscMax(rmax, ailen[i - 1]); 297817ea310bSPierre Jolivet } 297917ea310bSPierre Jolivet if (fshift) { 298017ea310bSPierre Jolivet if (mbs) { 298117ea310bSPierre Jolivet ai[mbs] -= fshift; 298217ea310bSPierre Jolivet a->nz = ai[mbs]; 298317ea310bSPierre Jolivet } 298417ea310bSPierre Jolivet PetscCall(PetscInfo(A, "Matrix size: %" PetscInt_FMT " X %" PetscInt_FMT "; zeros eliminated: %" PetscInt_FMT "; nonzeros left: %" PetscInt_FMT "\n", m, A->cmap->n, fshift, a->nz)); 298517ea310bSPierre Jolivet A->nonzerostate++; 298617ea310bSPierre Jolivet A->info.nz_unneeded += (PetscReal)fshift; 298717ea310bSPierre Jolivet a->rmax = rmax; 298817ea310bSPierre Jolivet PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY)); 298917ea310bSPierre Jolivet PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY)); 299017ea310bSPierre Jolivet } 299117ea310bSPierre Jolivet PetscFunctionReturn(PETSC_SUCCESS); 299217ea310bSPierre Jolivet } 299317ea310bSPierre Jolivet 2994dec0b466SHong Zhang static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ, 2995cc2dc46cSBarry Smith MatGetRow_SeqBAIJ, 2996cc2dc46cSBarry Smith MatRestoreRow_SeqBAIJ, 2997cc2dc46cSBarry Smith MatMult_SeqBAIJ_N, 299897304618SKris Buschelman /* 4*/ MatMultAdd_SeqBAIJ_N, 29997c922b88SBarry Smith MatMultTranspose_SeqBAIJ, 30007c922b88SBarry Smith MatMultTransposeAdd_SeqBAIJ, 3001f4259b30SLisandro Dalcin NULL, 3002f4259b30SLisandro Dalcin NULL, 3003f4259b30SLisandro Dalcin NULL, 3004f4259b30SLisandro Dalcin /* 10*/ NULL, 3005cc2dc46cSBarry Smith MatLUFactor_SeqBAIJ, 3006f4259b30SLisandro Dalcin NULL, 3007f4259b30SLisandro Dalcin NULL, 3008f2501298SSatish Balay MatTranspose_SeqBAIJ, 300997304618SKris Buschelman /* 15*/ MatGetInfo_SeqBAIJ, 3010cc2dc46cSBarry Smith MatEqual_SeqBAIJ, 3011cc2dc46cSBarry Smith MatGetDiagonal_SeqBAIJ, 3012cc2dc46cSBarry Smith MatDiagonalScale_SeqBAIJ, 3013cc2dc46cSBarry Smith MatNorm_SeqBAIJ, 3014f4259b30SLisandro Dalcin /* 20*/ NULL, 3015cc2dc46cSBarry Smith MatAssemblyEnd_SeqBAIJ, 3016cc2dc46cSBarry Smith MatSetOption_SeqBAIJ, 3017cc2dc46cSBarry Smith MatZeroEntries_SeqBAIJ, 3018d519adbfSMatthew Knepley /* 24*/ MatZeroRows_SeqBAIJ, 3019f4259b30SLisandro Dalcin NULL, 3020f4259b30SLisandro Dalcin NULL, 3021f4259b30SLisandro Dalcin NULL, 3022f4259b30SLisandro Dalcin NULL, 302326cec326SBarry Smith /* 29*/ MatSetUp_Seq_Hash, 3024f4259b30SLisandro Dalcin NULL, 3025f4259b30SLisandro Dalcin NULL, 3026f4259b30SLisandro Dalcin NULL, 3027f4259b30SLisandro Dalcin NULL, 3028d519adbfSMatthew Knepley /* 34*/ MatDuplicate_SeqBAIJ, 3029f4259b30SLisandro Dalcin NULL, 3030f4259b30SLisandro Dalcin NULL, 3031cc2dc46cSBarry Smith MatILUFactor_SeqBAIJ, 3032f4259b30SLisandro Dalcin NULL, 3033d519adbfSMatthew Knepley /* 39*/ MatAXPY_SeqBAIJ, 30347dae84e0SHong Zhang MatCreateSubMatrices_SeqBAIJ, 3035cc2dc46cSBarry Smith MatIncreaseOverlap_SeqBAIJ, 3036cc2dc46cSBarry Smith MatGetValues_SeqBAIJ, 30373c896bc6SHong Zhang MatCopy_SeqBAIJ, 3038f4259b30SLisandro Dalcin /* 44*/ NULL, 3039cc2dc46cSBarry Smith MatScale_SeqBAIJ, 30407d68702bSBarry Smith MatShift_SeqBAIJ, 3041f4259b30SLisandro Dalcin NULL, 304297b48c8fSBarry Smith MatZeroRowsColumns_SeqBAIJ, 3043f4259b30SLisandro Dalcin /* 49*/ NULL, 30443b2fbd54SBarry Smith MatGetRowIJ_SeqBAIJ, 304592c4ed94SBarry Smith MatRestoreRowIJ_SeqBAIJ, 30463acb8795SBarry Smith MatGetColumnIJ_SeqBAIJ, 30473acb8795SBarry Smith MatRestoreColumnIJ_SeqBAIJ, 304893dfae19SHong Zhang /* 54*/ MatFDColoringCreate_SeqXAIJ, 3049f4259b30SLisandro Dalcin NULL, 3050f4259b30SLisandro Dalcin NULL, 3051090001bdSToby Isaac NULL, 3052d3825aa8SBarry Smith MatSetValuesBlocked_SeqBAIJ, 30537dae84e0SHong Zhang /* 59*/ MatCreateSubMatrix_SeqBAIJ, 3054b9b97703SBarry Smith MatDestroy_SeqBAIJ, 3055b9b97703SBarry Smith MatView_SeqBAIJ, 3056f4259b30SLisandro Dalcin NULL, 3057f4259b30SLisandro Dalcin NULL, 3058f4259b30SLisandro Dalcin /* 64*/ NULL, 3059f4259b30SLisandro Dalcin NULL, 3060f4259b30SLisandro Dalcin NULL, 3061f4259b30SLisandro Dalcin NULL, 30628bb0f5c6SPierre Jolivet MatGetRowMaxAbs_SeqBAIJ, 30638bb0f5c6SPierre Jolivet /* 69*/ NULL, 3064c87e5d42SMatthew Knepley MatConvert_Basic, 3065f4259b30SLisandro Dalcin NULL, 3066f6d58c54SBarry Smith MatFDColoringApply_BAIJ, 3067f4259b30SLisandro Dalcin NULL, 30688bb0f5c6SPierre Jolivet /* 74*/ NULL, 3069f4259b30SLisandro Dalcin NULL, 3070f4259b30SLisandro Dalcin NULL, 3071f4259b30SLisandro Dalcin NULL, 30725bba2384SShri Abhyankar MatLoad_SeqBAIJ, 30738bb0f5c6SPierre Jolivet /* 79*/ NULL, 30748bb0f5c6SPierre Jolivet NULL, 30758bb0f5c6SPierre Jolivet NULL, 30768bb0f5c6SPierre Jolivet NULL, 30778bb0f5c6SPierre Jolivet NULL, 3078f4259b30SLisandro Dalcin /* 84*/ NULL, 3079f4259b30SLisandro Dalcin NULL, 3080f4259b30SLisandro Dalcin NULL, 3081f4259b30SLisandro Dalcin NULL, 3082f4259b30SLisandro Dalcin NULL, 3083f4259b30SLisandro Dalcin /* 89*/ NULL, 3084f4259b30SLisandro Dalcin NULL, 3085f4259b30SLisandro Dalcin NULL, 3086f4259b30SLisandro Dalcin NULL, 30878bb0f5c6SPierre Jolivet MatConjugate_SeqBAIJ, 3088f4259b30SLisandro Dalcin /* 94*/ NULL, 3089f4259b30SLisandro Dalcin NULL, 30908bb0f5c6SPierre Jolivet MatRealPart_SeqBAIJ, 30918bb0f5c6SPierre Jolivet MatImaginaryPart_SeqBAIJ, 3092f4259b30SLisandro Dalcin NULL, 3093f4259b30SLisandro Dalcin /* 99*/ NULL, 3094f4259b30SLisandro Dalcin NULL, 3095f4259b30SLisandro Dalcin NULL, 3096f4259b30SLisandro Dalcin NULL, 30978bb0f5c6SPierre Jolivet NULL, 30988bb0f5c6SPierre Jolivet /*104*/ MatMissingDiagonal_SeqBAIJ, 30998bb0f5c6SPierre Jolivet NULL, 31008bb0f5c6SPierre Jolivet NULL, 3101f4259b30SLisandro Dalcin NULL, 3102f4259b30SLisandro Dalcin NULL, 3103f4259b30SLisandro Dalcin /*109*/ NULL, 3104f4259b30SLisandro Dalcin NULL, 3105f4259b30SLisandro Dalcin NULL, 3106547795f9SHong Zhang MatMultHermitianTranspose_SeqBAIJ, 3107d6037b41SHong Zhang MatMultHermitianTransposeAdd_SeqBAIJ, 31088bb0f5c6SPierre Jolivet /*114*/ NULL, 3109f4259b30SLisandro Dalcin NULL, 3110857cbf51SRichard Tran Mills MatGetColumnReductions_SeqBAIJ, 31113964eb88SJed Brown MatInvertBlockDiagonal_SeqBAIJ, 3112f4259b30SLisandro Dalcin NULL, 31138bb0f5c6SPierre Jolivet /*119*/ NULL, 3114f4259b30SLisandro Dalcin NULL, 3115f4259b30SLisandro Dalcin NULL, 3116f4259b30SLisandro Dalcin NULL, 3117f4259b30SLisandro Dalcin NULL, 31188bb0f5c6SPierre Jolivet /*124*/ NULL, 31198bb0f5c6SPierre Jolivet NULL, 31208bb0f5c6SPierre Jolivet NULL, 31218bb0f5c6SPierre Jolivet MatSetBlockSizes_Default, 31228bb0f5c6SPierre Jolivet NULL, 31238bb0f5c6SPierre Jolivet /*129*/ MatFDColoringSetUp_SeqXAIJ, 31248bb0f5c6SPierre Jolivet NULL, 31258bb0f5c6SPierre Jolivet MatCreateMPIMatConcatenateSeqMat_SeqBAIJ, 31268bb0f5c6SPierre Jolivet MatDestroySubMatrices_SeqBAIJ, 31278bb0f5c6SPierre Jolivet NULL, 3128f4259b30SLisandro Dalcin /*134*/ NULL, 3129f4259b30SLisandro Dalcin NULL, 3130f4259b30SLisandro Dalcin NULL, 3131eede4a3fSMark Adams MatEliminateZeros_SeqBAIJ, 31324cc2b5b5SPierre Jolivet MatGetRowSumAbs_SeqBAIJ, 31338bb0f5c6SPierre Jolivet /*139*/ NULL, 313442ce410bSJunchao Zhang NULL, 313542ce410bSJunchao Zhang NULL, 313603db1824SAlex Lindsay MatCopyHashToXAIJ_Seq_Hash, 3137c2be7ffeSStefano Zampini NULL, 313803db1824SAlex Lindsay NULL}; 31392593348eSBarry Smith 3140ff6a9541SJacob Faibussowitsch static PetscErrorCode MatStoreValues_SeqBAIJ(Mat mat) 3141d71ae5a4SJacob Faibussowitsch { 31423e90b805SBarry Smith Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data; 31438ece6314SShri Abhyankar PetscInt nz = aij->i[aij->mbs] * aij->bs2; 31443e90b805SBarry Smith 31453e90b805SBarry Smith PetscFunctionBegin; 31465f80ce2aSJacob Faibussowitsch PetscCheck(aij->nonew == 1, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first"); 31473e90b805SBarry Smith 31483e90b805SBarry Smith /* allocate space for values if not already there */ 3149ff6a9541SJacob Faibussowitsch if (!aij->saved_values) PetscCall(PetscMalloc1(nz + 1, &aij->saved_values)); 31503e90b805SBarry Smith 31513e90b805SBarry Smith /* copy values over */ 31529566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(aij->saved_values, aij->a, nz)); 31533ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 31543e90b805SBarry Smith } 31553e90b805SBarry Smith 3156ff6a9541SJacob Faibussowitsch static PetscErrorCode MatRetrieveValues_SeqBAIJ(Mat mat) 3157d71ae5a4SJacob Faibussowitsch { 31583e90b805SBarry Smith Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data; 31598ece6314SShri Abhyankar PetscInt nz = aij->i[aij->mbs] * aij->bs2; 31603e90b805SBarry Smith 31613e90b805SBarry Smith PetscFunctionBegin; 31625f80ce2aSJacob Faibussowitsch PetscCheck(aij->nonew == 1, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first"); 31635f80ce2aSJacob Faibussowitsch PetscCheck(aij->saved_values, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Must call MatStoreValues(A);first"); 31643e90b805SBarry Smith 31653e90b805SBarry Smith /* copy values over */ 31669566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(aij->a, aij->saved_values, nz)); 31673ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 31683e90b805SBarry Smith } 31693e90b805SBarry Smith 3170cc2e6a90SBarry Smith PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType, MatReuse, Mat *); 3171cc2e6a90SBarry Smith PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqSBAIJ(Mat, MatType, MatReuse, Mat *); 3172273d9f13SBarry Smith 3173f9663b93SPierre Jolivet PetscErrorCode MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B, PetscInt bs, PetscInt nz, const PetscInt nnz[]) 3174d71ae5a4SJacob Faibussowitsch { 3175ad79cf63SBarry Smith Mat_SeqBAIJ *b = (Mat_SeqBAIJ *)B->data; 3176535b19f3SBarry Smith PetscInt i, mbs, nbs, bs2; 31778afaa268SBarry Smith PetscBool flg = PETSC_FALSE, skipallocation = PETSC_FALSE, realalloc = PETSC_FALSE; 3178a23d5eceSKris Buschelman 3179a23d5eceSKris Buschelman PetscFunctionBegin; 3180ad79cf63SBarry Smith if (B->hash_active) { 3181ad79cf63SBarry Smith PetscInt bs; 3182aea10558SJacob Faibussowitsch B->ops[0] = b->cops; 3183ad79cf63SBarry Smith PetscCall(PetscHMapIJVDestroy(&b->ht)); 3184ad79cf63SBarry Smith PetscCall(MatGetBlockSize(B, &bs)); 3185ad79cf63SBarry Smith if (bs > 1) PetscCall(PetscHSetIJDestroy(&b->bht)); 3186ad79cf63SBarry Smith PetscCall(PetscFree(b->dnz)); 3187ad79cf63SBarry Smith PetscCall(PetscFree(b->bdnz)); 3188ad79cf63SBarry Smith B->hash_active = PETSC_FALSE; 3189ad79cf63SBarry Smith } 31902576faa2SJed Brown if (nz >= 0 || nnz) realalloc = PETSC_TRUE; 3191ab93d7beSBarry Smith if (nz == MAT_SKIP_ALLOCATION) { 3192ab93d7beSBarry Smith skipallocation = PETSC_TRUE; 3193ab93d7beSBarry Smith nz = 0; 3194ab93d7beSBarry Smith } 31958c07d4e3SBarry Smith 319658b7e2c1SStefano Zampini PetscCall(MatSetBlockSize(B, bs)); 31979566063dSJacob Faibussowitsch PetscCall(PetscLayoutSetUp(B->rmap)); 31989566063dSJacob Faibussowitsch PetscCall(PetscLayoutSetUp(B->cmap)); 31999566063dSJacob Faibussowitsch PetscCall(PetscLayoutGetBlockSize(B->rmap, &bs)); 3200899cda47SBarry Smith 3201899cda47SBarry Smith B->preallocated = PETSC_TRUE; 3202899cda47SBarry Smith 3203d0f46423SBarry Smith mbs = B->rmap->n / bs; 3204d0f46423SBarry Smith nbs = B->cmap->n / bs; 3205a23d5eceSKris Buschelman bs2 = bs * bs; 3206a23d5eceSKris Buschelman 32075f80ce2aSJacob Faibussowitsch PetscCheck(mbs * bs == B->rmap->n && nbs * bs == B->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Number rows %" PetscInt_FMT ", cols %" PetscInt_FMT " must be divisible by blocksize %" PetscInt_FMT, B->rmap->N, B->cmap->n, bs); 3208a23d5eceSKris Buschelman 3209a23d5eceSKris Buschelman if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 32105f80ce2aSJacob Faibussowitsch PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "nz cannot be less than 0: value %" PetscInt_FMT, nz); 3211a23d5eceSKris Buschelman if (nnz) { 3212a23d5eceSKris Buschelman for (i = 0; i < mbs; i++) { 32135f80ce2aSJacob Faibussowitsch PetscCheck(nnz[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "nnz cannot be less than 0: local row %" PetscInt_FMT " value %" PetscInt_FMT, i, nnz[i]); 32145f80ce2aSJacob Faibussowitsch PetscCheck(nnz[i] <= nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "nnz cannot be greater than block row length: local row %" PetscInt_FMT " value %" PetscInt_FMT " rowlength %" PetscInt_FMT, i, nnz[i], nbs); 3215a23d5eceSKris Buschelman } 3216a23d5eceSKris Buschelman } 3217a23d5eceSKris Buschelman 3218d0609cedSBarry Smith PetscOptionsBegin(PetscObjectComm((PetscObject)B), NULL, "Optimize options for SEQBAIJ matrix 2 ", "Mat"); 32199566063dSJacob Faibussowitsch PetscCall(PetscOptionsBool("-mat_no_unroll", "Do not optimize for block size (slow)", NULL, flg, &flg, NULL)); 3220d0609cedSBarry Smith PetscOptionsEnd(); 32218c07d4e3SBarry Smith 3222a23d5eceSKris Buschelman if (!flg) { 3223a23d5eceSKris Buschelman switch (bs) { 3224a23d5eceSKris Buschelman case 1: 3225a23d5eceSKris Buschelman B->ops->mult = MatMult_SeqBAIJ_1; 3226a23d5eceSKris Buschelman B->ops->multadd = MatMultAdd_SeqBAIJ_1; 3227a23d5eceSKris Buschelman break; 3228a23d5eceSKris Buschelman case 2: 3229a23d5eceSKris Buschelman B->ops->mult = MatMult_SeqBAIJ_2; 3230a23d5eceSKris Buschelman B->ops->multadd = MatMultAdd_SeqBAIJ_2; 3231a23d5eceSKris Buschelman break; 3232a23d5eceSKris Buschelman case 3: 3233a23d5eceSKris Buschelman B->ops->mult = MatMult_SeqBAIJ_3; 3234a23d5eceSKris Buschelman B->ops->multadd = MatMultAdd_SeqBAIJ_3; 3235a23d5eceSKris Buschelman break; 3236a23d5eceSKris Buschelman case 4: 3237a23d5eceSKris Buschelman B->ops->mult = MatMult_SeqBAIJ_4; 3238a23d5eceSKris Buschelman B->ops->multadd = MatMultAdd_SeqBAIJ_4; 3239a23d5eceSKris Buschelman break; 3240a23d5eceSKris Buschelman case 5: 3241a23d5eceSKris Buschelman B->ops->mult = MatMult_SeqBAIJ_5; 3242a23d5eceSKris Buschelman B->ops->multadd = MatMultAdd_SeqBAIJ_5; 3243a23d5eceSKris Buschelman break; 3244a23d5eceSKris Buschelman case 6: 3245a23d5eceSKris Buschelman B->ops->mult = MatMult_SeqBAIJ_6; 3246a23d5eceSKris Buschelman B->ops->multadd = MatMultAdd_SeqBAIJ_6; 3247a23d5eceSKris Buschelman break; 3248a23d5eceSKris Buschelman case 7: 3249a23d5eceSKris Buschelman B->ops->mult = MatMult_SeqBAIJ_7; 3250a23d5eceSKris Buschelman B->ops->multadd = MatMultAdd_SeqBAIJ_7; 3251a23d5eceSKris Buschelman break; 32529371c9d4SSatish Balay case 9: { 32536679dcc1SBarry Smith PetscInt version = 1; 32549566063dSJacob Faibussowitsch PetscCall(PetscOptionsGetInt(NULL, ((PetscObject)B)->prefix, "-mat_baij_mult_version", &version, NULL)); 32556679dcc1SBarry Smith switch (version) { 32565f70456aSHong Zhang #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX2__) && defined(__FMA__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES) 32576679dcc1SBarry Smith case 1: 325896e086a2SDaniel Kokron B->ops->mult = MatMult_SeqBAIJ_9_AVX2; 325996e086a2SDaniel Kokron B->ops->multadd = MatMultAdd_SeqBAIJ_9_AVX2; 3260835f2295SStefano Zampini PetscCall(PetscInfo(B, "Using AVX2 for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", bs)); 32616679dcc1SBarry Smith break; 32626679dcc1SBarry Smith #endif 32636679dcc1SBarry Smith default: 326496e086a2SDaniel Kokron B->ops->mult = MatMult_SeqBAIJ_N; 326596e086a2SDaniel Kokron B->ops->multadd = MatMultAdd_SeqBAIJ_N; 3266835f2295SStefano Zampini PetscCall(PetscInfo(B, "Using BLAS for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", bs)); 326796e086a2SDaniel Kokron break; 32686679dcc1SBarry Smith } 32696679dcc1SBarry Smith break; 32706679dcc1SBarry Smith } 3271ebada01fSBarry Smith case 11: 3272ebada01fSBarry Smith B->ops->mult = MatMult_SeqBAIJ_11; 3273ebada01fSBarry Smith B->ops->multadd = MatMultAdd_SeqBAIJ_11; 3274ebada01fSBarry Smith break; 32759371c9d4SSatish Balay case 12: { 32766679dcc1SBarry Smith PetscInt version = 1; 32779566063dSJacob Faibussowitsch PetscCall(PetscOptionsGetInt(NULL, ((PetscObject)B)->prefix, "-mat_baij_mult_version", &version, NULL)); 32786679dcc1SBarry Smith switch (version) { 32796679dcc1SBarry Smith case 1: 32806679dcc1SBarry Smith B->ops->mult = MatMult_SeqBAIJ_12_ver1; 32816679dcc1SBarry Smith B->ops->multadd = MatMultAdd_SeqBAIJ_12_ver1; 3282835f2295SStefano Zampini PetscCall(PetscInfo(B, "Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", version, bs)); 32838ab949d8SShri Abhyankar break; 32846679dcc1SBarry Smith case 2: 32856679dcc1SBarry Smith B->ops->mult = MatMult_SeqBAIJ_12_ver2; 32866679dcc1SBarry Smith B->ops->multadd = MatMultAdd_SeqBAIJ_12_ver2; 3287835f2295SStefano Zampini PetscCall(PetscInfo(B, "Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", version, bs)); 32886679dcc1SBarry Smith break; 32896679dcc1SBarry Smith #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX2__) && defined(__FMA__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES) 32906679dcc1SBarry Smith case 3: 32916679dcc1SBarry Smith B->ops->mult = MatMult_SeqBAIJ_12_AVX2; 32926679dcc1SBarry Smith B->ops->multadd = MatMultAdd_SeqBAIJ_12_ver1; 3293835f2295SStefano Zampini PetscCall(PetscInfo(B, "Using AVX2 for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", bs)); 32946679dcc1SBarry Smith break; 32956679dcc1SBarry Smith #endif 3296a23d5eceSKris Buschelman default: 3297a23d5eceSKris Buschelman B->ops->mult = MatMult_SeqBAIJ_N; 3298a23d5eceSKris Buschelman B->ops->multadd = MatMultAdd_SeqBAIJ_N; 3299835f2295SStefano Zampini PetscCall(PetscInfo(B, "Using BLAS for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", bs)); 33006679dcc1SBarry Smith break; 33016679dcc1SBarry Smith } 33026679dcc1SBarry Smith break; 33036679dcc1SBarry Smith } 33049371c9d4SSatish Balay case 15: { 33056679dcc1SBarry Smith PetscInt version = 1; 33069566063dSJacob Faibussowitsch PetscCall(PetscOptionsGetInt(NULL, ((PetscObject)B)->prefix, "-mat_baij_mult_version", &version, NULL)); 33076679dcc1SBarry Smith switch (version) { 33086679dcc1SBarry Smith case 1: 33096679dcc1SBarry Smith B->ops->mult = MatMult_SeqBAIJ_15_ver1; 3310835f2295SStefano Zampini PetscCall(PetscInfo(B, "Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", version, bs)); 33116679dcc1SBarry Smith break; 33126679dcc1SBarry Smith case 2: 33136679dcc1SBarry Smith B->ops->mult = MatMult_SeqBAIJ_15_ver2; 3314835f2295SStefano Zampini PetscCall(PetscInfo(B, "Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", version, bs)); 33156679dcc1SBarry Smith break; 33166679dcc1SBarry Smith case 3: 33176679dcc1SBarry Smith B->ops->mult = MatMult_SeqBAIJ_15_ver3; 3318835f2295SStefano Zampini PetscCall(PetscInfo(B, "Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", version, bs)); 33196679dcc1SBarry Smith break; 33206679dcc1SBarry Smith case 4: 33216679dcc1SBarry Smith B->ops->mult = MatMult_SeqBAIJ_15_ver4; 3322835f2295SStefano Zampini PetscCall(PetscInfo(B, "Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", version, bs)); 33236679dcc1SBarry Smith break; 33246679dcc1SBarry Smith default: 33256679dcc1SBarry Smith B->ops->mult = MatMult_SeqBAIJ_N; 3326835f2295SStefano Zampini PetscCall(PetscInfo(B, "Using BLAS for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", bs)); 33276679dcc1SBarry Smith break; 33286679dcc1SBarry Smith } 33296679dcc1SBarry Smith B->ops->multadd = MatMultAdd_SeqBAIJ_N; 33306679dcc1SBarry Smith break; 33316679dcc1SBarry Smith } 33326679dcc1SBarry Smith default: 33336679dcc1SBarry Smith B->ops->mult = MatMult_SeqBAIJ_N; 33346679dcc1SBarry Smith B->ops->multadd = MatMultAdd_SeqBAIJ_N; 3335835f2295SStefano Zampini PetscCall(PetscInfo(B, "Using BLAS for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", bs)); 3336a23d5eceSKris Buschelman break; 3337a23d5eceSKris Buschelman } 3338a23d5eceSKris Buschelman } 3339e48d15efSToby Isaac B->ops->sor = MatSOR_SeqBAIJ; 3340a23d5eceSKris Buschelman b->mbs = mbs; 3341a23d5eceSKris Buschelman b->nbs = nbs; 3342ab93d7beSBarry Smith if (!skipallocation) { 33432ee49352SLisandro Dalcin if (!b->imax) { 33449566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(mbs, &b->imax, mbs, &b->ilen)); 334526fbe8dcSKarl Rupp 33464fd072dbSBarry Smith b->free_imax_ilen = PETSC_TRUE; 33472ee49352SLisandro Dalcin } 3348ab93d7beSBarry Smith /* b->ilen will count nonzeros in each block row so far. */ 334926fbe8dcSKarl Rupp for (i = 0; i < mbs; i++) b->ilen[i] = 0; 3350a23d5eceSKris Buschelman if (!nnz) { 3351a23d5eceSKris Buschelman if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 3352c62bd62aSJed Brown else if (nz < 0) nz = 1; 33535d2a9ed1SStefano Zampini nz = PetscMin(nz, nbs); 3354a23d5eceSKris Buschelman for (i = 0; i < mbs; i++) b->imax[i] = nz; 33559566063dSJacob Faibussowitsch PetscCall(PetscIntMultError(nz, mbs, &nz)); 3356a23d5eceSKris Buschelman } else { 3357c73702f5SBarry Smith PetscInt64 nz64 = 0; 33589371c9d4SSatish Balay for (i = 0; i < mbs; i++) { 33599371c9d4SSatish Balay b->imax[i] = nnz[i]; 33609371c9d4SSatish Balay nz64 += nnz[i]; 33619371c9d4SSatish Balay } 33629566063dSJacob Faibussowitsch PetscCall(PetscIntCast(nz64, &nz)); 3363a23d5eceSKris Buschelman } 3364a23d5eceSKris Buschelman 3365a23d5eceSKris Buschelman /* allocate the matrix space */ 33669566063dSJacob Faibussowitsch PetscCall(MatSeqXAIJFreeAIJ(B, &b->a, &b->j, &b->i)); 33679f0612e4SBarry Smith PetscCall(PetscShmgetAllocateArray(nz, sizeof(PetscInt), (void **)&b->j)); 33689f0612e4SBarry Smith PetscCall(PetscShmgetAllocateArray(B->rmap->N + 1, sizeof(PetscInt), (void **)&b->i)); 3369672ba085SHong Zhang if (B->structure_only) { 33709f0612e4SBarry Smith b->free_a = PETSC_FALSE; 3371672ba085SHong Zhang } else { 33726679dcc1SBarry Smith PetscInt nzbs2 = 0; 33739566063dSJacob Faibussowitsch PetscCall(PetscIntMultError(nz, bs2, &nzbs2)); 33749f0612e4SBarry Smith PetscCall(PetscShmgetAllocateArray(nzbs2, sizeof(PetscScalar), (void **)&b->a)); 33759f0612e4SBarry Smith b->free_a = PETSC_TRUE; 3376*1766d9c3SPierre Jolivet PetscCall(PetscArrayzero(b->a, nzbs2)); 3377672ba085SHong Zhang } 3378672ba085SHong Zhang b->free_ij = PETSC_TRUE; 33799f0612e4SBarry Smith PetscCall(PetscArrayzero(b->j, nz)); 3380672ba085SHong Zhang 3381a23d5eceSKris Buschelman b->i[0] = 0; 3382ad540459SPierre Jolivet for (i = 1; i < mbs + 1; i++) b->i[i] = b->i[i - 1] + b->imax[i - 1]; 3383e811da20SHong Zhang } else { 3384e6b907acSBarry Smith b->free_a = PETSC_FALSE; 3385e6b907acSBarry Smith b->free_ij = PETSC_FALSE; 3386ab93d7beSBarry Smith } 3387a23d5eceSKris Buschelman 3388a23d5eceSKris Buschelman b->bs2 = bs2; 3389a23d5eceSKris Buschelman b->mbs = mbs; 3390a23d5eceSKris Buschelman b->nz = 0; 3391b32cb4a7SJed Brown b->maxnz = nz; 3392b32cb4a7SJed Brown B->info.nz_unneeded = (PetscReal)b->maxnz * bs2; 3393cb7b82ddSBarry Smith B->was_assembled = PETSC_FALSE; 3394cb7b82ddSBarry Smith B->assembled = PETSC_FALSE; 33959566063dSJacob Faibussowitsch if (realalloc) PetscCall(MatSetOption(B, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_TRUE)); 33963ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 3397a23d5eceSKris Buschelman } 3398a23d5eceSKris Buschelman 339966976f2fSJacob Faibussowitsch static PetscErrorCode MatSeqBAIJSetPreallocationCSR_SeqBAIJ(Mat B, PetscInt bs, const PetscInt ii[], const PetscInt jj[], const PetscScalar V[]) 3400d71ae5a4SJacob Faibussowitsch { 3401725b52f3SLisandro Dalcin PetscInt i, m, nz, nz_max = 0, *nnz; 3402f4259b30SLisandro Dalcin PetscScalar *values = NULL; 3403d47bf9aaSJed Brown PetscBool roworiented = ((Mat_SeqBAIJ *)B->data)->roworiented; 3404725b52f3SLisandro Dalcin 3405725b52f3SLisandro Dalcin PetscFunctionBegin; 34065f80ce2aSJacob Faibussowitsch PetscCheck(bs >= 1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Invalid block size specified, must be positive but it is %" PetscInt_FMT, bs); 34079566063dSJacob Faibussowitsch PetscCall(PetscLayoutSetBlockSize(B->rmap, bs)); 34089566063dSJacob Faibussowitsch PetscCall(PetscLayoutSetBlockSize(B->cmap, bs)); 34099566063dSJacob Faibussowitsch PetscCall(PetscLayoutSetUp(B->rmap)); 34109566063dSJacob Faibussowitsch PetscCall(PetscLayoutSetUp(B->cmap)); 34119566063dSJacob Faibussowitsch PetscCall(PetscLayoutGetBlockSize(B->rmap, &bs)); 3412d0f46423SBarry Smith m = B->rmap->n / bs; 3413725b52f3SLisandro Dalcin 34145f80ce2aSJacob Faibussowitsch PetscCheck(ii[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %" PetscInt_FMT, ii[0]); 34159566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(m + 1, &nnz)); 3416725b52f3SLisandro Dalcin for (i = 0; i < m; i++) { 3417cf12db73SBarry Smith nz = ii[i + 1] - ii[i]; 34185f80ce2aSJacob Faibussowitsch PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Local row %" PetscInt_FMT " has a negative number of columns %" PetscInt_FMT, i, nz); 3419725b52f3SLisandro Dalcin nz_max = PetscMax(nz_max, nz); 3420725b52f3SLisandro Dalcin nnz[i] = nz; 3421725b52f3SLisandro Dalcin } 34229566063dSJacob Faibussowitsch PetscCall(MatSeqBAIJSetPreallocation(B, bs, 0, nnz)); 34239566063dSJacob Faibussowitsch PetscCall(PetscFree(nnz)); 3424725b52f3SLisandro Dalcin 3425725b52f3SLisandro Dalcin values = (PetscScalar *)V; 342648a46eb9SPierre Jolivet if (!values) PetscCall(PetscCalloc1(bs * bs * (nz_max + 1), &values)); 3427725b52f3SLisandro Dalcin for (i = 0; i < m; i++) { 3428cf12db73SBarry Smith PetscInt ncols = ii[i + 1] - ii[i]; 3429cf12db73SBarry Smith const PetscInt *icols = jj + ii[i]; 3430bb80cfbbSStefano Zampini if (bs == 1 || !roworiented) { 3431cf12db73SBarry Smith const PetscScalar *svals = values + (V ? (bs * bs * ii[i]) : 0); 34329566063dSJacob Faibussowitsch PetscCall(MatSetValuesBlocked_SeqBAIJ(B, 1, &i, ncols, icols, svals, INSERT_VALUES)); 34333adadaf3SJed Brown } else { 34343adadaf3SJed Brown PetscInt j; 34353adadaf3SJed Brown for (j = 0; j < ncols; j++) { 34363adadaf3SJed Brown const PetscScalar *svals = values + (V ? (bs * bs * (ii[i] + j)) : 0); 34379566063dSJacob Faibussowitsch PetscCall(MatSetValuesBlocked_SeqBAIJ(B, 1, &i, 1, &icols[j], svals, INSERT_VALUES)); 34383adadaf3SJed Brown } 34393adadaf3SJed Brown } 3440725b52f3SLisandro Dalcin } 34419566063dSJacob Faibussowitsch if (!V) PetscCall(PetscFree(values)); 34429566063dSJacob Faibussowitsch PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 34439566063dSJacob Faibussowitsch PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 34449566063dSJacob Faibussowitsch PetscCall(MatSetOption(B, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE)); 34453ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 3446725b52f3SLisandro Dalcin } 3447725b52f3SLisandro Dalcin 3448cda14afcSprj- /*@C 344911a5261eSBarry Smith MatSeqBAIJGetArray - gives read/write access to the array where the data for a `MATSEQBAIJ` matrix is stored 3450cda14afcSprj- 3451cda14afcSprj- Not Collective 3452cda14afcSprj- 3453cda14afcSprj- Input Parameter: 3454fe59aa6dSJacob Faibussowitsch . A - a `MATSEQBAIJ` matrix 3455cda14afcSprj- 3456cda14afcSprj- Output Parameter: 3457cda14afcSprj- . array - pointer to the data 3458cda14afcSprj- 3459cda14afcSprj- Level: intermediate 3460cda14afcSprj- 34611cc06b55SBarry Smith .seealso: [](ch_matrices), `Mat`, `MATSEQBAIJ`, `MatSeqBAIJRestoreArray()`, `MatSeqAIJGetArray()`, `MatSeqAIJRestoreArray()` 3462cda14afcSprj- @*/ 34635d83a8b1SBarry Smith PetscErrorCode MatSeqBAIJGetArray(Mat A, PetscScalar *array[]) 3464d71ae5a4SJacob Faibussowitsch { 3465cda14afcSprj- PetscFunctionBegin; 3466cac4c232SBarry Smith PetscUseMethod(A, "MatSeqBAIJGetArray_C", (Mat, PetscScalar **), (A, array)); 34673ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 3468cda14afcSprj- } 3469cda14afcSprj- 3470cda14afcSprj- /*@C 347111a5261eSBarry Smith MatSeqBAIJRestoreArray - returns access to the array where the data for a `MATSEQBAIJ` matrix is stored obtained by `MatSeqBAIJGetArray()` 3472cda14afcSprj- 3473cda14afcSprj- Not Collective 3474cda14afcSprj- 3475cda14afcSprj- Input Parameters: 3476fe59aa6dSJacob Faibussowitsch + A - a `MATSEQBAIJ` matrix 3477cda14afcSprj- - array - pointer to the data 3478cda14afcSprj- 3479cda14afcSprj- Level: intermediate 3480cda14afcSprj- 34811cc06b55SBarry Smith .seealso: [](ch_matrices), `Mat`, `MatSeqBAIJGetArray()`, `MatSeqAIJGetArray()`, `MatSeqAIJRestoreArray()` 3482cda14afcSprj- @*/ 34835d83a8b1SBarry Smith PetscErrorCode MatSeqBAIJRestoreArray(Mat A, PetscScalar *array[]) 3484d71ae5a4SJacob Faibussowitsch { 3485cda14afcSprj- PetscFunctionBegin; 3486cac4c232SBarry Smith PetscUseMethod(A, "MatSeqBAIJRestoreArray_C", (Mat, PetscScalar **), (A, array)); 34873ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 3488cda14afcSprj- } 3489cda14afcSprj- 34900bad9183SKris Buschelman /*MC 3491fafad747SKris Buschelman MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on 34920bad9183SKris Buschelman block sparse compressed row format. 34930bad9183SKris Buschelman 34940bad9183SKris Buschelman Options Database Keys: 349520f4b53cSBarry Smith + -mat_type seqbaij - sets the matrix type to `MATSEQBAIJ` during a call to `MatSetFromOptions()` 34966679dcc1SBarry Smith - -mat_baij_mult_version version - indicate the version of the matrix-vector product to use (0 often indicates using BLAS) 34970bad9183SKris Buschelman 34980bad9183SKris Buschelman Level: beginner 34990cd7f59aSBarry Smith 35000cd7f59aSBarry Smith Notes: 350111a5261eSBarry Smith `MatSetOptions`(,`MAT_STRUCTURE_ONLY`,`PETSC_TRUE`) may be called for this matrix type. In this no 350211a5261eSBarry Smith space is allocated for the nonzero entries and any entries passed with `MatSetValues()` are ignored 35030bad9183SKris Buschelman 35042ef1f0ffSBarry Smith Run with `-info` to see what version of the matrix-vector product is being used 35056679dcc1SBarry Smith 35061cc06b55SBarry Smith .seealso: [](ch_matrices), `Mat`, `MatCreateSeqBAIJ()` 35070bad9183SKris Buschelman M*/ 35080bad9183SKris Buschelman 3509cc2e6a90SBarry Smith PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBSTRM(Mat, MatType, MatReuse, Mat *); 3510b24902e0SBarry Smith 3511d71ae5a4SJacob Faibussowitsch PETSC_EXTERN PetscErrorCode MatCreate_SeqBAIJ(Mat B) 3512d71ae5a4SJacob Faibussowitsch { 3513c1ac3661SBarry Smith PetscMPIInt size; 3514b6490206SBarry Smith Mat_SeqBAIJ *b; 35153b2fbd54SBarry Smith 35163a40ed3dSBarry Smith PetscFunctionBegin; 35179566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size)); 35185f80ce2aSJacob Faibussowitsch PetscCheck(size == 1, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Comm must be of size 1"); 3519b6490206SBarry Smith 35204dfa11a4SJacob Faibussowitsch PetscCall(PetscNew(&b)); 3521b0a32e0cSBarry Smith B->data = (void *)b; 3522aea10558SJacob Faibussowitsch B->ops[0] = MatOps_Values; 352326fbe8dcSKarl Rupp 3524f4259b30SLisandro Dalcin b->row = NULL; 3525f4259b30SLisandro Dalcin b->col = NULL; 3526f4259b30SLisandro Dalcin b->icol = NULL; 35272593348eSBarry Smith b->reallocs = 0; 3528f4259b30SLisandro Dalcin b->saved_values = NULL; 35292593348eSBarry Smith 3530c4992f7dSBarry Smith b->roworiented = PETSC_TRUE; 35312593348eSBarry Smith b->nonew = 0; 3532f4259b30SLisandro Dalcin b->diag = NULL; 3533f4259b30SLisandro Dalcin B->spptr = NULL; 3534b32cb4a7SJed Brown B->info.nz_unneeded = (PetscReal)b->maxnz * b->bs2; 3535a9817697SBarry Smith b->keepnonzeropattern = PETSC_FALSE; 35364e220ebcSLois Curfman McInnes 35379566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSeqBAIJGetArray_C", MatSeqBAIJGetArray_SeqBAIJ)); 35389566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSeqBAIJRestoreArray_C", MatSeqBAIJRestoreArray_SeqBAIJ)); 35399566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatStoreValues_C", MatStoreValues_SeqBAIJ)); 35409566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatRetrieveValues_C", MatRetrieveValues_SeqBAIJ)); 35419566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSeqBAIJSetColumnIndices_C", MatSeqBAIJSetColumnIndices_SeqBAIJ)); 35429566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqbaij_seqaij_C", MatConvert_SeqBAIJ_SeqAIJ)); 35439566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqbaij_seqsbaij_C", MatConvert_SeqBAIJ_SeqSBAIJ)); 35449566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSeqBAIJSetPreallocation_C", MatSeqBAIJSetPreallocation_SeqBAIJ)); 35459566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSeqBAIJSetPreallocationCSR_C", MatSeqBAIJSetPreallocationCSR_SeqBAIJ)); 35469566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatIsTranspose_C", MatIsTranspose_SeqBAIJ)); 35477ea3e4caSstefano_zampini #if defined(PETSC_HAVE_HYPRE) 35489566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqbaij_hypre_C", MatConvert_AIJ_HYPRE)); 35497ea3e4caSstefano_zampini #endif 35509566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqbaij_is_C", MatConvert_XAIJ_IS)); 35519566063dSJacob Faibussowitsch PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATSEQBAIJ)); 35523ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 35532593348eSBarry Smith } 35542593348eSBarry Smith 3555d6acfc2dSPierre Jolivet PETSC_INTERN PetscErrorCode MatDuplicateNoCreate_SeqBAIJ(Mat C, Mat A, MatDuplicateOption cpvalues, PetscBool mallocmatspace) 3556d71ae5a4SJacob Faibussowitsch { 3557b24902e0SBarry Smith Mat_SeqBAIJ *c = (Mat_SeqBAIJ *)C->data, *a = (Mat_SeqBAIJ *)A->data; 3558a96a251dSBarry Smith PetscInt i, mbs = a->mbs, nz = a->nz, bs2 = a->bs2; 3559de6a44a3SBarry Smith 35603a40ed3dSBarry Smith PetscFunctionBegin; 356131fe6a7dSBarry Smith PetscCheck(A->assembled, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONGSTATE, "Cannot duplicate unassembled matrix"); 35625f80ce2aSJacob Faibussowitsch PetscCheck(a->i[mbs] == nz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Corrupt matrix"); 35632593348eSBarry Smith 35644fd072dbSBarry Smith if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { 35654fd072dbSBarry Smith c->imax = a->imax; 35664fd072dbSBarry Smith c->ilen = a->ilen; 35674fd072dbSBarry Smith c->free_imax_ilen = PETSC_FALSE; 35684fd072dbSBarry Smith } else { 35699566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(mbs, &c->imax, mbs, &c->ilen)); 3570b6490206SBarry Smith for (i = 0; i < mbs; i++) { 35712593348eSBarry Smith c->imax[i] = a->imax[i]; 35722593348eSBarry Smith c->ilen[i] = a->ilen[i]; 35732593348eSBarry Smith } 35744fd072dbSBarry Smith c->free_imax_ilen = PETSC_TRUE; 35754fd072dbSBarry Smith } 35762593348eSBarry Smith 35772593348eSBarry Smith /* allocate the matrix space */ 357816a2bf60SHong Zhang if (mallocmatspace) { 35794fd072dbSBarry Smith if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { 35809f0612e4SBarry Smith PetscCall(PetscShmgetAllocateArray(bs2 * nz, sizeof(PetscScalar), (void **)&c->a)); 35819f0612e4SBarry Smith PetscCall(PetscArrayzero(c->a, bs2 * nz)); 35829f0612e4SBarry Smith c->free_a = PETSC_TRUE; 35834fd072dbSBarry Smith c->i = a->i; 35844fd072dbSBarry Smith c->j = a->j; 3585379be0ddSLisandro Dalcin c->free_ij = PETSC_FALSE; 35864fd072dbSBarry Smith c->parent = A; 35871e40a84eSLisandro Dalcin C->preallocated = PETSC_TRUE; 35881e40a84eSLisandro Dalcin C->assembled = PETSC_TRUE; 358926fbe8dcSKarl Rupp 35909566063dSJacob Faibussowitsch PetscCall(PetscObjectReference((PetscObject)A)); 35919566063dSJacob Faibussowitsch PetscCall(MatSetOption(A, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE)); 35929566063dSJacob Faibussowitsch PetscCall(MatSetOption(C, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE)); 35934fd072dbSBarry Smith } else { 35949f0612e4SBarry Smith PetscCall(PetscShmgetAllocateArray(bs2 * nz, sizeof(PetscScalar), (void **)&c->a)); 35959f0612e4SBarry Smith PetscCall(PetscShmgetAllocateArray(nz, sizeof(PetscInt), (void **)&c->j)); 35969f0612e4SBarry Smith PetscCall(PetscShmgetAllocateArray(mbs + 1, sizeof(PetscInt), (void **)&c->i)); 3597379be0ddSLisandro Dalcin c->free_a = PETSC_TRUE; 35984fd072dbSBarry Smith c->free_ij = PETSC_TRUE; 359926fbe8dcSKarl Rupp 36009566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(c->i, a->i, mbs + 1)); 3601b6490206SBarry Smith if (mbs > 0) { 36029566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(c->j, a->j, nz)); 36032e8a6d31SBarry Smith if (cpvalues == MAT_COPY_VALUES) { 36049566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(c->a, a->a, bs2 * nz)); 36052e8a6d31SBarry Smith } else { 36069566063dSJacob Faibussowitsch PetscCall(PetscArrayzero(c->a, bs2 * nz)); 36072593348eSBarry Smith } 36082593348eSBarry Smith } 36091e40a84eSLisandro Dalcin C->preallocated = PETSC_TRUE; 36101e40a84eSLisandro Dalcin C->assembled = PETSC_TRUE; 361116a2bf60SHong Zhang } 36124fd072dbSBarry Smith } 361316a2bf60SHong Zhang 36142593348eSBarry Smith c->roworiented = a->roworiented; 36152593348eSBarry Smith c->nonew = a->nonew; 361626fbe8dcSKarl Rupp 36179566063dSJacob Faibussowitsch PetscCall(PetscLayoutReference(A->rmap, &C->rmap)); 36189566063dSJacob Faibussowitsch PetscCall(PetscLayoutReference(A->cmap, &C->cmap)); 361926fbe8dcSKarl Rupp 36205c9eb25fSBarry Smith c->bs2 = a->bs2; 36215c9eb25fSBarry Smith c->mbs = a->mbs; 36225c9eb25fSBarry Smith c->nbs = a->nbs; 36232593348eSBarry Smith 36242593348eSBarry Smith if (a->diag) { 36254fd072dbSBarry Smith if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { 36264fd072dbSBarry Smith c->diag = a->diag; 36274fd072dbSBarry Smith c->free_diag = PETSC_FALSE; 36284fd072dbSBarry Smith } else { 36299566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(mbs + 1, &c->diag)); 363026fbe8dcSKarl Rupp for (i = 0; i < mbs; i++) c->diag[i] = a->diag[i]; 36314fd072dbSBarry Smith c->free_diag = PETSC_TRUE; 36324fd072dbSBarry Smith } 3633f4259b30SLisandro Dalcin } else c->diag = NULL; 363426fbe8dcSKarl Rupp 36352593348eSBarry Smith c->nz = a->nz; 3636f2cbd3d5SJed Brown c->maxnz = a->nz; /* Since we allocate exactly the right amount */ 3637f361c04dSBarry Smith c->solve_work = NULL; 3638f361c04dSBarry Smith c->mult_work = NULL; 3639f361c04dSBarry Smith c->sor_workt = NULL; 3640f361c04dSBarry Smith c->sor_work = NULL; 364188e51ccdSHong Zhang 364288e51ccdSHong Zhang c->compressedrow.use = a->compressedrow.use; 364388e51ccdSHong Zhang c->compressedrow.nrows = a->compressedrow.nrows; 3644cd6b891eSBarry Smith if (a->compressedrow.use) { 364588e51ccdSHong Zhang i = a->compressedrow.nrows; 36469566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(i + 1, &c->compressedrow.i, i + 1, &c->compressedrow.rindex)); 36479566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(c->compressedrow.i, a->compressedrow.i, i + 1)); 36489566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(c->compressedrow.rindex, a->compressedrow.rindex, i)); 364988e51ccdSHong Zhang } else { 365088e51ccdSHong Zhang c->compressedrow.use = PETSC_FALSE; 36510298fd71SBarry Smith c->compressedrow.i = NULL; 36520298fd71SBarry Smith c->compressedrow.rindex = NULL; 365388e51ccdSHong Zhang } 3654c05f355bSMark Adams c->nonzerorowcnt = a->nonzerorowcnt; 3655e56f5c9eSBarry Smith C->nonzerostate = A->nonzerostate; 365626fbe8dcSKarl Rupp 36579566063dSJacob Faibussowitsch PetscCall(PetscFunctionListDuplicate(((PetscObject)A)->qlist, &((PetscObject)C)->qlist)); 36583ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 36592593348eSBarry Smith } 36602593348eSBarry Smith 3661d71ae5a4SJacob Faibussowitsch PetscErrorCode MatDuplicate_SeqBAIJ(Mat A, MatDuplicateOption cpvalues, Mat *B) 3662d71ae5a4SJacob Faibussowitsch { 3663b24902e0SBarry Smith PetscFunctionBegin; 36649566063dSJacob Faibussowitsch PetscCall(MatCreate(PetscObjectComm((PetscObject)A), B)); 36659566063dSJacob Faibussowitsch PetscCall(MatSetSizes(*B, A->rmap->N, A->cmap->n, A->rmap->N, A->cmap->n)); 36669566063dSJacob Faibussowitsch PetscCall(MatSetType(*B, MATSEQBAIJ)); 36679566063dSJacob Faibussowitsch PetscCall(MatDuplicateNoCreate_SeqBAIJ(*B, A, cpvalues, PETSC_TRUE)); 36683ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 3669b24902e0SBarry Smith } 3670b24902e0SBarry Smith 3671618cc2edSLisandro Dalcin /* Used for both SeqBAIJ and SeqSBAIJ matrices */ 3672d71ae5a4SJacob Faibussowitsch PetscErrorCode MatLoad_SeqBAIJ_Binary(Mat mat, PetscViewer viewer) 3673d71ae5a4SJacob Faibussowitsch { 3674b51a4376SLisandro Dalcin PetscInt header[4], M, N, nz, bs, m, n, mbs, nbs, rows, cols, sum, i, j, k; 3675b51a4376SLisandro Dalcin PetscInt *rowidxs, *colidxs; 3676b51a4376SLisandro Dalcin PetscScalar *matvals; 3677b51a4376SLisandro Dalcin 3678b51a4376SLisandro Dalcin PetscFunctionBegin; 36799566063dSJacob Faibussowitsch PetscCall(PetscViewerSetUp(viewer)); 3680b51a4376SLisandro Dalcin 3681b51a4376SLisandro Dalcin /* read matrix header */ 36829566063dSJacob Faibussowitsch PetscCall(PetscViewerBinaryRead(viewer, header, 4, NULL, PETSC_INT)); 36835f80ce2aSJacob Faibussowitsch PetscCheck(header[0] == MAT_FILE_CLASSID, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Not a matrix object in file"); 36849371c9d4SSatish Balay M = header[1]; 36859371c9d4SSatish Balay N = header[2]; 36869371c9d4SSatish Balay nz = header[3]; 36875f80ce2aSJacob Faibussowitsch PetscCheck(M >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix row size (%" PetscInt_FMT ") in file is negative", M); 36885f80ce2aSJacob Faibussowitsch PetscCheck(N >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix column size (%" PetscInt_FMT ") in file is negative", N); 36895f80ce2aSJacob Faibussowitsch PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix stored in special format on disk, cannot load as SeqBAIJ"); 3690b51a4376SLisandro Dalcin 3691b51a4376SLisandro Dalcin /* set block sizes from the viewer's .info file */ 36929566063dSJacob Faibussowitsch PetscCall(MatLoad_Binary_BlockSizes(mat, viewer)); 3693b51a4376SLisandro Dalcin /* set local and global sizes if not set already */ 3694b51a4376SLisandro Dalcin if (mat->rmap->n < 0) mat->rmap->n = M; 3695b51a4376SLisandro Dalcin if (mat->cmap->n < 0) mat->cmap->n = N; 3696b51a4376SLisandro Dalcin if (mat->rmap->N < 0) mat->rmap->N = M; 3697b51a4376SLisandro Dalcin if (mat->cmap->N < 0) mat->cmap->N = N; 36989566063dSJacob Faibussowitsch PetscCall(PetscLayoutSetUp(mat->rmap)); 36999566063dSJacob Faibussowitsch PetscCall(PetscLayoutSetUp(mat->cmap)); 3700b51a4376SLisandro Dalcin 3701b51a4376SLisandro Dalcin /* check if the matrix sizes are correct */ 37029566063dSJacob Faibussowitsch PetscCall(MatGetSize(mat, &rows, &cols)); 37035f80ce2aSJacob Faibussowitsch PetscCheck(M == rows && N == cols, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%" PetscInt_FMT ", %" PetscInt_FMT ") than the input matrix (%" PetscInt_FMT ", %" PetscInt_FMT ")", M, N, rows, cols); 37049566063dSJacob Faibussowitsch PetscCall(MatGetBlockSize(mat, &bs)); 37059566063dSJacob Faibussowitsch PetscCall(MatGetLocalSize(mat, &m, &n)); 37069371c9d4SSatish Balay mbs = m / bs; 37079371c9d4SSatish Balay nbs = n / bs; 3708b51a4376SLisandro Dalcin 3709b51a4376SLisandro Dalcin /* read in row lengths, column indices and nonzero values */ 37109566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(m + 1, &rowidxs)); 37119566063dSJacob Faibussowitsch PetscCall(PetscViewerBinaryRead(viewer, rowidxs + 1, m, NULL, PETSC_INT)); 37129371c9d4SSatish Balay rowidxs[0] = 0; 37139371c9d4SSatish Balay for (i = 0; i < m; i++) rowidxs[i + 1] += rowidxs[i]; 3714b51a4376SLisandro Dalcin sum = rowidxs[m]; 37155f80ce2aSJacob Faibussowitsch PetscCheck(sum == nz, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Inconsistent matrix data in file: nonzeros = %" PetscInt_FMT ", sum-row-lengths = %" PetscInt_FMT, nz, sum); 3716b51a4376SLisandro Dalcin 3717b51a4376SLisandro Dalcin /* read in column indices and nonzero values */ 37189566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(rowidxs[m], &colidxs, nz, &matvals)); 37199566063dSJacob Faibussowitsch PetscCall(PetscViewerBinaryRead(viewer, colidxs, rowidxs[m], NULL, PETSC_INT)); 37209566063dSJacob Faibussowitsch PetscCall(PetscViewerBinaryRead(viewer, matvals, rowidxs[m], NULL, PETSC_SCALAR)); 3721b51a4376SLisandro Dalcin 3722b51a4376SLisandro Dalcin { /* preallocate matrix storage */ 3723b51a4376SLisandro Dalcin PetscBT bt; /* helper bit set to count nonzeros */ 3724b51a4376SLisandro Dalcin PetscInt *nnz; 3725618cc2edSLisandro Dalcin PetscBool sbaij; 3726b51a4376SLisandro Dalcin 37279566063dSJacob Faibussowitsch PetscCall(PetscBTCreate(nbs, &bt)); 37289566063dSJacob Faibussowitsch PetscCall(PetscCalloc1(mbs, &nnz)); 37299566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATSEQSBAIJ, &sbaij)); 3730b51a4376SLisandro Dalcin for (i = 0; i < mbs; i++) { 37319566063dSJacob Faibussowitsch PetscCall(PetscBTMemzero(nbs, bt)); 3732618cc2edSLisandro Dalcin for (k = 0; k < bs; k++) { 3733618cc2edSLisandro Dalcin PetscInt row = bs * i + k; 3734618cc2edSLisandro Dalcin for (j = rowidxs[row]; j < rowidxs[row + 1]; j++) { 3735618cc2edSLisandro Dalcin PetscInt col = colidxs[j]; 3736618cc2edSLisandro Dalcin if (!sbaij || col >= row) 3737618cc2edSLisandro Dalcin if (!PetscBTLookupSet(bt, col / bs)) nnz[i]++; 3738618cc2edSLisandro Dalcin } 3739618cc2edSLisandro Dalcin } 3740b51a4376SLisandro Dalcin } 37419566063dSJacob Faibussowitsch PetscCall(PetscBTDestroy(&bt)); 37429566063dSJacob Faibussowitsch PetscCall(MatSeqBAIJSetPreallocation(mat, bs, 0, nnz)); 37439566063dSJacob Faibussowitsch PetscCall(MatSeqSBAIJSetPreallocation(mat, bs, 0, nnz)); 37449566063dSJacob Faibussowitsch PetscCall(PetscFree(nnz)); 3745b51a4376SLisandro Dalcin } 3746b51a4376SLisandro Dalcin 3747b51a4376SLisandro Dalcin /* store matrix values */ 3748b51a4376SLisandro Dalcin for (i = 0; i < m; i++) { 3749b51a4376SLisandro Dalcin PetscInt row = i, s = rowidxs[i], e = rowidxs[i + 1]; 37509927e4dfSBarry Smith PetscUseTypeMethod(mat, setvalues, 1, &row, e - s, colidxs + s, matvals + s, INSERT_VALUES); 3751b51a4376SLisandro Dalcin } 3752b51a4376SLisandro Dalcin 37539566063dSJacob Faibussowitsch PetscCall(PetscFree(rowidxs)); 37549566063dSJacob Faibussowitsch PetscCall(PetscFree2(colidxs, matvals)); 37559566063dSJacob Faibussowitsch PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY)); 37569566063dSJacob Faibussowitsch PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY)); 37573ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 3758b51a4376SLisandro Dalcin } 3759b51a4376SLisandro Dalcin 3760d71ae5a4SJacob Faibussowitsch PetscErrorCode MatLoad_SeqBAIJ(Mat mat, PetscViewer viewer) 3761d71ae5a4SJacob Faibussowitsch { 37627f489da9SVaclav Hapla PetscBool isbinary; 3763f501eaabSShri Abhyankar 3764f501eaabSShri Abhyankar PetscFunctionBegin; 37659566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary)); 37665f80ce2aSJacob Faibussowitsch PetscCheck(isbinary, PetscObjectComm((PetscObject)viewer), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)mat)->type_name); 37679566063dSJacob Faibussowitsch PetscCall(MatLoad_SeqBAIJ_Binary(mat, viewer)); 37683ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 3769f501eaabSShri Abhyankar } 3770f501eaabSShri Abhyankar 37715d83a8b1SBarry Smith /*@ 377211a5261eSBarry Smith MatCreateSeqBAIJ - Creates a sparse matrix in `MATSEQAIJ` (block 3773273d9f13SBarry Smith compressed row) format. For good matrix assembly performance the 377420f4b53cSBarry Smith user should preallocate the matrix storage by setting the parameter `nz` 377520f4b53cSBarry Smith (or the array `nnz`). 37762593348eSBarry Smith 3777d083f849SBarry Smith Collective 3778273d9f13SBarry Smith 3779273d9f13SBarry Smith Input Parameters: 378011a5261eSBarry Smith + comm - MPI communicator, set to `PETSC_COMM_SELF` 378111a5261eSBarry Smith . bs - size of block, the blocks are ALWAYS square. One can use `MatSetBlockSizes()` to set a different row and column blocksize but the row 378211a5261eSBarry Smith blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with `MatCreateVecs()` 3783273d9f13SBarry Smith . m - number of rows 3784273d9f13SBarry Smith . n - number of columns 378535d8aa7fSBarry Smith . nz - number of nonzero blocks per block row (same for all rows) 378635d8aa7fSBarry Smith - nnz - array containing the number of nonzero blocks in the various block rows 378720f4b53cSBarry Smith (possibly different for each block row) or `NULL` 3788273d9f13SBarry Smith 3789273d9f13SBarry Smith Output Parameter: 3790273d9f13SBarry Smith . A - the matrix 3791273d9f13SBarry Smith 3792273d9f13SBarry Smith Options Database Keys: 379311a5261eSBarry Smith + -mat_no_unroll - uses code that does not unroll the loops in the block calculations (much slower) 3794a2b725a8SWilliam Gropp - -mat_block_size - size of the blocks to use 3795273d9f13SBarry Smith 3796273d9f13SBarry Smith Level: intermediate 3797273d9f13SBarry Smith 3798273d9f13SBarry Smith Notes: 379977433607SBarry Smith It is recommended that one use `MatCreateFromOptions()` or the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`, 38002ef1f0ffSBarry Smith MatXXXXSetPreallocation() paradigm instead of this routine directly. 38012ef1f0ffSBarry Smith [MatXXXXSetPreallocation() is, for example, `MatSeqAIJSetPreallocation()`] 38022ef1f0ffSBarry Smith 3803d1be2dadSMatthew Knepley The number of rows and columns must be divisible by blocksize. 3804d1be2dadSMatthew Knepley 38052ef1f0ffSBarry Smith If the `nnz` parameter is given then the `nz` parameter is ignored 380649a6f317SBarry Smith 380735d8aa7fSBarry Smith A nonzero block is any block that as 1 or more nonzeros in it 380835d8aa7fSBarry Smith 38092ef1f0ffSBarry Smith The `MATSEQBAIJ` format is fully compatible with standard Fortran 3810273d9f13SBarry Smith storage. That is, the stored row and column indices can begin at 381120f4b53cSBarry Smith either one (as in Fortran) or zero. 3812273d9f13SBarry Smith 38132ef1f0ffSBarry Smith Specify the preallocated storage with either `nz` or `nnz` (not both). 38142ef1f0ffSBarry Smith Set `nz` = `PETSC_DEFAULT` and `nnz` = `NULL` for PETSc to control dynamic memory 3815651615e1SBarry Smith allocation. See [Sparse Matrices](sec_matsparse) for details. 3816273d9f13SBarry Smith matrices. 3817273d9f13SBarry Smith 38181cc06b55SBarry Smith .seealso: [](ch_matrices), `Mat`, [Sparse Matrices](sec_matsparse), `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatCreateBAIJ()` 3819273d9f13SBarry Smith @*/ 3820d71ae5a4SJacob Faibussowitsch PetscErrorCode MatCreateSeqBAIJ(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt nz, const PetscInt nnz[], Mat *A) 3821d71ae5a4SJacob Faibussowitsch { 3822273d9f13SBarry Smith PetscFunctionBegin; 38239566063dSJacob Faibussowitsch PetscCall(MatCreate(comm, A)); 38249566063dSJacob Faibussowitsch PetscCall(MatSetSizes(*A, m, n, m, n)); 38259566063dSJacob Faibussowitsch PetscCall(MatSetType(*A, MATSEQBAIJ)); 38269566063dSJacob Faibussowitsch PetscCall(MatSeqBAIJSetPreallocation(*A, bs, nz, (PetscInt *)nnz)); 38273ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 3828273d9f13SBarry Smith } 3829273d9f13SBarry Smith 38305d83a8b1SBarry Smith /*@ 3831273d9f13SBarry Smith MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros 3832273d9f13SBarry Smith per row in the matrix. For good matrix assembly performance the 383320f4b53cSBarry Smith user should preallocate the matrix storage by setting the parameter `nz` 383420f4b53cSBarry Smith (or the array `nnz`). 3835273d9f13SBarry Smith 3836d083f849SBarry Smith Collective 3837273d9f13SBarry Smith 3838273d9f13SBarry Smith Input Parameters: 38391c4f3114SJed Brown + B - the matrix 384011a5261eSBarry Smith . bs - size of block, the blocks are ALWAYS square. One can use `MatSetBlockSizes()` to set a different row and column blocksize but the row 384111a5261eSBarry Smith blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with `MatCreateVecs()` 3842273d9f13SBarry Smith . nz - number of block nonzeros per block row (same for all rows) 3843273d9f13SBarry Smith - nnz - array containing the number of block nonzeros in the various block rows 38442ef1f0ffSBarry Smith (possibly different for each block row) or `NULL` 3845273d9f13SBarry Smith 3846273d9f13SBarry Smith Options Database Keys: 384711a5261eSBarry Smith + -mat_no_unroll - uses code that does not unroll the loops in the block calculations (much slower) 3848a2b725a8SWilliam Gropp - -mat_block_size - size of the blocks to use 3849273d9f13SBarry Smith 3850273d9f13SBarry Smith Level: intermediate 3851273d9f13SBarry Smith 3852273d9f13SBarry Smith Notes: 38532ef1f0ffSBarry Smith If the `nnz` parameter is given then the `nz` parameter is ignored 385449a6f317SBarry Smith 385511a5261eSBarry Smith You can call `MatGetInfo()` to get information on how effective the preallocation was; 3856aa95bbe8SBarry Smith for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 385720f4b53cSBarry Smith You can also run with the option `-info` and look for messages with the string 3858aa95bbe8SBarry Smith malloc in them to see if additional memory allocation was needed. 3859aa95bbe8SBarry Smith 38602ef1f0ffSBarry Smith The `MATSEQBAIJ` format is fully compatible with standard Fortran 3861273d9f13SBarry Smith storage. That is, the stored row and column indices can begin at 386220f4b53cSBarry Smith either one (as in Fortran) or zero. 3863273d9f13SBarry Smith 3864d8a51d2aSBarry Smith Specify the preallocated storage with either `nz` or `nnz` (not both). 38652ef1f0ffSBarry Smith Set `nz` = `PETSC_DEFAULT` and `nnz` = `NULL` for PETSc to control dynamic memory 3866651615e1SBarry Smith allocation. See [Sparse Matrices](sec_matsparse) for details. 3867273d9f13SBarry Smith 38681cc06b55SBarry Smith .seealso: [](ch_matrices), `Mat`, [Sparse Matrices](sec_matsparse), `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatCreateBAIJ()`, `MatGetInfo()` 3869273d9f13SBarry Smith @*/ 3870d71ae5a4SJacob Faibussowitsch PetscErrorCode MatSeqBAIJSetPreallocation(Mat B, PetscInt bs, PetscInt nz, const PetscInt nnz[]) 3871d71ae5a4SJacob Faibussowitsch { 3872273d9f13SBarry Smith PetscFunctionBegin; 38736ba663aaSJed Brown PetscValidHeaderSpecific(B, MAT_CLASSID, 1); 38746ba663aaSJed Brown PetscValidType(B, 1); 38756ba663aaSJed Brown PetscValidLogicalCollectiveInt(B, bs, 2); 3876cac4c232SBarry Smith PetscTryMethod(B, "MatSeqBAIJSetPreallocation_C", (Mat, PetscInt, PetscInt, const PetscInt[]), (B, bs, nz, nnz)); 38773ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 3878273d9f13SBarry Smith } 3879a1d92eedSBarry Smith 3880725b52f3SLisandro Dalcin /*@C 388111a5261eSBarry Smith MatSeqBAIJSetPreallocationCSR - Creates a sparse sequential matrix in `MATSEQBAIJ` format using the given nonzero structure and (optional) numerical values 3882725b52f3SLisandro Dalcin 3883d083f849SBarry Smith Collective 3884725b52f3SLisandro Dalcin 3885725b52f3SLisandro Dalcin Input Parameters: 38861c4f3114SJed Brown + B - the matrix 388720f4b53cSBarry Smith . bs - the blocksize 3888d8a51d2aSBarry Smith . i - the indices into `j` for the start of each local row (indices start with zero) 3889d8a51d2aSBarry Smith . j - the column indices for each local row (indices start with zero) these must be sorted for each row 3890d8a51d2aSBarry Smith - v - optional values in the matrix, use `NULL` if not provided 3891725b52f3SLisandro Dalcin 3892664954b6SBarry Smith Level: advanced 3893725b52f3SLisandro Dalcin 38943adadaf3SJed Brown Notes: 3895d8a51d2aSBarry Smith The `i`,`j`,`v` values are COPIED with this routine; to avoid the copy use `MatCreateSeqBAIJWithArrays()` 3896d8a51d2aSBarry Smith 389711a5261eSBarry Smith The order of the entries in values is specified by the `MatOption` `MAT_ROW_ORIENTED`. For example, C programs 389811a5261eSBarry Smith may want to use the default `MAT_ROW_ORIENTED` of `PETSC_TRUE` and use an array v[nnz][bs][bs] where the second index is 38993adadaf3SJed Brown over rows within a block and the last index is over columns within a block row. Fortran programs will likely set 390011a5261eSBarry Smith `MAT_ROW_ORIENTED` of `PETSC_FALSE` and use a Fortran array v(bs,bs,nnz) in which the first index is over rows within a 39013adadaf3SJed Brown block column and the second index is over columns within a block. 39023adadaf3SJed Brown 3903664954b6SBarry Smith Though this routine has Preallocation() in the name it also sets the exact nonzero locations of the matrix entries and usually the numerical values as well 3904664954b6SBarry Smith 39051cc06b55SBarry Smith .seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatCreateSeqBAIJ()`, `MatSetValues()`, `MatSeqBAIJSetPreallocation()`, `MATSEQBAIJ` 3906725b52f3SLisandro Dalcin @*/ 3907d71ae5a4SJacob Faibussowitsch PetscErrorCode MatSeqBAIJSetPreallocationCSR(Mat B, PetscInt bs, const PetscInt i[], const PetscInt j[], const PetscScalar v[]) 3908d71ae5a4SJacob Faibussowitsch { 3909725b52f3SLisandro Dalcin PetscFunctionBegin; 39106ba663aaSJed Brown PetscValidHeaderSpecific(B, MAT_CLASSID, 1); 39116ba663aaSJed Brown PetscValidType(B, 1); 39126ba663aaSJed Brown PetscValidLogicalCollectiveInt(B, bs, 2); 3913cac4c232SBarry Smith PetscTryMethod(B, "MatSeqBAIJSetPreallocationCSR_C", (Mat, PetscInt, const PetscInt[], const PetscInt[], const PetscScalar[]), (B, bs, i, j, v)); 39143ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 3915725b52f3SLisandro Dalcin } 3916725b52f3SLisandro Dalcin 3917c75a6043SHong Zhang /*@ 391811a5261eSBarry Smith MatCreateSeqBAIJWithArrays - Creates a `MATSEQBAIJ` matrix using matrix elements provided by the user. 3919c75a6043SHong Zhang 3920d083f849SBarry Smith Collective 3921c75a6043SHong Zhang 3922c75a6043SHong Zhang Input Parameters: 3923c75a6043SHong Zhang + comm - must be an MPI communicator of size 1 3924c75a6043SHong Zhang . bs - size of block 3925c75a6043SHong Zhang . m - number of rows 3926c75a6043SHong Zhang . n - number of columns 3927483a2f95SBarry Smith . i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row block row of the matrix 3928c75a6043SHong Zhang . j - column indices 3929c75a6043SHong Zhang - a - matrix values 3930c75a6043SHong Zhang 3931c75a6043SHong Zhang Output Parameter: 3932c75a6043SHong Zhang . mat - the matrix 3933c75a6043SHong Zhang 3934dfb205c3SBarry Smith Level: advanced 3935c75a6043SHong Zhang 3936c75a6043SHong Zhang Notes: 39372ef1f0ffSBarry Smith The `i`, `j`, and `a` arrays are not copied by this routine, the user must free these arrays 3938c75a6043SHong Zhang once the matrix is destroyed 3939c75a6043SHong Zhang 3940c75a6043SHong Zhang You cannot set new nonzero locations into this matrix, that will generate an error. 3941c75a6043SHong Zhang 39422ef1f0ffSBarry Smith The `i` and `j` indices are 0 based 3943c75a6043SHong Zhang 394411a5261eSBarry Smith When block size is greater than 1 the matrix values must be stored using the `MATSEQBAIJ` storage format 3945dfb205c3SBarry Smith 39463adadaf3SJed Brown The order of the entries in values is the same as the block compressed sparse row storage format; that is, it is 39473adadaf3SJed Brown the same as a three dimensional array in Fortran values(bs,bs,nnz) that contains the first column of the first 39483adadaf3SJed Brown block, followed by the second column of the first block etc etc. That is, the blocks are contiguous in memory 39493adadaf3SJed Brown with column-major ordering within blocks. 3950dfb205c3SBarry Smith 39511cc06b55SBarry Smith .seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatCreateBAIJ()`, `MatCreateSeqBAIJ()` 3952c75a6043SHong Zhang @*/ 3953d71ae5a4SJacob Faibussowitsch PetscErrorCode MatCreateSeqBAIJWithArrays(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt i[], PetscInt j[], PetscScalar a[], Mat *mat) 3954d71ae5a4SJacob Faibussowitsch { 3955c75a6043SHong Zhang Mat_SeqBAIJ *baij; 3956c75a6043SHong Zhang 3957c75a6043SHong Zhang PetscFunctionBegin; 39585f80ce2aSJacob Faibussowitsch PetscCheck(bs == 1, PETSC_COMM_SELF, PETSC_ERR_SUP, "block size %" PetscInt_FMT " > 1 is not supported yet", bs); 39595f80ce2aSJacob Faibussowitsch if (m > 0) PetscCheck(i[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0"); 3960c75a6043SHong Zhang 39619566063dSJacob Faibussowitsch PetscCall(MatCreate(comm, mat)); 39629566063dSJacob Faibussowitsch PetscCall(MatSetSizes(*mat, m, n, m, n)); 39639566063dSJacob Faibussowitsch PetscCall(MatSetType(*mat, MATSEQBAIJ)); 39649566063dSJacob Faibussowitsch PetscCall(MatSeqBAIJSetPreallocation(*mat, bs, MAT_SKIP_ALLOCATION, NULL)); 3965c75a6043SHong Zhang baij = (Mat_SeqBAIJ *)(*mat)->data; 39669566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(m, &baij->imax, m, &baij->ilen)); 3967c75a6043SHong Zhang 3968c75a6043SHong Zhang baij->i = i; 3969c75a6043SHong Zhang baij->j = j; 3970c75a6043SHong Zhang baij->a = a; 397126fbe8dcSKarl Rupp 3972c75a6043SHong Zhang baij->nonew = -1; /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/ 3973e6b907acSBarry Smith baij->free_a = PETSC_FALSE; 3974e6b907acSBarry Smith baij->free_ij = PETSC_FALSE; 3975ceb5bf51SJacob Faibussowitsch baij->free_imax_ilen = PETSC_TRUE; 3976c75a6043SHong Zhang 3977ceb5bf51SJacob Faibussowitsch for (PetscInt ii = 0; ii < m; ii++) { 3978ceb5bf51SJacob Faibussowitsch const PetscInt row_len = i[ii + 1] - i[ii]; 3979ceb5bf51SJacob Faibussowitsch 3980ceb5bf51SJacob Faibussowitsch baij->ilen[ii] = baij->imax[ii] = row_len; 3981ceb5bf51SJacob Faibussowitsch PetscCheck(row_len >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Negative row length in i (row indices) row = %" PetscInt_FMT " length = %" PetscInt_FMT, ii, row_len); 3982c75a6043SHong Zhang } 398376bd3646SJed Brown if (PetscDefined(USE_DEBUG)) { 3984ceb5bf51SJacob Faibussowitsch for (PetscInt ii = 0; ii < baij->i[m]; ii++) { 39856bdcaf15SBarry Smith PetscCheck(j[ii] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Negative column index at location = %" PetscInt_FMT " index = %" PetscInt_FMT, ii, j[ii]); 39866bdcaf15SBarry Smith PetscCheck(j[ii] <= n - 1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column index to large at location = %" PetscInt_FMT " index = %" PetscInt_FMT, ii, j[ii]); 3987c75a6043SHong Zhang } 398876bd3646SJed Brown } 3989c75a6043SHong Zhang 39909566063dSJacob Faibussowitsch PetscCall(MatAssemblyBegin(*mat, MAT_FINAL_ASSEMBLY)); 39919566063dSJacob Faibussowitsch PetscCall(MatAssemblyEnd(*mat, MAT_FINAL_ASSEMBLY)); 39923ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 3993c75a6043SHong Zhang } 3994bdf6f3fcSHong Zhang 3995d71ae5a4SJacob Faibussowitsch PetscErrorCode MatCreateMPIMatConcatenateSeqMat_SeqBAIJ(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat) 3996d71ae5a4SJacob Faibussowitsch { 3997bdf6f3fcSHong Zhang PetscFunctionBegin; 39989566063dSJacob Faibussowitsch PetscCall(MatCreateMPIMatConcatenateSeqMat_MPIBAIJ(comm, inmat, n, scall, outmat)); 39993ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 4000bdf6f3fcSHong Zhang } 4001