12593348eSBarry Smith /* 2b6490206SBarry Smith Defines the basic matrix operations for the BAIJ (compressed row) 32593348eSBarry Smith matrix storage format. 42593348eSBarry Smith */ 5c6db04a5SJed Brown #include <../src/mat/impls/baij/seq/baij.h> /*I "petscmat.h" I*/ 6c6db04a5SJed Brown #include <petscblaslapack.h> 7af0996ceSBarry Smith #include <petsc/private/kernels/blockinvert.h> 8af0996ceSBarry Smith #include <petsc/private/kernels/blockmatmult.h> 943516a2dSKris Buschelman 1026cec326SBarry Smith /* defines MatSetValues_Seq_Hash(), MatAssemblyEnd_Seq_Hash(), MatSetUp_Seq_Hash() */ 1126cec326SBarry Smith #define TYPE BAIJ 1226cec326SBarry Smith #define TYPE_BS 1326cec326SBarry Smith #include "../src/mat/impls/aij/seq/seqhashmatsetvalues.h" 1426cec326SBarry Smith #undef TYPE_BS 1526cec326SBarry Smith #define TYPE_BS _BS 1626cec326SBarry Smith #define TYPE_BS_ON 1726cec326SBarry Smith #include "../src/mat/impls/aij/seq/seqhashmatsetvalues.h" 1826cec326SBarry Smith #undef TYPE_BS 1926cec326SBarry Smith #include "../src/mat/impls/aij/seq/seqhashmat.h" 2026cec326SBarry Smith #undef TYPE 2126cec326SBarry Smith #undef TYPE_BS_ON 2226cec326SBarry Smith 237ea3e4caSstefano_zampini #if defined(PETSC_HAVE_HYPRE) 247ea3e4caSstefano_zampini PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat, MatType, MatReuse, Mat *); 257ea3e4caSstefano_zampini #endif 267ea3e4caSstefano_zampini 27b5b72c8aSIrina Sokolova #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 28fd9d3c67SJed Brown PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBAIJMKL(Mat, MatType, MatReuse, Mat *); 29b5b72c8aSIrina Sokolova #endif 30c9225affSStefano Zampini PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat, MatType, MatReuse, Mat *); 31b5b72c8aSIrina Sokolova 32ff6a9541SJacob Faibussowitsch static PetscErrorCode MatGetColumnReductions_SeqBAIJ(Mat A, PetscInt type, PetscReal *reductions) 33d71ae5a4SJacob Faibussowitsch { 349463ebdaSPierre Jolivet Mat_SeqBAIJ *a_aij = (Mat_SeqBAIJ *)A->data; 35ff6a9541SJacob 
Faibussowitsch PetscInt m, n, ib, jb, bs = A->rmap->bs; 369463ebdaSPierre Jolivet MatScalar *a_val = a_aij->a; 379463ebdaSPierre Jolivet 389463ebdaSPierre Jolivet PetscFunctionBegin; 399566063dSJacob Faibussowitsch PetscCall(MatGetSize(A, &m, &n)); 40ff6a9541SJacob Faibussowitsch PetscCall(PetscArrayzero(reductions, n)); 419463ebdaSPierre Jolivet if (type == NORM_2) { 42ff6a9541SJacob Faibussowitsch for (PetscInt i = a_aij->i[0]; i < a_aij->i[A->rmap->n / bs]; i++) { 439463ebdaSPierre Jolivet for (jb = 0; jb < bs; jb++) { 449463ebdaSPierre Jolivet for (ib = 0; ib < bs; ib++) { 45857cbf51SRichard Tran Mills reductions[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val * *a_val); 469463ebdaSPierre Jolivet a_val++; 479463ebdaSPierre Jolivet } 489463ebdaSPierre Jolivet } 499463ebdaSPierre Jolivet } 509463ebdaSPierre Jolivet } else if (type == NORM_1) { 51ff6a9541SJacob Faibussowitsch for (PetscInt i = a_aij->i[0]; i < a_aij->i[A->rmap->n / bs]; i++) { 529463ebdaSPierre Jolivet for (jb = 0; jb < bs; jb++) { 539463ebdaSPierre Jolivet for (ib = 0; ib < bs; ib++) { 54857cbf51SRichard Tran Mills reductions[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val); 559463ebdaSPierre Jolivet a_val++; 569463ebdaSPierre Jolivet } 579463ebdaSPierre Jolivet } 589463ebdaSPierre Jolivet } 599463ebdaSPierre Jolivet } else if (type == NORM_INFINITY) { 60ff6a9541SJacob Faibussowitsch for (PetscInt i = a_aij->i[0]; i < a_aij->i[A->rmap->n / bs]; i++) { 619463ebdaSPierre Jolivet for (jb = 0; jb < bs; jb++) { 629463ebdaSPierre Jolivet for (ib = 0; ib < bs; ib++) { 639463ebdaSPierre Jolivet int col = A->cmap->rstart + a_aij->j[i] * bs + jb; 64857cbf51SRichard Tran Mills reductions[col] = PetscMax(PetscAbsScalar(*a_val), reductions[col]); 659463ebdaSPierre Jolivet a_val++; 669463ebdaSPierre Jolivet } 679463ebdaSPierre Jolivet } 689463ebdaSPierre Jolivet } 69857cbf51SRichard Tran Mills } else if (type == REDUCTION_SUM_REALPART || type == REDUCTION_MEAN_REALPART) { 
70ff6a9541SJacob Faibussowitsch for (PetscInt i = a_aij->i[0]; i < a_aij->i[A->rmap->n / bs]; i++) { 71857cbf51SRichard Tran Mills for (jb = 0; jb < bs; jb++) { 72857cbf51SRichard Tran Mills for (ib = 0; ib < bs; ib++) { 73857cbf51SRichard Tran Mills reductions[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscRealPart(*a_val); 74857cbf51SRichard Tran Mills a_val++; 75857cbf51SRichard Tran Mills } 76857cbf51SRichard Tran Mills } 77857cbf51SRichard Tran Mills } 78857cbf51SRichard Tran Mills } else if (type == REDUCTION_SUM_IMAGINARYPART || type == REDUCTION_MEAN_IMAGINARYPART) { 79ff6a9541SJacob Faibussowitsch for (PetscInt i = a_aij->i[0]; i < a_aij->i[A->rmap->n / bs]; i++) { 80857cbf51SRichard Tran Mills for (jb = 0; jb < bs; jb++) { 81857cbf51SRichard Tran Mills for (ib = 0; ib < bs; ib++) { 82857cbf51SRichard Tran Mills reductions[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscImaginaryPart(*a_val); 83857cbf51SRichard Tran Mills a_val++; 84857cbf51SRichard Tran Mills } 85857cbf51SRichard Tran Mills } 86857cbf51SRichard Tran Mills } 87857cbf51SRichard Tran Mills } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Unknown reduction type"); 889463ebdaSPierre Jolivet if (type == NORM_2) { 89ff6a9541SJacob Faibussowitsch for (PetscInt i = 0; i < n; i++) reductions[i] = PetscSqrtReal(reductions[i]); 90857cbf51SRichard Tran Mills } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) { 91ff6a9541SJacob Faibussowitsch for (PetscInt i = 0; i < n; i++) reductions[i] /= m; 929463ebdaSPierre Jolivet } 933ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 949463ebdaSPierre Jolivet } 959463ebdaSPierre Jolivet 9666976f2fSJacob Faibussowitsch static PetscErrorCode MatInvertBlockDiagonal_SeqBAIJ(Mat A, const PetscScalar **values) 97d71ae5a4SJacob Faibussowitsch { 98b01c7715SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 99de80f912SBarry Smith PetscInt *diag_offset, i, bs = A->rmap->bs, mbs = a->mbs, ipvt[5], 
bs2 = bs * bs, *v_pivots; 1007f0c90edSBarry Smith MatScalar *v = a->a, *odiag, *diag, work[25], *v_work; 10162bba022SBarry Smith PetscReal shift = 0.0; 1021a9391e3SHong Zhang PetscBool allowzeropivot, zeropivotdetected = PETSC_FALSE; 103b01c7715SBarry Smith 104b01c7715SBarry Smith PetscFunctionBegin; 105a455e926SHong Zhang allowzeropivot = PetscNot(A->erroriffailure); 106a455e926SHong Zhang 1079797317bSBarry Smith if (a->idiagvalid) { 1089797317bSBarry Smith if (values) *values = a->idiag; 1093ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1109797317bSBarry Smith } 1119566063dSJacob Faibussowitsch PetscCall(MatMarkDiagonal_SeqBAIJ(A)); 112b01c7715SBarry Smith diag_offset = a->diag; 1134dfa11a4SJacob Faibussowitsch if (!a->idiag) { PetscCall(PetscMalloc1(bs2 * mbs, &a->idiag)); } 114b01c7715SBarry Smith diag = a->idiag; 115bbead8a2SBarry Smith if (values) *values = a->idiag; 116b01c7715SBarry Smith /* factor and invert each block */ 117521d7252SBarry Smith switch (bs) { 118ab040260SJed Brown case 1: 119ab040260SJed Brown for (i = 0; i < mbs; i++) { 120ab040260SJed Brown odiag = v + 1 * diag_offset[i]; 121ab040260SJed Brown diag[0] = odiag[0]; 122ec1892c8SHong Zhang 123ec1892c8SHong Zhang if (PetscAbsScalar(diag[0] + shift) < PETSC_MACHINE_EPSILON) { 124ec1892c8SHong Zhang if (allowzeropivot) { 1257b6c816cSBarry Smith A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 1267b6c816cSBarry Smith A->factorerror_zeropivot_value = PetscAbsScalar(diag[0]); 1277b6c816cSBarry Smith A->factorerror_zeropivot_row = i; 1289566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "Zero pivot, row %" PetscInt_FMT "\n", i)); 12998921bdaSJacob Faibussowitsch } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Zero pivot, row %" PetscInt_FMT " pivot value %g tolerance %g", i, (double)PetscAbsScalar(diag[0]), (double)PETSC_MACHINE_EPSILON); 130ec1892c8SHong Zhang } 131ec1892c8SHong Zhang 132d4a378daSJed Brown diag[0] = (PetscScalar)1.0 / (diag[0] + shift); 
133ab040260SJed Brown diag += 1; 134ab040260SJed Brown } 135ab040260SJed Brown break; 136b01c7715SBarry Smith case 2: 137b01c7715SBarry Smith for (i = 0; i < mbs; i++) { 138b01c7715SBarry Smith odiag = v + 4 * diag_offset[i]; 1399371c9d4SSatish Balay diag[0] = odiag[0]; 1409371c9d4SSatish Balay diag[1] = odiag[1]; 1419371c9d4SSatish Balay diag[2] = odiag[2]; 1429371c9d4SSatish Balay diag[3] = odiag[3]; 1439566063dSJacob Faibussowitsch PetscCall(PetscKernel_A_gets_inverse_A_2(diag, shift, allowzeropivot, &zeropivotdetected)); 1447b6c816cSBarry Smith if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 145b01c7715SBarry Smith diag += 4; 146b01c7715SBarry Smith } 147b01c7715SBarry Smith break; 148b01c7715SBarry Smith case 3: 149b01c7715SBarry Smith for (i = 0; i < mbs; i++) { 150b01c7715SBarry Smith odiag = v + 9 * diag_offset[i]; 1519371c9d4SSatish Balay diag[0] = odiag[0]; 1529371c9d4SSatish Balay diag[1] = odiag[1]; 1539371c9d4SSatish Balay diag[2] = odiag[2]; 1549371c9d4SSatish Balay diag[3] = odiag[3]; 1559371c9d4SSatish Balay diag[4] = odiag[4]; 1569371c9d4SSatish Balay diag[5] = odiag[5]; 1579371c9d4SSatish Balay diag[6] = odiag[6]; 1589371c9d4SSatish Balay diag[7] = odiag[7]; 159b01c7715SBarry Smith diag[8] = odiag[8]; 1609566063dSJacob Faibussowitsch PetscCall(PetscKernel_A_gets_inverse_A_3(diag, shift, allowzeropivot, &zeropivotdetected)); 1617b6c816cSBarry Smith if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 162b01c7715SBarry Smith diag += 9; 163b01c7715SBarry Smith } 164b01c7715SBarry Smith break; 165b01c7715SBarry Smith case 4: 166b01c7715SBarry Smith for (i = 0; i < mbs; i++) { 167b01c7715SBarry Smith odiag = v + 16 * diag_offset[i]; 1689566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(diag, odiag, 16)); 1699566063dSJacob Faibussowitsch PetscCall(PetscKernel_A_gets_inverse_A_4(diag, shift, allowzeropivot, &zeropivotdetected)); 1707b6c816cSBarry Smith if (zeropivotdetected) A->factorerrortype = 
MAT_FACTOR_NUMERIC_ZEROPIVOT; 171b01c7715SBarry Smith diag += 16; 172b01c7715SBarry Smith } 173b01c7715SBarry Smith break; 174b01c7715SBarry Smith case 5: 175b01c7715SBarry Smith for (i = 0; i < mbs; i++) { 176b01c7715SBarry Smith odiag = v + 25 * diag_offset[i]; 1779566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(diag, odiag, 25)); 1789566063dSJacob Faibussowitsch PetscCall(PetscKernel_A_gets_inverse_A_5(diag, ipvt, work, shift, allowzeropivot, &zeropivotdetected)); 1797b6c816cSBarry Smith if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 180b01c7715SBarry Smith diag += 25; 181b01c7715SBarry Smith } 182b01c7715SBarry Smith break; 183d49b2adcSBarry Smith case 6: 184d49b2adcSBarry Smith for (i = 0; i < mbs; i++) { 185d49b2adcSBarry Smith odiag = v + 36 * diag_offset[i]; 1869566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(diag, odiag, 36)); 1879566063dSJacob Faibussowitsch PetscCall(PetscKernel_A_gets_inverse_A_6(diag, shift, allowzeropivot, &zeropivotdetected)); 1887b6c816cSBarry Smith if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 189d49b2adcSBarry Smith diag += 36; 190d49b2adcSBarry Smith } 191d49b2adcSBarry Smith break; 192de80f912SBarry Smith case 7: 193de80f912SBarry Smith for (i = 0; i < mbs; i++) { 194de80f912SBarry Smith odiag = v + 49 * diag_offset[i]; 1959566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(diag, odiag, 49)); 1969566063dSJacob Faibussowitsch PetscCall(PetscKernel_A_gets_inverse_A_7(diag, shift, allowzeropivot, &zeropivotdetected)); 1977b6c816cSBarry Smith if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 198de80f912SBarry Smith diag += 49; 199de80f912SBarry Smith } 200de80f912SBarry Smith break; 201b01c7715SBarry Smith default: 2029566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(bs, &v_work, bs, &v_pivots)); 203de80f912SBarry Smith for (i = 0; i < mbs; i++) { 204de80f912SBarry Smith odiag = v + bs2 * diag_offset[i]; 2059566063dSJacob Faibussowitsch 
PetscCall(PetscArraycpy(diag, odiag, bs2)); 2069566063dSJacob Faibussowitsch PetscCall(PetscKernel_A_gets_inverse_A(bs, diag, v_pivots, v_work, allowzeropivot, &zeropivotdetected)); 2077b6c816cSBarry Smith if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 208de80f912SBarry Smith diag += bs2; 209de80f912SBarry Smith } 2109566063dSJacob Faibussowitsch PetscCall(PetscFree2(v_work, v_pivots)); 211b01c7715SBarry Smith } 212b01c7715SBarry Smith a->idiagvalid = PETSC_TRUE; 2133ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 214b01c7715SBarry Smith } 215b01c7715SBarry Smith 21666976f2fSJacob Faibussowitsch static PetscErrorCode MatSOR_SeqBAIJ(Mat A, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx) 217d71ae5a4SJacob Faibussowitsch { 2186d3beeddSMatthew Knepley Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 219e48d15efSToby Isaac PetscScalar *x, *work, *w, *workt, *t; 220e48d15efSToby Isaac const MatScalar *v, *aa = a->a, *idiag; 221e48d15efSToby Isaac const PetscScalar *b, *xb; 2225455b99fSToby Isaac PetscScalar s[7], xw[7] = {0}; /* avoid some compilers thinking xw is uninitialized */ 223e48d15efSToby Isaac PetscInt m = a->mbs, i, i2, nz, bs = A->rmap->bs, bs2 = bs * bs, k, j, idx, it; 224c1ac3661SBarry Smith const PetscInt *diag, *ai = a->i, *aj = a->j, *vi; 225b01c7715SBarry Smith 226b01c7715SBarry Smith PetscFunctionBegin; 227b01c7715SBarry Smith its = its * lits; 2285f80ce2aSJacob Faibussowitsch PetscCheck(!(flag & SOR_EISENSTAT), PETSC_COMM_SELF, PETSC_ERR_SUP, "No support yet for Eisenstat"); 2295f80ce2aSJacob Faibussowitsch PetscCheck(its > 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Relaxation requires global its %" PetscInt_FMT " and local its %" PetscInt_FMT " both positive", its, lits); 2305f80ce2aSJacob Faibussowitsch PetscCheck(!fshift, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for diagonal shift"); 2315f80ce2aSJacob Faibussowitsch PetscCheck(omega == 1.0, PETSC_COMM_SELF, 
PETSC_ERR_SUP, "No support for non-trivial relaxation factor"); 2325f80ce2aSJacob Faibussowitsch PetscCheck(!(flag & SOR_APPLY_UPPER) && !(flag & SOR_APPLY_LOWER), PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for applying upper or lower triangular parts"); 233b01c7715SBarry Smith 2349566063dSJacob Faibussowitsch if (!a->idiagvalid) PetscCall(MatInvertBlockDiagonal(A, NULL)); 235b01c7715SBarry Smith 2363ba16761SJacob Faibussowitsch if (!m) PetscFunctionReturn(PETSC_SUCCESS); 237b01c7715SBarry Smith diag = a->diag; 238b01c7715SBarry Smith idiag = a->idiag; 239de80f912SBarry Smith k = PetscMax(A->rmap->n, A->cmap->n); 24048a46eb9SPierre Jolivet if (!a->mult_work) PetscCall(PetscMalloc1(k + 1, &a->mult_work)); 24148a46eb9SPierre Jolivet if (!a->sor_workt) PetscCall(PetscMalloc1(k, &a->sor_workt)); 24248a46eb9SPierre Jolivet if (!a->sor_work) PetscCall(PetscMalloc1(bs, &a->sor_work)); 2433475c22fSBarry Smith work = a->mult_work; 2443475c22fSBarry Smith t = a->sor_workt; 245de80f912SBarry Smith w = a->sor_work; 246de80f912SBarry Smith 2479566063dSJacob Faibussowitsch PetscCall(VecGetArray(xx, &x)); 2489566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(bb, &b)); 249de80f912SBarry Smith 250de80f912SBarry Smith if (flag & SOR_ZERO_INITIAL_GUESS) { 251de80f912SBarry Smith if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) { 252e48d15efSToby Isaac switch (bs) { 253e48d15efSToby Isaac case 1: 254e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_1(x, idiag, b); 255e48d15efSToby Isaac t[0] = b[0]; 256e48d15efSToby Isaac i2 = 1; 257e48d15efSToby Isaac idiag += 1; 258e48d15efSToby Isaac for (i = 1; i < m; i++) { 259e48d15efSToby Isaac v = aa + ai[i]; 260e48d15efSToby Isaac vi = aj + ai[i]; 261e48d15efSToby Isaac nz = diag[i] - ai[i]; 262e48d15efSToby Isaac s[0] = b[i2]; 263e48d15efSToby Isaac for (j = 0; j < nz; j++) { 264e48d15efSToby Isaac xw[0] = x[vi[j]]; 265e48d15efSToby Isaac PetscKernel_v_gets_v_minus_A_times_w_1(s, (v + j), xw); 266e48d15efSToby Isaac } 
267e48d15efSToby Isaac t[i2] = s[0]; 268e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_1(xw, idiag, s); 269e48d15efSToby Isaac x[i2] = xw[0]; 270e48d15efSToby Isaac idiag += 1; 271e48d15efSToby Isaac i2 += 1; 272e48d15efSToby Isaac } 273e48d15efSToby Isaac break; 274e48d15efSToby Isaac case 2: 275e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_2(x, idiag, b); 2769371c9d4SSatish Balay t[0] = b[0]; 2779371c9d4SSatish Balay t[1] = b[1]; 278e48d15efSToby Isaac i2 = 2; 279e48d15efSToby Isaac idiag += 4; 280e48d15efSToby Isaac for (i = 1; i < m; i++) { 281e48d15efSToby Isaac v = aa + 4 * ai[i]; 282e48d15efSToby Isaac vi = aj + ai[i]; 283e48d15efSToby Isaac nz = diag[i] - ai[i]; 2849371c9d4SSatish Balay s[0] = b[i2]; 2859371c9d4SSatish Balay s[1] = b[i2 + 1]; 286e48d15efSToby Isaac for (j = 0; j < nz; j++) { 287e48d15efSToby Isaac idx = 2 * vi[j]; 288e48d15efSToby Isaac it = 4 * j; 2899371c9d4SSatish Balay xw[0] = x[idx]; 2909371c9d4SSatish Balay xw[1] = x[1 + idx]; 291e48d15efSToby Isaac PetscKernel_v_gets_v_minus_A_times_w_2(s, (v + it), xw); 292e48d15efSToby Isaac } 2939371c9d4SSatish Balay t[i2] = s[0]; 2949371c9d4SSatish Balay t[i2 + 1] = s[1]; 295e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_2(xw, idiag, s); 2969371c9d4SSatish Balay x[i2] = xw[0]; 2979371c9d4SSatish Balay x[i2 + 1] = xw[1]; 298e48d15efSToby Isaac idiag += 4; 299e48d15efSToby Isaac i2 += 2; 300e48d15efSToby Isaac } 301e48d15efSToby Isaac break; 302e48d15efSToby Isaac case 3: 303e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_3(x, idiag, b); 3049371c9d4SSatish Balay t[0] = b[0]; 3059371c9d4SSatish Balay t[1] = b[1]; 3069371c9d4SSatish Balay t[2] = b[2]; 307e48d15efSToby Isaac i2 = 3; 308e48d15efSToby Isaac idiag += 9; 309e48d15efSToby Isaac for (i = 1; i < m; i++) { 310e48d15efSToby Isaac v = aa + 9 * ai[i]; 311e48d15efSToby Isaac vi = aj + ai[i]; 312e48d15efSToby Isaac nz = diag[i] - ai[i]; 3139371c9d4SSatish Balay s[0] = b[i2]; 3149371c9d4SSatish Balay s[1] = b[i2 + 1]; 3159371c9d4SSatish 
Balay s[2] = b[i2 + 2]; 316e48d15efSToby Isaac while (nz--) { 317e48d15efSToby Isaac idx = 3 * (*vi++); 3189371c9d4SSatish Balay xw[0] = x[idx]; 3199371c9d4SSatish Balay xw[1] = x[1 + idx]; 3209371c9d4SSatish Balay xw[2] = x[2 + idx]; 321e48d15efSToby Isaac PetscKernel_v_gets_v_minus_A_times_w_3(s, v, xw); 322e48d15efSToby Isaac v += 9; 323e48d15efSToby Isaac } 3249371c9d4SSatish Balay t[i2] = s[0]; 3259371c9d4SSatish Balay t[i2 + 1] = s[1]; 3269371c9d4SSatish Balay t[i2 + 2] = s[2]; 327e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_3(xw, idiag, s); 3289371c9d4SSatish Balay x[i2] = xw[0]; 3299371c9d4SSatish Balay x[i2 + 1] = xw[1]; 3309371c9d4SSatish Balay x[i2 + 2] = xw[2]; 331e48d15efSToby Isaac idiag += 9; 332e48d15efSToby Isaac i2 += 3; 333e48d15efSToby Isaac } 334e48d15efSToby Isaac break; 335e48d15efSToby Isaac case 4: 336e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_4(x, idiag, b); 3379371c9d4SSatish Balay t[0] = b[0]; 3389371c9d4SSatish Balay t[1] = b[1]; 3399371c9d4SSatish Balay t[2] = b[2]; 3409371c9d4SSatish Balay t[3] = b[3]; 341e48d15efSToby Isaac i2 = 4; 342e48d15efSToby Isaac idiag += 16; 343e48d15efSToby Isaac for (i = 1; i < m; i++) { 344e48d15efSToby Isaac v = aa + 16 * ai[i]; 345e48d15efSToby Isaac vi = aj + ai[i]; 346e48d15efSToby Isaac nz = diag[i] - ai[i]; 3479371c9d4SSatish Balay s[0] = b[i2]; 3489371c9d4SSatish Balay s[1] = b[i2 + 1]; 3499371c9d4SSatish Balay s[2] = b[i2 + 2]; 3509371c9d4SSatish Balay s[3] = b[i2 + 3]; 351e48d15efSToby Isaac while (nz--) { 352e48d15efSToby Isaac idx = 4 * (*vi++); 3539371c9d4SSatish Balay xw[0] = x[idx]; 3549371c9d4SSatish Balay xw[1] = x[1 + idx]; 3559371c9d4SSatish Balay xw[2] = x[2 + idx]; 3569371c9d4SSatish Balay xw[3] = x[3 + idx]; 357e48d15efSToby Isaac PetscKernel_v_gets_v_minus_A_times_w_4(s, v, xw); 358e48d15efSToby Isaac v += 16; 359e48d15efSToby Isaac } 3609371c9d4SSatish Balay t[i2] = s[0]; 3619371c9d4SSatish Balay t[i2 + 1] = s[1]; 3629371c9d4SSatish Balay t[i2 + 2] = s[2]; 
3639371c9d4SSatish Balay t[i2 + 3] = s[3]; 364e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_4(xw, idiag, s); 3659371c9d4SSatish Balay x[i2] = xw[0]; 3669371c9d4SSatish Balay x[i2 + 1] = xw[1]; 3679371c9d4SSatish Balay x[i2 + 2] = xw[2]; 3689371c9d4SSatish Balay x[i2 + 3] = xw[3]; 369e48d15efSToby Isaac idiag += 16; 370e48d15efSToby Isaac i2 += 4; 371e48d15efSToby Isaac } 372e48d15efSToby Isaac break; 373e48d15efSToby Isaac case 5: 374e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_5(x, idiag, b); 3759371c9d4SSatish Balay t[0] = b[0]; 3769371c9d4SSatish Balay t[1] = b[1]; 3779371c9d4SSatish Balay t[2] = b[2]; 3789371c9d4SSatish Balay t[3] = b[3]; 3799371c9d4SSatish Balay t[4] = b[4]; 380e48d15efSToby Isaac i2 = 5; 381e48d15efSToby Isaac idiag += 25; 382e48d15efSToby Isaac for (i = 1; i < m; i++) { 383e48d15efSToby Isaac v = aa + 25 * ai[i]; 384e48d15efSToby Isaac vi = aj + ai[i]; 385e48d15efSToby Isaac nz = diag[i] - ai[i]; 3869371c9d4SSatish Balay s[0] = b[i2]; 3879371c9d4SSatish Balay s[1] = b[i2 + 1]; 3889371c9d4SSatish Balay s[2] = b[i2 + 2]; 3899371c9d4SSatish Balay s[3] = b[i2 + 3]; 3909371c9d4SSatish Balay s[4] = b[i2 + 4]; 391e48d15efSToby Isaac while (nz--) { 392e48d15efSToby Isaac idx = 5 * (*vi++); 3939371c9d4SSatish Balay xw[0] = x[idx]; 3949371c9d4SSatish Balay xw[1] = x[1 + idx]; 3959371c9d4SSatish Balay xw[2] = x[2 + idx]; 3969371c9d4SSatish Balay xw[3] = x[3 + idx]; 3979371c9d4SSatish Balay xw[4] = x[4 + idx]; 398e48d15efSToby Isaac PetscKernel_v_gets_v_minus_A_times_w_5(s, v, xw); 399e48d15efSToby Isaac v += 25; 400e48d15efSToby Isaac } 4019371c9d4SSatish Balay t[i2] = s[0]; 4029371c9d4SSatish Balay t[i2 + 1] = s[1]; 4039371c9d4SSatish Balay t[i2 + 2] = s[2]; 4049371c9d4SSatish Balay t[i2 + 3] = s[3]; 4059371c9d4SSatish Balay t[i2 + 4] = s[4]; 406e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_5(xw, idiag, s); 4079371c9d4SSatish Balay x[i2] = xw[0]; 4089371c9d4SSatish Balay x[i2 + 1] = xw[1]; 4099371c9d4SSatish Balay x[i2 + 2] = xw[2]; 
4109371c9d4SSatish Balay x[i2 + 3] = xw[3]; 4119371c9d4SSatish Balay x[i2 + 4] = xw[4]; 412e48d15efSToby Isaac idiag += 25; 413e48d15efSToby Isaac i2 += 5; 414e48d15efSToby Isaac } 415e48d15efSToby Isaac break; 416e48d15efSToby Isaac case 6: 417e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_6(x, idiag, b); 4189371c9d4SSatish Balay t[0] = b[0]; 4199371c9d4SSatish Balay t[1] = b[1]; 4209371c9d4SSatish Balay t[2] = b[2]; 4219371c9d4SSatish Balay t[3] = b[3]; 4229371c9d4SSatish Balay t[4] = b[4]; 4239371c9d4SSatish Balay t[5] = b[5]; 424e48d15efSToby Isaac i2 = 6; 425e48d15efSToby Isaac idiag += 36; 426e48d15efSToby Isaac for (i = 1; i < m; i++) { 427e48d15efSToby Isaac v = aa + 36 * ai[i]; 428e48d15efSToby Isaac vi = aj + ai[i]; 429e48d15efSToby Isaac nz = diag[i] - ai[i]; 4309371c9d4SSatish Balay s[0] = b[i2]; 4319371c9d4SSatish Balay s[1] = b[i2 + 1]; 4329371c9d4SSatish Balay s[2] = b[i2 + 2]; 4339371c9d4SSatish Balay s[3] = b[i2 + 3]; 4349371c9d4SSatish Balay s[4] = b[i2 + 4]; 4359371c9d4SSatish Balay s[5] = b[i2 + 5]; 436e48d15efSToby Isaac while (nz--) { 437e48d15efSToby Isaac idx = 6 * (*vi++); 4389371c9d4SSatish Balay xw[0] = x[idx]; 4399371c9d4SSatish Balay xw[1] = x[1 + idx]; 4409371c9d4SSatish Balay xw[2] = x[2 + idx]; 4419371c9d4SSatish Balay xw[3] = x[3 + idx]; 4429371c9d4SSatish Balay xw[4] = x[4 + idx]; 4439371c9d4SSatish Balay xw[5] = x[5 + idx]; 444e48d15efSToby Isaac PetscKernel_v_gets_v_minus_A_times_w_6(s, v, xw); 445e48d15efSToby Isaac v += 36; 446e48d15efSToby Isaac } 4479371c9d4SSatish Balay t[i2] = s[0]; 4489371c9d4SSatish Balay t[i2 + 1] = s[1]; 4499371c9d4SSatish Balay t[i2 + 2] = s[2]; 4509371c9d4SSatish Balay t[i2 + 3] = s[3]; 4519371c9d4SSatish Balay t[i2 + 4] = s[4]; 4529371c9d4SSatish Balay t[i2 + 5] = s[5]; 453e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_6(xw, idiag, s); 4549371c9d4SSatish Balay x[i2] = xw[0]; 4559371c9d4SSatish Balay x[i2 + 1] = xw[1]; 4569371c9d4SSatish Balay x[i2 + 2] = xw[2]; 4579371c9d4SSatish Balay x[i2 + 3] 
= xw[3]; 4589371c9d4SSatish Balay x[i2 + 4] = xw[4]; 4599371c9d4SSatish Balay x[i2 + 5] = xw[5]; 460e48d15efSToby Isaac idiag += 36; 461e48d15efSToby Isaac i2 += 6; 462e48d15efSToby Isaac } 463e48d15efSToby Isaac break; 464e48d15efSToby Isaac case 7: 465e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_7(x, idiag, b); 4669371c9d4SSatish Balay t[0] = b[0]; 4679371c9d4SSatish Balay t[1] = b[1]; 4689371c9d4SSatish Balay t[2] = b[2]; 4699371c9d4SSatish Balay t[3] = b[3]; 4709371c9d4SSatish Balay t[4] = b[4]; 4719371c9d4SSatish Balay t[5] = b[5]; 4729371c9d4SSatish Balay t[6] = b[6]; 473e48d15efSToby Isaac i2 = 7; 474e48d15efSToby Isaac idiag += 49; 475e48d15efSToby Isaac for (i = 1; i < m; i++) { 476e48d15efSToby Isaac v = aa + 49 * ai[i]; 477e48d15efSToby Isaac vi = aj + ai[i]; 478e48d15efSToby Isaac nz = diag[i] - ai[i]; 4799371c9d4SSatish Balay s[0] = b[i2]; 4809371c9d4SSatish Balay s[1] = b[i2 + 1]; 4819371c9d4SSatish Balay s[2] = b[i2 + 2]; 4829371c9d4SSatish Balay s[3] = b[i2 + 3]; 4839371c9d4SSatish Balay s[4] = b[i2 + 4]; 4849371c9d4SSatish Balay s[5] = b[i2 + 5]; 4859371c9d4SSatish Balay s[6] = b[i2 + 6]; 486e48d15efSToby Isaac while (nz--) { 487e48d15efSToby Isaac idx = 7 * (*vi++); 4889371c9d4SSatish Balay xw[0] = x[idx]; 4899371c9d4SSatish Balay xw[1] = x[1 + idx]; 4909371c9d4SSatish Balay xw[2] = x[2 + idx]; 4919371c9d4SSatish Balay xw[3] = x[3 + idx]; 4929371c9d4SSatish Balay xw[4] = x[4 + idx]; 4939371c9d4SSatish Balay xw[5] = x[5 + idx]; 4949371c9d4SSatish Balay xw[6] = x[6 + idx]; 495e48d15efSToby Isaac PetscKernel_v_gets_v_minus_A_times_w_7(s, v, xw); 496e48d15efSToby Isaac v += 49; 497e48d15efSToby Isaac } 4989371c9d4SSatish Balay t[i2] = s[0]; 4999371c9d4SSatish Balay t[i2 + 1] = s[1]; 5009371c9d4SSatish Balay t[i2 + 2] = s[2]; 5019371c9d4SSatish Balay t[i2 + 3] = s[3]; 5029371c9d4SSatish Balay t[i2 + 4] = s[4]; 5039371c9d4SSatish Balay t[i2 + 5] = s[5]; 5049371c9d4SSatish Balay t[i2 + 6] = s[6]; 505e48d15efSToby Isaac 
PetscKernel_v_gets_A_times_w_7(xw, idiag, s); 5069371c9d4SSatish Balay x[i2] = xw[0]; 5079371c9d4SSatish Balay x[i2 + 1] = xw[1]; 5089371c9d4SSatish Balay x[i2 + 2] = xw[2]; 5099371c9d4SSatish Balay x[i2 + 3] = xw[3]; 5109371c9d4SSatish Balay x[i2 + 4] = xw[4]; 5119371c9d4SSatish Balay x[i2 + 5] = xw[5]; 5129371c9d4SSatish Balay x[i2 + 6] = xw[6]; 513e48d15efSToby Isaac idiag += 49; 514e48d15efSToby Isaac i2 += 7; 515e48d15efSToby Isaac } 516e48d15efSToby Isaac break; 517e48d15efSToby Isaac default: 51896b95a6bSBarry Smith PetscKernel_w_gets_Ar_times_v(bs, bs, b, idiag, x); 5199566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(t, b, bs)); 520de80f912SBarry Smith i2 = bs; 521de80f912SBarry Smith idiag += bs2; 522de80f912SBarry Smith for (i = 1; i < m; i++) { 523de80f912SBarry Smith v = aa + bs2 * ai[i]; 524de80f912SBarry Smith vi = aj + ai[i]; 525de80f912SBarry Smith nz = diag[i] - ai[i]; 526de80f912SBarry Smith 5279566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(w, b + i2, bs)); 528de80f912SBarry Smith /* copy all rows of x that are needed into contiguous space */ 529de80f912SBarry Smith workt = work; 530de80f912SBarry Smith for (j = 0; j < nz; j++) { 5319566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(workt, x + bs * (*vi++), bs)); 532de80f912SBarry Smith workt += bs; 533de80f912SBarry Smith } 53496b95a6bSBarry Smith PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, v, work); 5359566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(t + i2, w, bs)); 53696b95a6bSBarry Smith PetscKernel_w_gets_Ar_times_v(bs, bs, w, idiag, x + i2); 537de80f912SBarry Smith 538de80f912SBarry Smith idiag += bs2; 539de80f912SBarry Smith i2 += bs; 540de80f912SBarry Smith } 541e48d15efSToby Isaac break; 542e48d15efSToby Isaac } 543de80f912SBarry Smith /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 5449566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(1.0 * bs2 * a->nz)); 545e48d15efSToby Isaac xb = t; 5469371c9d4SSatish Balay } else xb 
= b; 547de80f912SBarry Smith if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 548e48d15efSToby Isaac idiag = a->idiag + bs2 * (a->mbs - 1); 549e48d15efSToby Isaac i2 = bs * (m - 1); 550e48d15efSToby Isaac switch (bs) { 551e48d15efSToby Isaac case 1: 552e48d15efSToby Isaac s[0] = xb[i2]; 553e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_1(xw, idiag, s); 554e48d15efSToby Isaac x[i2] = xw[0]; 555e48d15efSToby Isaac i2 -= 1; 556e48d15efSToby Isaac for (i = m - 2; i >= 0; i--) { 557e48d15efSToby Isaac v = aa + (diag[i] + 1); 558e48d15efSToby Isaac vi = aj + diag[i] + 1; 559e48d15efSToby Isaac nz = ai[i + 1] - diag[i] - 1; 560e48d15efSToby Isaac s[0] = xb[i2]; 561e48d15efSToby Isaac for (j = 0; j < nz; j++) { 562e48d15efSToby Isaac xw[0] = x[vi[j]]; 563e48d15efSToby Isaac PetscKernel_v_gets_v_minus_A_times_w_1(s, (v + j), xw); 564e48d15efSToby Isaac } 565e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_1(xw, idiag, s); 566e48d15efSToby Isaac x[i2] = xw[0]; 567e48d15efSToby Isaac idiag -= 1; 568e48d15efSToby Isaac i2 -= 1; 569e48d15efSToby Isaac } 570e48d15efSToby Isaac break; 571e48d15efSToby Isaac case 2: 5729371c9d4SSatish Balay s[0] = xb[i2]; 5739371c9d4SSatish Balay s[1] = xb[i2 + 1]; 574e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_2(xw, idiag, s); 5759371c9d4SSatish Balay x[i2] = xw[0]; 5769371c9d4SSatish Balay x[i2 + 1] = xw[1]; 577e48d15efSToby Isaac i2 -= 2; 578e48d15efSToby Isaac idiag -= 4; 579e48d15efSToby Isaac for (i = m - 2; i >= 0; i--) { 580e48d15efSToby Isaac v = aa + 4 * (diag[i] + 1); 581e48d15efSToby Isaac vi = aj + diag[i] + 1; 582e48d15efSToby Isaac nz = ai[i + 1] - diag[i] - 1; 5839371c9d4SSatish Balay s[0] = xb[i2]; 5849371c9d4SSatish Balay s[1] = xb[i2 + 1]; 585e48d15efSToby Isaac for (j = 0; j < nz; j++) { 586e48d15efSToby Isaac idx = 2 * vi[j]; 587e48d15efSToby Isaac it = 4 * j; 5889371c9d4SSatish Balay xw[0] = x[idx]; 5899371c9d4SSatish Balay xw[1] = x[1 + idx]; 590e48d15efSToby Isaac 
PetscKernel_v_gets_v_minus_A_times_w_2(s, (v + it), xw); 591e48d15efSToby Isaac } 592e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_2(xw, idiag, s); 5939371c9d4SSatish Balay x[i2] = xw[0]; 5949371c9d4SSatish Balay x[i2 + 1] = xw[1]; 595e48d15efSToby Isaac idiag -= 4; 596e48d15efSToby Isaac i2 -= 2; 597e48d15efSToby Isaac } 598e48d15efSToby Isaac break; 599e48d15efSToby Isaac case 3: 6009371c9d4SSatish Balay s[0] = xb[i2]; 6019371c9d4SSatish Balay s[1] = xb[i2 + 1]; 6029371c9d4SSatish Balay s[2] = xb[i2 + 2]; 603e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_3(xw, idiag, s); 6049371c9d4SSatish Balay x[i2] = xw[0]; 6059371c9d4SSatish Balay x[i2 + 1] = xw[1]; 6069371c9d4SSatish Balay x[i2 + 2] = xw[2]; 607e48d15efSToby Isaac i2 -= 3; 608e48d15efSToby Isaac idiag -= 9; 609e48d15efSToby Isaac for (i = m - 2; i >= 0; i--) { 610e48d15efSToby Isaac v = aa + 9 * (diag[i] + 1); 611e48d15efSToby Isaac vi = aj + diag[i] + 1; 612e48d15efSToby Isaac nz = ai[i + 1] - diag[i] - 1; 6139371c9d4SSatish Balay s[0] = xb[i2]; 6149371c9d4SSatish Balay s[1] = xb[i2 + 1]; 6159371c9d4SSatish Balay s[2] = xb[i2 + 2]; 616e48d15efSToby Isaac while (nz--) { 617e48d15efSToby Isaac idx = 3 * (*vi++); 6189371c9d4SSatish Balay xw[0] = x[idx]; 6199371c9d4SSatish Balay xw[1] = x[1 + idx]; 6209371c9d4SSatish Balay xw[2] = x[2 + idx]; 621e48d15efSToby Isaac PetscKernel_v_gets_v_minus_A_times_w_3(s, v, xw); 622e48d15efSToby Isaac v += 9; 623e48d15efSToby Isaac } 624e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_3(xw, idiag, s); 6259371c9d4SSatish Balay x[i2] = xw[0]; 6269371c9d4SSatish Balay x[i2 + 1] = xw[1]; 6279371c9d4SSatish Balay x[i2 + 2] = xw[2]; 628e48d15efSToby Isaac idiag -= 9; 629e48d15efSToby Isaac i2 -= 3; 630e48d15efSToby Isaac } 631e48d15efSToby Isaac break; 632e48d15efSToby Isaac case 4: 6339371c9d4SSatish Balay s[0] = xb[i2]; 6349371c9d4SSatish Balay s[1] = xb[i2 + 1]; 6359371c9d4SSatish Balay s[2] = xb[i2 + 2]; 6369371c9d4SSatish Balay s[3] = xb[i2 + 3]; 637e48d15efSToby Isaac 
PetscKernel_v_gets_A_times_w_4(xw, idiag, s); 6389371c9d4SSatish Balay x[i2] = xw[0]; 6399371c9d4SSatish Balay x[i2 + 1] = xw[1]; 6409371c9d4SSatish Balay x[i2 + 2] = xw[2]; 6419371c9d4SSatish Balay x[i2 + 3] = xw[3]; 642e48d15efSToby Isaac i2 -= 4; 643e48d15efSToby Isaac idiag -= 16; 644e48d15efSToby Isaac for (i = m - 2; i >= 0; i--) { 645e48d15efSToby Isaac v = aa + 16 * (diag[i] + 1); 646e48d15efSToby Isaac vi = aj + diag[i] + 1; 647e48d15efSToby Isaac nz = ai[i + 1] - diag[i] - 1; 6489371c9d4SSatish Balay s[0] = xb[i2]; 6499371c9d4SSatish Balay s[1] = xb[i2 + 1]; 6509371c9d4SSatish Balay s[2] = xb[i2 + 2]; 6519371c9d4SSatish Balay s[3] = xb[i2 + 3]; 652e48d15efSToby Isaac while (nz--) { 653e48d15efSToby Isaac idx = 4 * (*vi++); 6549371c9d4SSatish Balay xw[0] = x[idx]; 6559371c9d4SSatish Balay xw[1] = x[1 + idx]; 6569371c9d4SSatish Balay xw[2] = x[2 + idx]; 6579371c9d4SSatish Balay xw[3] = x[3 + idx]; 658e48d15efSToby Isaac PetscKernel_v_gets_v_minus_A_times_w_4(s, v, xw); 659e48d15efSToby Isaac v += 16; 660e48d15efSToby Isaac } 661e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_4(xw, idiag, s); 6629371c9d4SSatish Balay x[i2] = xw[0]; 6639371c9d4SSatish Balay x[i2 + 1] = xw[1]; 6649371c9d4SSatish Balay x[i2 + 2] = xw[2]; 6659371c9d4SSatish Balay x[i2 + 3] = xw[3]; 666e48d15efSToby Isaac idiag -= 16; 667e48d15efSToby Isaac i2 -= 4; 668e48d15efSToby Isaac } 669e48d15efSToby Isaac break; 670e48d15efSToby Isaac case 5: 6719371c9d4SSatish Balay s[0] = xb[i2]; 6729371c9d4SSatish Balay s[1] = xb[i2 + 1]; 6739371c9d4SSatish Balay s[2] = xb[i2 + 2]; 6749371c9d4SSatish Balay s[3] = xb[i2 + 3]; 6759371c9d4SSatish Balay s[4] = xb[i2 + 4]; 676e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_5(xw, idiag, s); 6779371c9d4SSatish Balay x[i2] = xw[0]; 6789371c9d4SSatish Balay x[i2 + 1] = xw[1]; 6799371c9d4SSatish Balay x[i2 + 2] = xw[2]; 6809371c9d4SSatish Balay x[i2 + 3] = xw[3]; 6819371c9d4SSatish Balay x[i2 + 4] = xw[4]; 682e48d15efSToby Isaac i2 -= 5; 683e48d15efSToby Isaac 
idiag -= 25; 684e48d15efSToby Isaac for (i = m - 2; i >= 0; i--) { 685e48d15efSToby Isaac v = aa + 25 * (diag[i] + 1); 686e48d15efSToby Isaac vi = aj + diag[i] + 1; 687e48d15efSToby Isaac nz = ai[i + 1] - diag[i] - 1; 6889371c9d4SSatish Balay s[0] = xb[i2]; 6899371c9d4SSatish Balay s[1] = xb[i2 + 1]; 6909371c9d4SSatish Balay s[2] = xb[i2 + 2]; 6919371c9d4SSatish Balay s[3] = xb[i2 + 3]; 6929371c9d4SSatish Balay s[4] = xb[i2 + 4]; 693e48d15efSToby Isaac while (nz--) { 694e48d15efSToby Isaac idx = 5 * (*vi++); 6959371c9d4SSatish Balay xw[0] = x[idx]; 6969371c9d4SSatish Balay xw[1] = x[1 + idx]; 6979371c9d4SSatish Balay xw[2] = x[2 + idx]; 6989371c9d4SSatish Balay xw[3] = x[3 + idx]; 6999371c9d4SSatish Balay xw[4] = x[4 + idx]; 700e48d15efSToby Isaac PetscKernel_v_gets_v_minus_A_times_w_5(s, v, xw); 701e48d15efSToby Isaac v += 25; 702e48d15efSToby Isaac } 703e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_5(xw, idiag, s); 7049371c9d4SSatish Balay x[i2] = xw[0]; 7059371c9d4SSatish Balay x[i2 + 1] = xw[1]; 7069371c9d4SSatish Balay x[i2 + 2] = xw[2]; 7079371c9d4SSatish Balay x[i2 + 3] = xw[3]; 7089371c9d4SSatish Balay x[i2 + 4] = xw[4]; 709e48d15efSToby Isaac idiag -= 25; 710e48d15efSToby Isaac i2 -= 5; 711e48d15efSToby Isaac } 712e48d15efSToby Isaac break; 713e48d15efSToby Isaac case 6: 7149371c9d4SSatish Balay s[0] = xb[i2]; 7159371c9d4SSatish Balay s[1] = xb[i2 + 1]; 7169371c9d4SSatish Balay s[2] = xb[i2 + 2]; 7179371c9d4SSatish Balay s[3] = xb[i2 + 3]; 7189371c9d4SSatish Balay s[4] = xb[i2 + 4]; 7199371c9d4SSatish Balay s[5] = xb[i2 + 5]; 720e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_6(xw, idiag, s); 7219371c9d4SSatish Balay x[i2] = xw[0]; 7229371c9d4SSatish Balay x[i2 + 1] = xw[1]; 7239371c9d4SSatish Balay x[i2 + 2] = xw[2]; 7249371c9d4SSatish Balay x[i2 + 3] = xw[3]; 7259371c9d4SSatish Balay x[i2 + 4] = xw[4]; 7269371c9d4SSatish Balay x[i2 + 5] = xw[5]; 727e48d15efSToby Isaac i2 -= 6; 728e48d15efSToby Isaac idiag -= 36; 729e48d15efSToby Isaac for (i = m - 2; 
i >= 0; i--) { 730e48d15efSToby Isaac v = aa + 36 * (diag[i] + 1); 731e48d15efSToby Isaac vi = aj + diag[i] + 1; 732e48d15efSToby Isaac nz = ai[i + 1] - diag[i] - 1; 7339371c9d4SSatish Balay s[0] = xb[i2]; 7349371c9d4SSatish Balay s[1] = xb[i2 + 1]; 7359371c9d4SSatish Balay s[2] = xb[i2 + 2]; 7369371c9d4SSatish Balay s[3] = xb[i2 + 3]; 7379371c9d4SSatish Balay s[4] = xb[i2 + 4]; 7389371c9d4SSatish Balay s[5] = xb[i2 + 5]; 739e48d15efSToby Isaac while (nz--) { 740e48d15efSToby Isaac idx = 6 * (*vi++); 7419371c9d4SSatish Balay xw[0] = x[idx]; 7429371c9d4SSatish Balay xw[1] = x[1 + idx]; 7439371c9d4SSatish Balay xw[2] = x[2 + idx]; 7449371c9d4SSatish Balay xw[3] = x[3 + idx]; 7459371c9d4SSatish Balay xw[4] = x[4 + idx]; 7469371c9d4SSatish Balay xw[5] = x[5 + idx]; 747e48d15efSToby Isaac PetscKernel_v_gets_v_minus_A_times_w_6(s, v, xw); 748e48d15efSToby Isaac v += 36; 749e48d15efSToby Isaac } 750e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_6(xw, idiag, s); 7519371c9d4SSatish Balay x[i2] = xw[0]; 7529371c9d4SSatish Balay x[i2 + 1] = xw[1]; 7539371c9d4SSatish Balay x[i2 + 2] = xw[2]; 7549371c9d4SSatish Balay x[i2 + 3] = xw[3]; 7559371c9d4SSatish Balay x[i2 + 4] = xw[4]; 7569371c9d4SSatish Balay x[i2 + 5] = xw[5]; 757e48d15efSToby Isaac idiag -= 36; 758e48d15efSToby Isaac i2 -= 6; 759e48d15efSToby Isaac } 760e48d15efSToby Isaac break; 761e48d15efSToby Isaac case 7: 7629371c9d4SSatish Balay s[0] = xb[i2]; 7639371c9d4SSatish Balay s[1] = xb[i2 + 1]; 7649371c9d4SSatish Balay s[2] = xb[i2 + 2]; 7659371c9d4SSatish Balay s[3] = xb[i2 + 3]; 7669371c9d4SSatish Balay s[4] = xb[i2 + 4]; 7679371c9d4SSatish Balay s[5] = xb[i2 + 5]; 7689371c9d4SSatish Balay s[6] = xb[i2 + 6]; 769e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_7(x, idiag, b); 7709371c9d4SSatish Balay x[i2] = xw[0]; 7719371c9d4SSatish Balay x[i2 + 1] = xw[1]; 7729371c9d4SSatish Balay x[i2 + 2] = xw[2]; 7739371c9d4SSatish Balay x[i2 + 3] = xw[3]; 7749371c9d4SSatish Balay x[i2 + 4] = xw[4]; 7759371c9d4SSatish Balay 
x[i2 + 5] = xw[5]; 7769371c9d4SSatish Balay x[i2 + 6] = xw[6]; 777e48d15efSToby Isaac i2 -= 7; 778e48d15efSToby Isaac idiag -= 49; 779e48d15efSToby Isaac for (i = m - 2; i >= 0; i--) { 780e48d15efSToby Isaac v = aa + 49 * (diag[i] + 1); 781e48d15efSToby Isaac vi = aj + diag[i] + 1; 782e48d15efSToby Isaac nz = ai[i + 1] - diag[i] - 1; 7839371c9d4SSatish Balay s[0] = xb[i2]; 7849371c9d4SSatish Balay s[1] = xb[i2 + 1]; 7859371c9d4SSatish Balay s[2] = xb[i2 + 2]; 7869371c9d4SSatish Balay s[3] = xb[i2 + 3]; 7879371c9d4SSatish Balay s[4] = xb[i2 + 4]; 7889371c9d4SSatish Balay s[5] = xb[i2 + 5]; 7899371c9d4SSatish Balay s[6] = xb[i2 + 6]; 790e48d15efSToby Isaac while (nz--) { 791e48d15efSToby Isaac idx = 7 * (*vi++); 7929371c9d4SSatish Balay xw[0] = x[idx]; 7939371c9d4SSatish Balay xw[1] = x[1 + idx]; 7949371c9d4SSatish Balay xw[2] = x[2 + idx]; 7959371c9d4SSatish Balay xw[3] = x[3 + idx]; 7969371c9d4SSatish Balay xw[4] = x[4 + idx]; 7979371c9d4SSatish Balay xw[5] = x[5 + idx]; 7989371c9d4SSatish Balay xw[6] = x[6 + idx]; 799e48d15efSToby Isaac PetscKernel_v_gets_v_minus_A_times_w_7(s, v, xw); 800e48d15efSToby Isaac v += 49; 801e48d15efSToby Isaac } 802e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_7(xw, idiag, s); 8039371c9d4SSatish Balay x[i2] = xw[0]; 8049371c9d4SSatish Balay x[i2 + 1] = xw[1]; 8059371c9d4SSatish Balay x[i2 + 2] = xw[2]; 8069371c9d4SSatish Balay x[i2 + 3] = xw[3]; 8079371c9d4SSatish Balay x[i2 + 4] = xw[4]; 8089371c9d4SSatish Balay x[i2 + 5] = xw[5]; 8099371c9d4SSatish Balay x[i2 + 6] = xw[6]; 810e48d15efSToby Isaac idiag -= 49; 811e48d15efSToby Isaac i2 -= 7; 812e48d15efSToby Isaac } 813e48d15efSToby Isaac break; 814e48d15efSToby Isaac default: 8159566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(w, xb + i2, bs)); 81696b95a6bSBarry Smith PetscKernel_w_gets_Ar_times_v(bs, bs, w, idiag, x + i2); 817de80f912SBarry Smith i2 -= bs; 818e48d15efSToby Isaac idiag -= bs2; 819de80f912SBarry Smith for (i = m - 2; i >= 0; i--) { 820de80f912SBarry Smith v = 
aa + bs2 * (diag[i] + 1); 821de80f912SBarry Smith vi = aj + diag[i] + 1; 822de80f912SBarry Smith nz = ai[i + 1] - diag[i] - 1; 823de80f912SBarry Smith 8249566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(w, xb + i2, bs)); 825de80f912SBarry Smith /* copy all rows of x that are needed into contiguous space */ 826de80f912SBarry Smith workt = work; 827de80f912SBarry Smith for (j = 0; j < nz; j++) { 8289566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(workt, x + bs * (*vi++), bs)); 829de80f912SBarry Smith workt += bs; 830de80f912SBarry Smith } 83196b95a6bSBarry Smith PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, v, work); 83296b95a6bSBarry Smith PetscKernel_w_gets_Ar_times_v(bs, bs, w, idiag, x + i2); 833e48d15efSToby Isaac 834de80f912SBarry Smith idiag -= bs2; 835de80f912SBarry Smith i2 -= bs; 836de80f912SBarry Smith } 837e48d15efSToby Isaac break; 838e48d15efSToby Isaac } 8399566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(1.0 * bs2 * (a->nz))); 840de80f912SBarry Smith } 841e48d15efSToby Isaac its--; 842e48d15efSToby Isaac } 843e48d15efSToby Isaac while (its--) { 844e48d15efSToby Isaac if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) { 845e48d15efSToby Isaac idiag = a->idiag; 846e48d15efSToby Isaac i2 = 0; 847e48d15efSToby Isaac switch (bs) { 848e48d15efSToby Isaac case 1: 849e48d15efSToby Isaac for (i = 0; i < m; i++) { 850e48d15efSToby Isaac v = aa + ai[i]; 851e48d15efSToby Isaac vi = aj + ai[i]; 852e48d15efSToby Isaac nz = ai[i + 1] - ai[i]; 853e48d15efSToby Isaac s[0] = b[i2]; 854e48d15efSToby Isaac for (j = 0; j < nz; j++) { 855e48d15efSToby Isaac xw[0] = x[vi[j]]; 856e48d15efSToby Isaac PetscKernel_v_gets_v_minus_A_times_w_1(s, (v + j), xw); 857e48d15efSToby Isaac } 858e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_1(xw, idiag, s); 859e48d15efSToby Isaac x[i2] += xw[0]; 860e48d15efSToby Isaac idiag += 1; 861e48d15efSToby Isaac i2 += 1; 862e48d15efSToby Isaac } 863e48d15efSToby Isaac break; 864e48d15efSToby Isaac case 2: 
865e48d15efSToby Isaac for (i = 0; i < m; i++) { 866e48d15efSToby Isaac v = aa + 4 * ai[i]; 867e48d15efSToby Isaac vi = aj + ai[i]; 868e48d15efSToby Isaac nz = ai[i + 1] - ai[i]; 8699371c9d4SSatish Balay s[0] = b[i2]; 8709371c9d4SSatish Balay s[1] = b[i2 + 1]; 871e48d15efSToby Isaac for (j = 0; j < nz; j++) { 872e48d15efSToby Isaac idx = 2 * vi[j]; 873e48d15efSToby Isaac it = 4 * j; 8749371c9d4SSatish Balay xw[0] = x[idx]; 8759371c9d4SSatish Balay xw[1] = x[1 + idx]; 876e48d15efSToby Isaac PetscKernel_v_gets_v_minus_A_times_w_2(s, (v + it), xw); 877e48d15efSToby Isaac } 878e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_2(xw, idiag, s); 8799371c9d4SSatish Balay x[i2] += xw[0]; 8809371c9d4SSatish Balay x[i2 + 1] += xw[1]; 881e48d15efSToby Isaac idiag += 4; 882e48d15efSToby Isaac i2 += 2; 883e48d15efSToby Isaac } 884e48d15efSToby Isaac break; 885e48d15efSToby Isaac case 3: 886e48d15efSToby Isaac for (i = 0; i < m; i++) { 887e48d15efSToby Isaac v = aa + 9 * ai[i]; 888e48d15efSToby Isaac vi = aj + ai[i]; 889e48d15efSToby Isaac nz = ai[i + 1] - ai[i]; 8909371c9d4SSatish Balay s[0] = b[i2]; 8919371c9d4SSatish Balay s[1] = b[i2 + 1]; 8929371c9d4SSatish Balay s[2] = b[i2 + 2]; 893e48d15efSToby Isaac while (nz--) { 894e48d15efSToby Isaac idx = 3 * (*vi++); 8959371c9d4SSatish Balay xw[0] = x[idx]; 8969371c9d4SSatish Balay xw[1] = x[1 + idx]; 8979371c9d4SSatish Balay xw[2] = x[2 + idx]; 898e48d15efSToby Isaac PetscKernel_v_gets_v_minus_A_times_w_3(s, v, xw); 899e48d15efSToby Isaac v += 9; 900e48d15efSToby Isaac } 901e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_3(xw, idiag, s); 9029371c9d4SSatish Balay x[i2] += xw[0]; 9039371c9d4SSatish Balay x[i2 + 1] += xw[1]; 9049371c9d4SSatish Balay x[i2 + 2] += xw[2]; 905e48d15efSToby Isaac idiag += 9; 906e48d15efSToby Isaac i2 += 3; 907e48d15efSToby Isaac } 908e48d15efSToby Isaac break; 909e48d15efSToby Isaac case 4: 910e48d15efSToby Isaac for (i = 0; i < m; i++) { 911e48d15efSToby Isaac v = aa + 16 * ai[i]; 912e48d15efSToby Isaac 
vi = aj + ai[i]; 913e48d15efSToby Isaac nz = ai[i + 1] - ai[i]; 9149371c9d4SSatish Balay s[0] = b[i2]; 9159371c9d4SSatish Balay s[1] = b[i2 + 1]; 9169371c9d4SSatish Balay s[2] = b[i2 + 2]; 9179371c9d4SSatish Balay s[3] = b[i2 + 3]; 918e48d15efSToby Isaac while (nz--) { 919e48d15efSToby Isaac idx = 4 * (*vi++); 9209371c9d4SSatish Balay xw[0] = x[idx]; 9219371c9d4SSatish Balay xw[1] = x[1 + idx]; 9229371c9d4SSatish Balay xw[2] = x[2 + idx]; 9239371c9d4SSatish Balay xw[3] = x[3 + idx]; 924e48d15efSToby Isaac PetscKernel_v_gets_v_minus_A_times_w_4(s, v, xw); 925e48d15efSToby Isaac v += 16; 926e48d15efSToby Isaac } 927e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_4(xw, idiag, s); 9289371c9d4SSatish Balay x[i2] += xw[0]; 9299371c9d4SSatish Balay x[i2 + 1] += xw[1]; 9309371c9d4SSatish Balay x[i2 + 2] += xw[2]; 9319371c9d4SSatish Balay x[i2 + 3] += xw[3]; 932e48d15efSToby Isaac idiag += 16; 933e48d15efSToby Isaac i2 += 4; 934e48d15efSToby Isaac } 935e48d15efSToby Isaac break; 936e48d15efSToby Isaac case 5: 937e48d15efSToby Isaac for (i = 0; i < m; i++) { 938e48d15efSToby Isaac v = aa + 25 * ai[i]; 939e48d15efSToby Isaac vi = aj + ai[i]; 940e48d15efSToby Isaac nz = ai[i + 1] - ai[i]; 9419371c9d4SSatish Balay s[0] = b[i2]; 9429371c9d4SSatish Balay s[1] = b[i2 + 1]; 9439371c9d4SSatish Balay s[2] = b[i2 + 2]; 9449371c9d4SSatish Balay s[3] = b[i2 + 3]; 9459371c9d4SSatish Balay s[4] = b[i2 + 4]; 946e48d15efSToby Isaac while (nz--) { 947e48d15efSToby Isaac idx = 5 * (*vi++); 9489371c9d4SSatish Balay xw[0] = x[idx]; 9499371c9d4SSatish Balay xw[1] = x[1 + idx]; 9509371c9d4SSatish Balay xw[2] = x[2 + idx]; 9519371c9d4SSatish Balay xw[3] = x[3 + idx]; 9529371c9d4SSatish Balay xw[4] = x[4 + idx]; 953e48d15efSToby Isaac PetscKernel_v_gets_v_minus_A_times_w_5(s, v, xw); 954e48d15efSToby Isaac v += 25; 955e48d15efSToby Isaac } 956e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_5(xw, idiag, s); 9579371c9d4SSatish Balay x[i2] += xw[0]; 9589371c9d4SSatish Balay x[i2 + 1] += xw[1]; 
9599371c9d4SSatish Balay x[i2 + 2] += xw[2]; 9609371c9d4SSatish Balay x[i2 + 3] += xw[3]; 9619371c9d4SSatish Balay x[i2 + 4] += xw[4]; 962e48d15efSToby Isaac idiag += 25; 963e48d15efSToby Isaac i2 += 5; 964e48d15efSToby Isaac } 965e48d15efSToby Isaac break; 966e48d15efSToby Isaac case 6: 967e48d15efSToby Isaac for (i = 0; i < m; i++) { 968e48d15efSToby Isaac v = aa + 36 * ai[i]; 969e48d15efSToby Isaac vi = aj + ai[i]; 970e48d15efSToby Isaac nz = ai[i + 1] - ai[i]; 9719371c9d4SSatish Balay s[0] = b[i2]; 9729371c9d4SSatish Balay s[1] = b[i2 + 1]; 9739371c9d4SSatish Balay s[2] = b[i2 + 2]; 9749371c9d4SSatish Balay s[3] = b[i2 + 3]; 9759371c9d4SSatish Balay s[4] = b[i2 + 4]; 9769371c9d4SSatish Balay s[5] = b[i2 + 5]; 977e48d15efSToby Isaac while (nz--) { 978e48d15efSToby Isaac idx = 6 * (*vi++); 9799371c9d4SSatish Balay xw[0] = x[idx]; 9809371c9d4SSatish Balay xw[1] = x[1 + idx]; 9819371c9d4SSatish Balay xw[2] = x[2 + idx]; 9829371c9d4SSatish Balay xw[3] = x[3 + idx]; 9839371c9d4SSatish Balay xw[4] = x[4 + idx]; 9849371c9d4SSatish Balay xw[5] = x[5 + idx]; 985e48d15efSToby Isaac PetscKernel_v_gets_v_minus_A_times_w_6(s, v, xw); 986e48d15efSToby Isaac v += 36; 987e48d15efSToby Isaac } 988e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_6(xw, idiag, s); 9899371c9d4SSatish Balay x[i2] += xw[0]; 9909371c9d4SSatish Balay x[i2 + 1] += xw[1]; 9919371c9d4SSatish Balay x[i2 + 2] += xw[2]; 9929371c9d4SSatish Balay x[i2 + 3] += xw[3]; 9939371c9d4SSatish Balay x[i2 + 4] += xw[4]; 9949371c9d4SSatish Balay x[i2 + 5] += xw[5]; 995e48d15efSToby Isaac idiag += 36; 996e48d15efSToby Isaac i2 += 6; 997e48d15efSToby Isaac } 998e48d15efSToby Isaac break; 999e48d15efSToby Isaac case 7: 1000e48d15efSToby Isaac for (i = 0; i < m; i++) { 1001e48d15efSToby Isaac v = aa + 49 * ai[i]; 1002e48d15efSToby Isaac vi = aj + ai[i]; 1003e48d15efSToby Isaac nz = ai[i + 1] - ai[i]; 10049371c9d4SSatish Balay s[0] = b[i2]; 10059371c9d4SSatish Balay s[1] = b[i2 + 1]; 10069371c9d4SSatish Balay s[2] = b[i2 + 2]; 
10079371c9d4SSatish Balay s[3] = b[i2 + 3]; 10089371c9d4SSatish Balay s[4] = b[i2 + 4]; 10099371c9d4SSatish Balay s[5] = b[i2 + 5]; 10109371c9d4SSatish Balay s[6] = b[i2 + 6]; 1011e48d15efSToby Isaac while (nz--) { 1012e48d15efSToby Isaac idx = 7 * (*vi++); 10139371c9d4SSatish Balay xw[0] = x[idx]; 10149371c9d4SSatish Balay xw[1] = x[1 + idx]; 10159371c9d4SSatish Balay xw[2] = x[2 + idx]; 10169371c9d4SSatish Balay xw[3] = x[3 + idx]; 10179371c9d4SSatish Balay xw[4] = x[4 + idx]; 10189371c9d4SSatish Balay xw[5] = x[5 + idx]; 10199371c9d4SSatish Balay xw[6] = x[6 + idx]; 1020e48d15efSToby Isaac PetscKernel_v_gets_v_minus_A_times_w_7(s, v, xw); 1021e48d15efSToby Isaac v += 49; 1022e48d15efSToby Isaac } 1023e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_7(xw, idiag, s); 10249371c9d4SSatish Balay x[i2] += xw[0]; 10259371c9d4SSatish Balay x[i2 + 1] += xw[1]; 10269371c9d4SSatish Balay x[i2 + 2] += xw[2]; 10279371c9d4SSatish Balay x[i2 + 3] += xw[3]; 10289371c9d4SSatish Balay x[i2 + 4] += xw[4]; 10299371c9d4SSatish Balay x[i2 + 5] += xw[5]; 10309371c9d4SSatish Balay x[i2 + 6] += xw[6]; 1031e48d15efSToby Isaac idiag += 49; 1032e48d15efSToby Isaac i2 += 7; 1033e48d15efSToby Isaac } 1034e48d15efSToby Isaac break; 1035e48d15efSToby Isaac default: 1036e48d15efSToby Isaac for (i = 0; i < m; i++) { 1037e48d15efSToby Isaac v = aa + bs2 * ai[i]; 1038e48d15efSToby Isaac vi = aj + ai[i]; 1039e48d15efSToby Isaac nz = ai[i + 1] - ai[i]; 1040e48d15efSToby Isaac 10419566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(w, b + i2, bs)); 1042e48d15efSToby Isaac /* copy all rows of x that are needed into contiguous space */ 1043e48d15efSToby Isaac workt = work; 1044e48d15efSToby Isaac for (j = 0; j < nz; j++) { 10459566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(workt, x + bs * (*vi++), bs)); 1046e48d15efSToby Isaac workt += bs; 1047e48d15efSToby Isaac } 1048e48d15efSToby Isaac PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, v, work); 1049e48d15efSToby Isaac 
PetscKernel_w_gets_w_plus_Ar_times_v(bs, bs, w, idiag, x + i2); 1050e48d15efSToby Isaac 1051e48d15efSToby Isaac idiag += bs2; 1052e48d15efSToby Isaac i2 += bs; 1053e48d15efSToby Isaac } 1054e48d15efSToby Isaac break; 1055e48d15efSToby Isaac } 10569566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(2.0 * bs2 * a->nz)); 1057e48d15efSToby Isaac } 1058e48d15efSToby Isaac if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 1059e48d15efSToby Isaac idiag = a->idiag + bs2 * (a->mbs - 1); 1060e48d15efSToby Isaac i2 = bs * (m - 1); 1061e48d15efSToby Isaac switch (bs) { 1062e48d15efSToby Isaac case 1: 1063e48d15efSToby Isaac for (i = m - 1; i >= 0; i--) { 1064e48d15efSToby Isaac v = aa + ai[i]; 1065e48d15efSToby Isaac vi = aj + ai[i]; 1066e48d15efSToby Isaac nz = ai[i + 1] - ai[i]; 1067e48d15efSToby Isaac s[0] = b[i2]; 1068e48d15efSToby Isaac for (j = 0; j < nz; j++) { 1069e48d15efSToby Isaac xw[0] = x[vi[j]]; 1070e48d15efSToby Isaac PetscKernel_v_gets_v_minus_A_times_w_1(s, (v + j), xw); 1071e48d15efSToby Isaac } 1072e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_1(xw, idiag, s); 1073e48d15efSToby Isaac x[i2] += xw[0]; 1074e48d15efSToby Isaac idiag -= 1; 1075e48d15efSToby Isaac i2 -= 1; 1076e48d15efSToby Isaac } 1077e48d15efSToby Isaac break; 1078e48d15efSToby Isaac case 2: 1079e48d15efSToby Isaac for (i = m - 1; i >= 0; i--) { 1080e48d15efSToby Isaac v = aa + 4 * ai[i]; 1081e48d15efSToby Isaac vi = aj + ai[i]; 1082e48d15efSToby Isaac nz = ai[i + 1] - ai[i]; 10839371c9d4SSatish Balay s[0] = b[i2]; 10849371c9d4SSatish Balay s[1] = b[i2 + 1]; 1085e48d15efSToby Isaac for (j = 0; j < nz; j++) { 1086e48d15efSToby Isaac idx = 2 * vi[j]; 1087e48d15efSToby Isaac it = 4 * j; 10889371c9d4SSatish Balay xw[0] = x[idx]; 10899371c9d4SSatish Balay xw[1] = x[1 + idx]; 1090e48d15efSToby Isaac PetscKernel_v_gets_v_minus_A_times_w_2(s, (v + it), xw); 1091e48d15efSToby Isaac } 1092e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_2(xw, idiag, s); 10939371c9d4SSatish Balay 
x[i2] += xw[0]; 10949371c9d4SSatish Balay x[i2 + 1] += xw[1]; 1095e48d15efSToby Isaac idiag -= 4; 1096e48d15efSToby Isaac i2 -= 2; 1097e48d15efSToby Isaac } 1098e48d15efSToby Isaac break; 1099e48d15efSToby Isaac case 3: 1100e48d15efSToby Isaac for (i = m - 1; i >= 0; i--) { 1101e48d15efSToby Isaac v = aa + 9 * ai[i]; 1102e48d15efSToby Isaac vi = aj + ai[i]; 1103e48d15efSToby Isaac nz = ai[i + 1] - ai[i]; 11049371c9d4SSatish Balay s[0] = b[i2]; 11059371c9d4SSatish Balay s[1] = b[i2 + 1]; 11069371c9d4SSatish Balay s[2] = b[i2 + 2]; 1107e48d15efSToby Isaac while (nz--) { 1108e48d15efSToby Isaac idx = 3 * (*vi++); 11099371c9d4SSatish Balay xw[0] = x[idx]; 11109371c9d4SSatish Balay xw[1] = x[1 + idx]; 11119371c9d4SSatish Balay xw[2] = x[2 + idx]; 1112e48d15efSToby Isaac PetscKernel_v_gets_v_minus_A_times_w_3(s, v, xw); 1113e48d15efSToby Isaac v += 9; 1114e48d15efSToby Isaac } 1115e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_3(xw, idiag, s); 11169371c9d4SSatish Balay x[i2] += xw[0]; 11179371c9d4SSatish Balay x[i2 + 1] += xw[1]; 11189371c9d4SSatish Balay x[i2 + 2] += xw[2]; 1119e48d15efSToby Isaac idiag -= 9; 1120e48d15efSToby Isaac i2 -= 3; 1121e48d15efSToby Isaac } 1122e48d15efSToby Isaac break; 1123e48d15efSToby Isaac case 4: 1124e48d15efSToby Isaac for (i = m - 1; i >= 0; i--) { 1125e48d15efSToby Isaac v = aa + 16 * ai[i]; 1126e48d15efSToby Isaac vi = aj + ai[i]; 1127e48d15efSToby Isaac nz = ai[i + 1] - ai[i]; 11289371c9d4SSatish Balay s[0] = b[i2]; 11299371c9d4SSatish Balay s[1] = b[i2 + 1]; 11309371c9d4SSatish Balay s[2] = b[i2 + 2]; 11319371c9d4SSatish Balay s[3] = b[i2 + 3]; 1132e48d15efSToby Isaac while (nz--) { 1133e48d15efSToby Isaac idx = 4 * (*vi++); 11349371c9d4SSatish Balay xw[0] = x[idx]; 11359371c9d4SSatish Balay xw[1] = x[1 + idx]; 11369371c9d4SSatish Balay xw[2] = x[2 + idx]; 11379371c9d4SSatish Balay xw[3] = x[3 + idx]; 1138e48d15efSToby Isaac PetscKernel_v_gets_v_minus_A_times_w_4(s, v, xw); 1139e48d15efSToby Isaac v += 16; 1140e48d15efSToby Isaac 
} 1141e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_4(xw, idiag, s); 11429371c9d4SSatish Balay x[i2] += xw[0]; 11439371c9d4SSatish Balay x[i2 + 1] += xw[1]; 11449371c9d4SSatish Balay x[i2 + 2] += xw[2]; 11459371c9d4SSatish Balay x[i2 + 3] += xw[3]; 1146e48d15efSToby Isaac idiag -= 16; 1147e48d15efSToby Isaac i2 -= 4; 1148e48d15efSToby Isaac } 1149e48d15efSToby Isaac break; 1150e48d15efSToby Isaac case 5: 1151e48d15efSToby Isaac for (i = m - 1; i >= 0; i--) { 1152e48d15efSToby Isaac v = aa + 25 * ai[i]; 1153e48d15efSToby Isaac vi = aj + ai[i]; 1154e48d15efSToby Isaac nz = ai[i + 1] - ai[i]; 11559371c9d4SSatish Balay s[0] = b[i2]; 11569371c9d4SSatish Balay s[1] = b[i2 + 1]; 11579371c9d4SSatish Balay s[2] = b[i2 + 2]; 11589371c9d4SSatish Balay s[3] = b[i2 + 3]; 11599371c9d4SSatish Balay s[4] = b[i2 + 4]; 1160e48d15efSToby Isaac while (nz--) { 1161e48d15efSToby Isaac idx = 5 * (*vi++); 11629371c9d4SSatish Balay xw[0] = x[idx]; 11639371c9d4SSatish Balay xw[1] = x[1 + idx]; 11649371c9d4SSatish Balay xw[2] = x[2 + idx]; 11659371c9d4SSatish Balay xw[3] = x[3 + idx]; 11669371c9d4SSatish Balay xw[4] = x[4 + idx]; 1167e48d15efSToby Isaac PetscKernel_v_gets_v_minus_A_times_w_5(s, v, xw); 1168e48d15efSToby Isaac v += 25; 1169e48d15efSToby Isaac } 1170e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_5(xw, idiag, s); 11719371c9d4SSatish Balay x[i2] += xw[0]; 11729371c9d4SSatish Balay x[i2 + 1] += xw[1]; 11739371c9d4SSatish Balay x[i2 + 2] += xw[2]; 11749371c9d4SSatish Balay x[i2 + 3] += xw[3]; 11759371c9d4SSatish Balay x[i2 + 4] += xw[4]; 1176e48d15efSToby Isaac idiag -= 25; 1177e48d15efSToby Isaac i2 -= 5; 1178e48d15efSToby Isaac } 1179e48d15efSToby Isaac break; 1180e48d15efSToby Isaac case 6: 1181e48d15efSToby Isaac for (i = m - 1; i >= 0; i--) { 1182e48d15efSToby Isaac v = aa + 36 * ai[i]; 1183e48d15efSToby Isaac vi = aj + ai[i]; 1184e48d15efSToby Isaac nz = ai[i + 1] - ai[i]; 11859371c9d4SSatish Balay s[0] = b[i2]; 11869371c9d4SSatish Balay s[1] = b[i2 + 1]; 
11879371c9d4SSatish Balay s[2] = b[i2 + 2]; 11889371c9d4SSatish Balay s[3] = b[i2 + 3]; 11899371c9d4SSatish Balay s[4] = b[i2 + 4]; 11909371c9d4SSatish Balay s[5] = b[i2 + 5]; 1191e48d15efSToby Isaac while (nz--) { 1192e48d15efSToby Isaac idx = 6 * (*vi++); 11939371c9d4SSatish Balay xw[0] = x[idx]; 11949371c9d4SSatish Balay xw[1] = x[1 + idx]; 11959371c9d4SSatish Balay xw[2] = x[2 + idx]; 11969371c9d4SSatish Balay xw[3] = x[3 + idx]; 11979371c9d4SSatish Balay xw[4] = x[4 + idx]; 11989371c9d4SSatish Balay xw[5] = x[5 + idx]; 1199e48d15efSToby Isaac PetscKernel_v_gets_v_minus_A_times_w_6(s, v, xw); 1200e48d15efSToby Isaac v += 36; 1201e48d15efSToby Isaac } 1202e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_6(xw, idiag, s); 12039371c9d4SSatish Balay x[i2] += xw[0]; 12049371c9d4SSatish Balay x[i2 + 1] += xw[1]; 12059371c9d4SSatish Balay x[i2 + 2] += xw[2]; 12069371c9d4SSatish Balay x[i2 + 3] += xw[3]; 12079371c9d4SSatish Balay x[i2 + 4] += xw[4]; 12089371c9d4SSatish Balay x[i2 + 5] += xw[5]; 1209e48d15efSToby Isaac idiag -= 36; 1210e48d15efSToby Isaac i2 -= 6; 1211e48d15efSToby Isaac } 1212e48d15efSToby Isaac break; 1213e48d15efSToby Isaac case 7: 1214e48d15efSToby Isaac for (i = m - 1; i >= 0; i--) { 1215e48d15efSToby Isaac v = aa + 49 * ai[i]; 1216e48d15efSToby Isaac vi = aj + ai[i]; 1217e48d15efSToby Isaac nz = ai[i + 1] - ai[i]; 12189371c9d4SSatish Balay s[0] = b[i2]; 12199371c9d4SSatish Balay s[1] = b[i2 + 1]; 12209371c9d4SSatish Balay s[2] = b[i2 + 2]; 12219371c9d4SSatish Balay s[3] = b[i2 + 3]; 12229371c9d4SSatish Balay s[4] = b[i2 + 4]; 12239371c9d4SSatish Balay s[5] = b[i2 + 5]; 12249371c9d4SSatish Balay s[6] = b[i2 + 6]; 1225e48d15efSToby Isaac while (nz--) { 1226e48d15efSToby Isaac idx = 7 * (*vi++); 12279371c9d4SSatish Balay xw[0] = x[idx]; 12289371c9d4SSatish Balay xw[1] = x[1 + idx]; 12299371c9d4SSatish Balay xw[2] = x[2 + idx]; 12309371c9d4SSatish Balay xw[3] = x[3 + idx]; 12319371c9d4SSatish Balay xw[4] = x[4 + idx]; 12329371c9d4SSatish Balay xw[5] = 
x[5 + idx]; 12339371c9d4SSatish Balay xw[6] = x[6 + idx]; 1234e48d15efSToby Isaac PetscKernel_v_gets_v_minus_A_times_w_7(s, v, xw); 1235e48d15efSToby Isaac v += 49; 1236e48d15efSToby Isaac } 1237e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_7(xw, idiag, s); 12389371c9d4SSatish Balay x[i2] += xw[0]; 12399371c9d4SSatish Balay x[i2 + 1] += xw[1]; 12409371c9d4SSatish Balay x[i2 + 2] += xw[2]; 12419371c9d4SSatish Balay x[i2 + 3] += xw[3]; 12429371c9d4SSatish Balay x[i2 + 4] += xw[4]; 12439371c9d4SSatish Balay x[i2 + 5] += xw[5]; 12449371c9d4SSatish Balay x[i2 + 6] += xw[6]; 1245e48d15efSToby Isaac idiag -= 49; 1246e48d15efSToby Isaac i2 -= 7; 1247e48d15efSToby Isaac } 1248e48d15efSToby Isaac break; 1249e48d15efSToby Isaac default: 1250e48d15efSToby Isaac for (i = m - 1; i >= 0; i--) { 1251e48d15efSToby Isaac v = aa + bs2 * ai[i]; 1252e48d15efSToby Isaac vi = aj + ai[i]; 1253e48d15efSToby Isaac nz = ai[i + 1] - ai[i]; 1254e48d15efSToby Isaac 12559566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(w, b + i2, bs)); 1256e48d15efSToby Isaac /* copy all rows of x that are needed into contiguous space */ 1257e48d15efSToby Isaac workt = work; 1258e48d15efSToby Isaac for (j = 0; j < nz; j++) { 12599566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(workt, x + bs * (*vi++), bs)); 1260e48d15efSToby Isaac workt += bs; 1261e48d15efSToby Isaac } 1262e48d15efSToby Isaac PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, v, work); 1263e48d15efSToby Isaac PetscKernel_w_gets_w_plus_Ar_times_v(bs, bs, w, idiag, x + i2); 1264e48d15efSToby Isaac 1265e48d15efSToby Isaac idiag -= bs2; 1266e48d15efSToby Isaac i2 -= bs; 1267e48d15efSToby Isaac } 1268e48d15efSToby Isaac break; 1269e48d15efSToby Isaac } 12709566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(2.0 * bs2 * (a->nz))); 1271e48d15efSToby Isaac } 1272e48d15efSToby Isaac } 12739566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(xx, &x)); 12749566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(bb, &b)); 
12753ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1276de80f912SBarry Smith } 1277de80f912SBarry Smith 1278af674e45SBarry Smith /* 127981824310SBarry Smith Special version for direct calls from Fortran (Used in PETSc-fun3d) 1280af674e45SBarry Smith */ 1281af674e45SBarry Smith #if defined(PETSC_HAVE_FORTRAN_CAPS) 1282af674e45SBarry Smith #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4 1283af674e45SBarry Smith #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 1284af674e45SBarry Smith #define matsetvaluesblocked4_ matsetvaluesblocked4 1285af674e45SBarry Smith #endif 1286af674e45SBarry Smith 1287d71ae5a4SJacob Faibussowitsch PETSC_EXTERN void matsetvaluesblocked4_(Mat *AA, PetscInt *mm, const PetscInt im[], PetscInt *nn, const PetscInt in[], const PetscScalar v[]) 1288d71ae5a4SJacob Faibussowitsch { 1289af674e45SBarry Smith Mat A = *AA; 1290af674e45SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 1291c1ac3661SBarry Smith PetscInt *rp, k, low, high, t, ii, jj, row, nrow, i, col, l, N, m = *mm, n = *nn; 1292c1ac3661SBarry Smith PetscInt *ai = a->i, *ailen = a->ilen; 129317ec6a02SBarry Smith PetscInt *aj = a->j, stepval, lastcol = -1; 1294f15d580aSBarry Smith const PetscScalar *value = v; 12954bb09213Spetsc MatScalar *ap, *aa = a->a, *bap; 1296af674e45SBarry Smith 1297af674e45SBarry Smith PetscFunctionBegin; 1298ce94432eSBarry Smith if (A->rmap->bs != 4) SETERRABORT(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Can only be called with a block size of 4"); 1299af674e45SBarry Smith stepval = (n - 1) * 4; 1300af674e45SBarry Smith for (k = 0; k < m; k++) { /* loop over added rows */ 1301af674e45SBarry Smith row = im[k]; 1302af674e45SBarry Smith rp = aj + ai[row]; 1303af674e45SBarry Smith ap = aa + 16 * ai[row]; 1304af674e45SBarry Smith nrow = ailen[row]; 1305af674e45SBarry Smith low = 0; 130617ec6a02SBarry Smith high = nrow; 1307af674e45SBarry Smith for (l = 0; l < n; l++) { /* loop over added columns */ 1308af674e45SBarry Smith col = in[l]; 
1309db4deed7SKarl Rupp if (col <= lastcol) low = 0; 1310db4deed7SKarl Rupp else high = nrow; 131117ec6a02SBarry Smith lastcol = col; 13121e3347e8SBarry Smith value = v + k * (stepval + 4 + l) * 4; 1313af674e45SBarry Smith while (high - low > 7) { 1314af674e45SBarry Smith t = (low + high) / 2; 1315af674e45SBarry Smith if (rp[t] > col) high = t; 1316af674e45SBarry Smith else low = t; 1317af674e45SBarry Smith } 1318af674e45SBarry Smith for (i = low; i < high; i++) { 1319af674e45SBarry Smith if (rp[i] > col) break; 1320af674e45SBarry Smith if (rp[i] == col) { 1321af674e45SBarry Smith bap = ap + 16 * i; 1322af674e45SBarry Smith for (ii = 0; ii < 4; ii++, value += stepval) { 1323ad540459SPierre Jolivet for (jj = ii; jj < 16; jj += 4) bap[jj] += *value++; 1324af674e45SBarry Smith } 1325af674e45SBarry Smith goto noinsert2; 1326af674e45SBarry Smith } 1327af674e45SBarry Smith } 1328af674e45SBarry Smith N = nrow++ - 1; 132917ec6a02SBarry Smith high++; /* added new column index thus must search to one higher than before */ 1330af674e45SBarry Smith /* shift up all the later entries in this row */ 1331af674e45SBarry Smith for (ii = N; ii >= i; ii--) { 1332af674e45SBarry Smith rp[ii + 1] = rp[ii]; 13339566063dSJacob Faibussowitsch PetscCallVoid(PetscArraycpy(ap + 16 * (ii + 1), ap + 16 * (ii), 16)); 1334af674e45SBarry Smith } 133548a46eb9SPierre Jolivet if (N >= i) PetscCallVoid(PetscArrayzero(ap + 16 * i, 16)); 1336af674e45SBarry Smith rp[i] = col; 1337af674e45SBarry Smith bap = ap + 16 * i; 1338af674e45SBarry Smith for (ii = 0; ii < 4; ii++, value += stepval) { 1339ad540459SPierre Jolivet for (jj = ii; jj < 16; jj += 4) bap[jj] = *value++; 1340af674e45SBarry Smith } 1341af674e45SBarry Smith noinsert2:; 1342af674e45SBarry Smith low = i; 1343af674e45SBarry Smith } 1344af674e45SBarry Smith ailen[row] = nrow; 1345af674e45SBarry Smith } 1346be1d678aSKris Buschelman PetscFunctionReturnVoid(); 1347af674e45SBarry Smith } 1348af674e45SBarry Smith 1349af674e45SBarry Smith #if 
defined(PETSC_HAVE_FORTRAN_CAPS) 1350af674e45SBarry Smith #define matsetvalues4_ MATSETVALUES4 1351af674e45SBarry Smith #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 1352af674e45SBarry Smith #define matsetvalues4_ matsetvalues4 1353af674e45SBarry Smith #endif 1354af674e45SBarry Smith 1355d71ae5a4SJacob Faibussowitsch PETSC_EXTERN void matsetvalues4_(Mat *AA, PetscInt *mm, PetscInt *im, PetscInt *nn, PetscInt *in, PetscScalar *v) 1356d71ae5a4SJacob Faibussowitsch { 1357af674e45SBarry Smith Mat A = *AA; 1358af674e45SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 1359580bdb30SBarry Smith PetscInt *rp, k, low, high, t, row, nrow, i, col, l, N, n = *nn, m = *mm; 1360c1ac3661SBarry Smith PetscInt *ai = a->i, *ailen = a->ilen; 1361c1ac3661SBarry Smith PetscInt *aj = a->j, brow, bcol; 136217ec6a02SBarry Smith PetscInt ridx, cidx, lastcol = -1; 1363af674e45SBarry Smith MatScalar *ap, value, *aa = a->a, *bap; 1364af674e45SBarry Smith 1365af674e45SBarry Smith PetscFunctionBegin; 1366af674e45SBarry Smith for (k = 0; k < m; k++) { /* loop over added rows */ 13679371c9d4SSatish Balay row = im[k]; 13689371c9d4SSatish Balay brow = row / 4; 1369af674e45SBarry Smith rp = aj + ai[brow]; 1370af674e45SBarry Smith ap = aa + 16 * ai[brow]; 1371af674e45SBarry Smith nrow = ailen[brow]; 1372af674e45SBarry Smith low = 0; 137317ec6a02SBarry Smith high = nrow; 1374af674e45SBarry Smith for (l = 0; l < n; l++) { /* loop over added columns */ 13759371c9d4SSatish Balay col = in[l]; 13769371c9d4SSatish Balay bcol = col / 4; 13779371c9d4SSatish Balay ridx = row % 4; 13789371c9d4SSatish Balay cidx = col % 4; 1379af674e45SBarry Smith value = v[l + k * n]; 1380db4deed7SKarl Rupp if (col <= lastcol) low = 0; 1381db4deed7SKarl Rupp else high = nrow; 138217ec6a02SBarry Smith lastcol = col; 1383af674e45SBarry Smith while (high - low > 7) { 1384af674e45SBarry Smith t = (low + high) / 2; 1385af674e45SBarry Smith if (rp[t] > bcol) high = t; 1386af674e45SBarry Smith else low = t; 1387af674e45SBarry Smith 
} 1388af674e45SBarry Smith for (i = low; i < high; i++) { 1389af674e45SBarry Smith if (rp[i] > bcol) break; 1390af674e45SBarry Smith if (rp[i] == bcol) { 1391af674e45SBarry Smith bap = ap + 16 * i + 4 * cidx + ridx; 1392af674e45SBarry Smith *bap += value; 1393af674e45SBarry Smith goto noinsert1; 1394af674e45SBarry Smith } 1395af674e45SBarry Smith } 1396af674e45SBarry Smith N = nrow++ - 1; 139717ec6a02SBarry Smith high++; /* added new column thus must search to one higher than before */ 1398af674e45SBarry Smith /* shift up all the later entries in this row */ 13999566063dSJacob Faibussowitsch PetscCallVoid(PetscArraymove(rp + i + 1, rp + i, N - i + 1)); 14009566063dSJacob Faibussowitsch PetscCallVoid(PetscArraymove(ap + 16 * i + 16, ap + 16 * i, 16 * (N - i + 1))); 14019566063dSJacob Faibussowitsch PetscCallVoid(PetscArrayzero(ap + 16 * i, 16)); 1402af674e45SBarry Smith rp[i] = bcol; 1403af674e45SBarry Smith ap[16 * i + 4 * cidx + ridx] = value; 1404af674e45SBarry Smith noinsert1:; 1405af674e45SBarry Smith low = i; 1406af674e45SBarry Smith } 1407af674e45SBarry Smith ailen[brow] = nrow; 1408af674e45SBarry Smith } 1409be1d678aSKris Buschelman PetscFunctionReturnVoid(); 1410af674e45SBarry Smith } 1411af674e45SBarry Smith 1412be5855fcSBarry Smith /* 1413be5855fcSBarry Smith Checks for missing diagonals 1414be5855fcSBarry Smith */ 1415d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A, PetscBool *missing, PetscInt *d) 1416d71ae5a4SJacob Faibussowitsch { 1417be5855fcSBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 14187734d3b5SMatthew G. Knepley PetscInt *diag, *ii = a->i, i; 1419be5855fcSBarry Smith 1420be5855fcSBarry Smith PetscFunctionBegin; 14219566063dSJacob Faibussowitsch PetscCall(MatMarkDiagonal_SeqBAIJ(A)); 14222af78befSBarry Smith *missing = PETSC_FALSE; 14237734d3b5SMatthew G. 
Knepley if (A->rmap->n > 0 && !ii) { 14242efa7f71SHong Zhang *missing = PETSC_TRUE; 14252efa7f71SHong Zhang if (d) *d = 0; 14269566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "Matrix has no entries therefore is missing diagonal\n")); 14272efa7f71SHong Zhang } else { 142801445905SHong Zhang PetscInt n; 142901445905SHong Zhang n = PetscMin(a->mbs, a->nbs); 1430883fce79SBarry Smith diag = a->diag; 143101445905SHong Zhang for (i = 0; i < n; i++) { 14327734d3b5SMatthew G. Knepley if (diag[i] >= ii[i + 1]) { 14332af78befSBarry Smith *missing = PETSC_TRUE; 14342af78befSBarry Smith if (d) *d = i; 14359566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "Matrix is missing block diagonal number %" PetscInt_FMT "\n", i)); 1436358d2f5dSShri Abhyankar break; 14372efa7f71SHong Zhang } 1438be5855fcSBarry Smith } 1439be5855fcSBarry Smith } 14403ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1441be5855fcSBarry Smith } 1442be5855fcSBarry Smith 1443d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A) 1444d71ae5a4SJacob Faibussowitsch { 1445de6a44a3SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 144609f38230SBarry Smith PetscInt i, j, m = a->mbs; 1447de6a44a3SBarry Smith 14483a40ed3dSBarry Smith PetscFunctionBegin; 144909f38230SBarry Smith if (!a->diag) { 14509566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(m, &a->diag)); 14514fd072dbSBarry Smith a->free_diag = PETSC_TRUE; 145209f38230SBarry Smith } 14537fc0212eSBarry Smith for (i = 0; i < m; i++) { 145409f38230SBarry Smith a->diag[i] = a->i[i + 1]; 1455de6a44a3SBarry Smith for (j = a->i[i]; j < a->i[i + 1]; j++) { 1456de6a44a3SBarry Smith if (a->j[j] == i) { 145709f38230SBarry Smith a->diag[i] = j; 1458de6a44a3SBarry Smith break; 1459de6a44a3SBarry Smith } 1460de6a44a3SBarry Smith } 1461de6a44a3SBarry Smith } 14623ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1463de6a44a3SBarry Smith } 14642593348eSBarry Smith 1465d71ae5a4SJacob Faibussowitsch static 
PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool blockcompressed, PetscInt *nn, const PetscInt *inia[], const PetscInt *inja[], PetscBool *done) 1466d71ae5a4SJacob Faibussowitsch { 14673b2fbd54SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 14681a83f524SJed Brown PetscInt i, j, n = a->mbs, nz = a->i[n], *tia, *tja, bs = A->rmap->bs, k, l, cnt; 14691a83f524SJed Brown PetscInt **ia = (PetscInt **)inia, **ja = (PetscInt **)inja; 14703b2fbd54SBarry Smith 14713a40ed3dSBarry Smith PetscFunctionBegin; 14723b2fbd54SBarry Smith *nn = n; 14733ba16761SJacob Faibussowitsch if (!ia) PetscFunctionReturn(PETSC_SUCCESS); 14743b2fbd54SBarry Smith if (symmetric) { 14759566063dSJacob Faibussowitsch PetscCall(MatToSymmetricIJ_SeqAIJ(n, a->i, a->j, PETSC_TRUE, 0, 0, &tia, &tja)); 1476553b3c51SBarry Smith nz = tia[n]; 14773b2fbd54SBarry Smith } else { 14789371c9d4SSatish Balay tia = a->i; 14799371c9d4SSatish Balay tja = a->j; 14803b2fbd54SBarry Smith } 14813b2fbd54SBarry Smith 1482ecc77c7aSBarry Smith if (!blockcompressed && bs > 1) { 1483ecc77c7aSBarry Smith (*nn) *= bs; 14848f7157efSSatish Balay /* malloc & create the natural set of indices */ 14859566063dSJacob Faibussowitsch PetscCall(PetscMalloc1((n + 1) * bs, ia)); 14869985e31cSBarry Smith if (n) { 14872462f5fdSStefano Zampini (*ia)[0] = oshift; 1488ad540459SPierre Jolivet for (j = 1; j < bs; j++) (*ia)[j] = (tia[1] - tia[0]) * bs + (*ia)[j - 1]; 14899985e31cSBarry Smith } 1490ecc77c7aSBarry Smith 1491ecc77c7aSBarry Smith for (i = 1; i < n; i++) { 1492ecc77c7aSBarry Smith (*ia)[i * bs] = (tia[i] - tia[i - 1]) * bs + (*ia)[i * bs - 1]; 1493ad540459SPierre Jolivet for (j = 1; j < bs; j++) (*ia)[i * bs + j] = (tia[i + 1] - tia[i]) * bs + (*ia)[i * bs + j - 1]; 14948f7157efSSatish Balay } 1495ad540459SPierre Jolivet if (n) (*ia)[n * bs] = (tia[n] - tia[n - 1]) * bs + (*ia)[n * bs - 1]; 1496ecc77c7aSBarry Smith 14971a83f524SJed Brown if (inja) { 14989566063dSJacob Faibussowitsch 
PetscCall(PetscMalloc1(nz * bs * bs, ja)); 14999985e31cSBarry Smith cnt = 0; 15009985e31cSBarry Smith for (i = 0; i < n; i++) { 15019985e31cSBarry Smith for (j = 0; j < bs; j++) { 15029985e31cSBarry Smith for (k = tia[i]; k < tia[i + 1]; k++) { 1503ad540459SPierre Jolivet for (l = 0; l < bs; l++) (*ja)[cnt++] = bs * tja[k] + l; 15049985e31cSBarry Smith } 15059985e31cSBarry Smith } 15069985e31cSBarry Smith } 15079985e31cSBarry Smith } 15089985e31cSBarry Smith 15098f7157efSSatish Balay if (symmetric) { /* deallocate memory allocated in MatToSymmetricIJ_SeqAIJ() */ 15109566063dSJacob Faibussowitsch PetscCall(PetscFree(tia)); 15119566063dSJacob Faibussowitsch PetscCall(PetscFree(tja)); 15128f7157efSSatish Balay } 1513f6d58c54SBarry Smith } else if (oshift == 1) { 1514715a17b5SBarry Smith if (symmetric) { 1515a2ea699eSBarry Smith nz = tia[A->rmap->n / bs]; 1516715a17b5SBarry Smith /* add 1 to i and j indices */ 1517715a17b5SBarry Smith for (i = 0; i < A->rmap->n / bs + 1; i++) tia[i] = tia[i] + 1; 1518715a17b5SBarry Smith *ia = tia; 1519715a17b5SBarry Smith if (ja) { 1520715a17b5SBarry Smith for (i = 0; i < nz; i++) tja[i] = tja[i] + 1; 1521715a17b5SBarry Smith *ja = tja; 1522715a17b5SBarry Smith } 1523715a17b5SBarry Smith } else { 1524a2ea699eSBarry Smith nz = a->i[A->rmap->n / bs]; 1525f6d58c54SBarry Smith /* malloc space and add 1 to i and j indices */ 15269566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(A->rmap->n / bs + 1, ia)); 1527f6d58c54SBarry Smith for (i = 0; i < A->rmap->n / bs + 1; i++) (*ia)[i] = a->i[i] + 1; 1528f6d58c54SBarry Smith if (ja) { 15299566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(nz, ja)); 1530f6d58c54SBarry Smith for (i = 0; i < nz; i++) (*ja)[i] = a->j[i] + 1; 1531f6d58c54SBarry Smith } 1532715a17b5SBarry Smith } 15338f7157efSSatish Balay } else { 15348f7157efSSatish Balay *ia = tia; 1535ecc77c7aSBarry Smith if (ja) *ja = tja; 15368f7157efSSatish Balay } 15373ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 
15383b2fbd54SBarry Smith } 15393b2fbd54SBarry Smith 1540d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool blockcompressed, PetscInt *nn, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done) 1541d71ae5a4SJacob Faibussowitsch { 15423a40ed3dSBarry Smith PetscFunctionBegin; 15433ba16761SJacob Faibussowitsch if (!ia) PetscFunctionReturn(PETSC_SUCCESS); 1544715a17b5SBarry Smith if ((!blockcompressed && A->rmap->bs > 1) || (symmetric || oshift == 1)) { 15459566063dSJacob Faibussowitsch PetscCall(PetscFree(*ia)); 15469566063dSJacob Faibussowitsch if (ja) PetscCall(PetscFree(*ja)); 15473b2fbd54SBarry Smith } 15483ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 15493b2fbd54SBarry Smith } 15503b2fbd54SBarry Smith 1551d71ae5a4SJacob Faibussowitsch PetscErrorCode MatDestroy_SeqBAIJ(Mat A) 1552d71ae5a4SJacob Faibussowitsch { 15532d61bbb3SSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 15542d61bbb3SSatish Balay 1555433994e6SBarry Smith PetscFunctionBegin; 1556b4e2f619SBarry Smith if (A->hash_active) { 1557b4e2f619SBarry Smith PetscInt bs; 1558e3c72094SPierre Jolivet A->ops[0] = a->cops; 1559b4e2f619SBarry Smith PetscCall(PetscHMapIJVDestroy(&a->ht)); 1560b4e2f619SBarry Smith PetscCall(MatGetBlockSize(A, &bs)); 1561b4e2f619SBarry Smith if (bs > 1) PetscCall(PetscHSetIJDestroy(&a->bht)); 1562b4e2f619SBarry Smith PetscCall(PetscFree(a->dnz)); 1563b4e2f619SBarry Smith PetscCall(PetscFree(a->bdnz)); 1564b4e2f619SBarry Smith A->hash_active = PETSC_FALSE; 1565b4e2f619SBarry Smith } 15663ba16761SJacob Faibussowitsch PetscCall(PetscLogObjectState((PetscObject)A, "Rows=%" PetscInt_FMT ", Cols=%" PetscInt_FMT ", NZ=%" PetscInt_FMT, A->rmap->N, A->cmap->n, a->nz)); 15679566063dSJacob Faibussowitsch PetscCall(MatSeqXAIJFreeAIJ(A, &a->a, &a->j, &a->i)); 15689566063dSJacob Faibussowitsch PetscCall(ISDestroy(&a->row)); 15699566063dSJacob Faibussowitsch PetscCall(ISDestroy(&a->col)); 
15709566063dSJacob Faibussowitsch if (a->free_diag) PetscCall(PetscFree(a->diag)); 15719566063dSJacob Faibussowitsch PetscCall(PetscFree(a->idiag)); 15729566063dSJacob Faibussowitsch if (a->free_imax_ilen) PetscCall(PetscFree2(a->imax, a->ilen)); 15739566063dSJacob Faibussowitsch PetscCall(PetscFree(a->solve_work)); 15749566063dSJacob Faibussowitsch PetscCall(PetscFree(a->mult_work)); 15759566063dSJacob Faibussowitsch PetscCall(PetscFree(a->sor_workt)); 15769566063dSJacob Faibussowitsch PetscCall(PetscFree(a->sor_work)); 15779566063dSJacob Faibussowitsch PetscCall(ISDestroy(&a->icol)); 15789566063dSJacob Faibussowitsch PetscCall(PetscFree(a->saved_values)); 15799566063dSJacob Faibussowitsch PetscCall(PetscFree2(a->compressedrow.i, a->compressedrow.rindex)); 1580c4319e64SHong Zhang 15819566063dSJacob Faibussowitsch PetscCall(MatDestroy(&a->sbaijMat)); 15829566063dSJacob Faibussowitsch PetscCall(MatDestroy(&a->parent)); 15839566063dSJacob Faibussowitsch PetscCall(PetscFree(A->data)); 1584901853e0SKris Buschelman 15859566063dSJacob Faibussowitsch PetscCall(PetscObjectChangeTypeName((PetscObject)A, NULL)); 15869566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatSeqBAIJGetArray_C", NULL)); 15879566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatSeqBAIJRestoreArray_C", NULL)); 15889566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatStoreValues_C", NULL)); 15899566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatRetrieveValues_C", NULL)); 15909566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatSeqBAIJSetColumnIndices_C", NULL)); 15919566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqbaij_seqaij_C", NULL)); 15929566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqbaij_seqsbaij_C", NULL)); 15939566063dSJacob 
Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatSeqBAIJSetPreallocation_C", NULL)); 15949566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatSeqBAIJSetPreallocationCSR_C", NULL)); 15959566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqbaij_seqbstrm_C", NULL)); 15969566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatIsTranspose_C", NULL)); 15977ea3e4caSstefano_zampini #if defined(PETSC_HAVE_HYPRE) 15989566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqbaij_hypre_C", NULL)); 15997ea3e4caSstefano_zampini #endif 16009566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqbaij_is_C", NULL)); 16012e956fe4SStefano Zampini PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatFactorGetSolverType_C", NULL)); 16023ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 16032d61bbb3SSatish Balay } 16042d61bbb3SSatish Balay 160566976f2fSJacob Faibussowitsch static PetscErrorCode MatSetOption_SeqBAIJ(Mat A, MatOption op, PetscBool flg) 1606d71ae5a4SJacob Faibussowitsch { 16072d61bbb3SSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 16082d61bbb3SSatish Balay 16092d61bbb3SSatish Balay PetscFunctionBegin; 1610aa275fccSKris Buschelman switch (op) { 1611d71ae5a4SJacob Faibussowitsch case MAT_ROW_ORIENTED: 1612d71ae5a4SJacob Faibussowitsch a->roworiented = flg; 1613d71ae5a4SJacob Faibussowitsch break; 1614d71ae5a4SJacob Faibussowitsch case MAT_KEEP_NONZERO_PATTERN: 1615d71ae5a4SJacob Faibussowitsch a->keepnonzeropattern = flg; 1616d71ae5a4SJacob Faibussowitsch break; 1617d71ae5a4SJacob Faibussowitsch case MAT_NEW_NONZERO_LOCATIONS: 1618d71ae5a4SJacob Faibussowitsch a->nonew = (flg ? 
0 : 1); 1619d71ae5a4SJacob Faibussowitsch break; 1620d71ae5a4SJacob Faibussowitsch case MAT_NEW_NONZERO_LOCATION_ERR: 1621d71ae5a4SJacob Faibussowitsch a->nonew = (flg ? -1 : 0); 1622d71ae5a4SJacob Faibussowitsch break; 1623d71ae5a4SJacob Faibussowitsch case MAT_NEW_NONZERO_ALLOCATION_ERR: 1624d71ae5a4SJacob Faibussowitsch a->nonew = (flg ? -2 : 0); 1625d71ae5a4SJacob Faibussowitsch break; 1626d71ae5a4SJacob Faibussowitsch case MAT_UNUSED_NONZERO_LOCATION_ERR: 1627d71ae5a4SJacob Faibussowitsch a->nounused = (flg ? -1 : 0); 1628d71ae5a4SJacob Faibussowitsch break; 16298c78258cSHong Zhang case MAT_FORCE_DIAGONAL_ENTRIES: 1630aa275fccSKris Buschelman case MAT_IGNORE_OFF_PROC_ENTRIES: 1631aa275fccSKris Buschelman case MAT_USE_HASH_TABLE: 1632d71ae5a4SJacob Faibussowitsch case MAT_SORTED_FULL: 1633d71ae5a4SJacob Faibussowitsch PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op])); 1634d71ae5a4SJacob Faibussowitsch break; 16355021d80fSJed Brown case MAT_SPD: 163677e54ba9SKris Buschelman case MAT_SYMMETRIC: 163777e54ba9SKris Buschelman case MAT_STRUCTURALLY_SYMMETRIC: 16389a4540c5SBarry Smith case MAT_HERMITIAN: 16399a4540c5SBarry Smith case MAT_SYMMETRY_ETERNAL: 1640b94d7dedSBarry Smith case MAT_STRUCTURAL_SYMMETRY_ETERNAL: 1641c10200c1SHong Zhang case MAT_SUBMAT_SINGLEIS: 1642672ba085SHong Zhang case MAT_STRUCTURE_ONLY: 1643b94d7dedSBarry Smith case MAT_SPD_ETERNAL: 1644b94d7dedSBarry Smith /* if the diagonal matrix is square it inherits some of the properties above */ 164577e54ba9SKris Buschelman break; 1646d71ae5a4SJacob Faibussowitsch default: 1647d71ae5a4SJacob Faibussowitsch SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "unknown option %d", op); 16482d61bbb3SSatish Balay } 16493ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 16502d61bbb3SSatish Balay } 16512d61bbb3SSatish Balay 165252768537SHong Zhang /* used for both SeqBAIJ and SeqSBAIJ matrices */ 1653d71ae5a4SJacob Faibussowitsch PetscErrorCode MatGetRow_SeqBAIJ_private(Mat A, PetscInt row, 
PetscInt *nz, PetscInt **idx, PetscScalar **v, PetscInt *ai, PetscInt *aj, PetscScalar *aa) 1654d71ae5a4SJacob Faibussowitsch { 165552768537SHong Zhang PetscInt itmp, i, j, k, M, bn, bp, *idx_i, bs, bs2; 165652768537SHong Zhang MatScalar *aa_i; 165787828ca2SBarry Smith PetscScalar *v_i; 16582d61bbb3SSatish Balay 16592d61bbb3SSatish Balay PetscFunctionBegin; 1660d0f46423SBarry Smith bs = A->rmap->bs; 166152768537SHong Zhang bs2 = bs * bs; 16625f80ce2aSJacob Faibussowitsch PetscCheck(row >= 0 && row < A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " out of range", row); 16632d61bbb3SSatish Balay 16642d61bbb3SSatish Balay bn = row / bs; /* Block number */ 16652d61bbb3SSatish Balay bp = row % bs; /* Block Position */ 16662d61bbb3SSatish Balay M = ai[bn + 1] - ai[bn]; 16672d61bbb3SSatish Balay *nz = bs * M; 16682d61bbb3SSatish Balay 16692d61bbb3SSatish Balay if (v) { 1670f4259b30SLisandro Dalcin *v = NULL; 16712d61bbb3SSatish Balay if (*nz) { 16729566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(*nz, v)); 16732d61bbb3SSatish Balay for (i = 0; i < M; i++) { /* for each block in the block row */ 16742d61bbb3SSatish Balay v_i = *v + i * bs; 16752d61bbb3SSatish Balay aa_i = aa + bs2 * (ai[bn] + i); 167626fbe8dcSKarl Rupp for (j = bp, k = 0; j < bs2; j += bs, k++) v_i[k] = aa_i[j]; 16772d61bbb3SSatish Balay } 16782d61bbb3SSatish Balay } 16792d61bbb3SSatish Balay } 16802d61bbb3SSatish Balay 16812d61bbb3SSatish Balay if (idx) { 1682f4259b30SLisandro Dalcin *idx = NULL; 16832d61bbb3SSatish Balay if (*nz) { 16849566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(*nz, idx)); 16852d61bbb3SSatish Balay for (i = 0; i < M; i++) { /* for each block in the block row */ 16862d61bbb3SSatish Balay idx_i = *idx + i * bs; 16872d61bbb3SSatish Balay itmp = bs * aj[ai[bn] + i]; 168826fbe8dcSKarl Rupp for (j = 0; j < bs; j++) idx_i[j] = itmp++; 16892d61bbb3SSatish Balay } 16902d61bbb3SSatish Balay } 16912d61bbb3SSatish Balay } 16923ba16761SJacob Faibussowitsch 
PetscFunctionReturn(PETSC_SUCCESS); 16932d61bbb3SSatish Balay } 16942d61bbb3SSatish Balay 1695d71ae5a4SJacob Faibussowitsch PetscErrorCode MatGetRow_SeqBAIJ(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v) 1696d71ae5a4SJacob Faibussowitsch { 169752768537SHong Zhang Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 169852768537SHong Zhang 169952768537SHong Zhang PetscFunctionBegin; 17009566063dSJacob Faibussowitsch PetscCall(MatGetRow_SeqBAIJ_private(A, row, nz, idx, v, a->i, a->j, a->a)); 17013ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 170252768537SHong Zhang } 170352768537SHong Zhang 1704d71ae5a4SJacob Faibussowitsch PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v) 1705d71ae5a4SJacob Faibussowitsch { 17062d61bbb3SSatish Balay PetscFunctionBegin; 17079566063dSJacob Faibussowitsch if (idx) PetscCall(PetscFree(*idx)); 17089566063dSJacob Faibussowitsch if (v) PetscCall(PetscFree(*v)); 17093ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 17102d61bbb3SSatish Balay } 17112d61bbb3SSatish Balay 171266976f2fSJacob Faibussowitsch static PetscErrorCode MatTranspose_SeqBAIJ(Mat A, MatReuse reuse, Mat *B) 1713d71ae5a4SJacob Faibussowitsch { 171420e84f26SHong Zhang Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data, *at; 17152d61bbb3SSatish Balay Mat C; 171620e84f26SHong Zhang PetscInt i, j, k, *aj = a->j, *ai = a->i, bs = A->rmap->bs, mbs = a->mbs, nbs = a->nbs, *atfill; 171720e84f26SHong Zhang PetscInt bs2 = a->bs2, *ati, *atj, anzj, kr; 171820e84f26SHong Zhang MatScalar *ata, *aa = a->a; 17192d61bbb3SSatish Balay 17202d61bbb3SSatish Balay PetscFunctionBegin; 17217fb60732SBarry Smith if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *B)); 17229566063dSJacob Faibussowitsch PetscCall(PetscCalloc1(1 + nbs, &atfill)); 1723cf37664fSBarry Smith if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) { 172420e84f26SHong Zhang for (i = 0; i < ai[mbs]; 
i++) atfill[aj[i]] += 1; /* count num of non-zeros in row aj[i] */ 17252d61bbb3SSatish Balay 17269566063dSJacob Faibussowitsch PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &C)); 17279566063dSJacob Faibussowitsch PetscCall(MatSetSizes(C, A->cmap->n, A->rmap->N, A->cmap->n, A->rmap->N)); 17289566063dSJacob Faibussowitsch PetscCall(MatSetType(C, ((PetscObject)A)->type_name)); 17299566063dSJacob Faibussowitsch PetscCall(MatSeqBAIJSetPreallocation(C, bs, 0, atfill)); 173020e84f26SHong Zhang 173120e84f26SHong Zhang at = (Mat_SeqBAIJ *)C->data; 173220e84f26SHong Zhang ati = at->i; 173320e84f26SHong Zhang for (i = 0; i < nbs; i++) at->ilen[i] = at->imax[i] = ati[i + 1] - ati[i]; 1734fc4dec0aSBarry Smith } else { 1735fc4dec0aSBarry Smith C = *B; 173620e84f26SHong Zhang at = (Mat_SeqBAIJ *)C->data; 173720e84f26SHong Zhang ati = at->i; 1738fc4dec0aSBarry Smith } 1739fc4dec0aSBarry Smith 174020e84f26SHong Zhang atj = at->j; 174120e84f26SHong Zhang ata = at->a; 174220e84f26SHong Zhang 174320e84f26SHong Zhang /* Copy ati into atfill so we have locations of the next free space in atj */ 17449566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(atfill, ati, nbs)); 174520e84f26SHong Zhang 174620e84f26SHong Zhang /* Walk through A row-wise and mark nonzero entries of A^T. 
*/ 17472d61bbb3SSatish Balay for (i = 0; i < mbs; i++) { 174820e84f26SHong Zhang anzj = ai[i + 1] - ai[i]; 174920e84f26SHong Zhang for (j = 0; j < anzj; j++) { 175020e84f26SHong Zhang atj[atfill[*aj]] = i; 175120e84f26SHong Zhang for (kr = 0; kr < bs; kr++) { 1752ad540459SPierre Jolivet for (k = 0; k < bs; k++) ata[bs2 * atfill[*aj] + k * bs + kr] = *aa++; 17532d61bbb3SSatish Balay } 175420e84f26SHong Zhang atfill[*aj++] += 1; 175520e84f26SHong Zhang } 175620e84f26SHong Zhang } 17579566063dSJacob Faibussowitsch PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY)); 17589566063dSJacob Faibussowitsch PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY)); 17592d61bbb3SSatish Balay 176020e84f26SHong Zhang /* Clean up temporary space and complete requests. */ 17619566063dSJacob Faibussowitsch PetscCall(PetscFree(atfill)); 176220e84f26SHong Zhang 1763cf37664fSBarry Smith if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) { 17649566063dSJacob Faibussowitsch PetscCall(MatSetBlockSizes(C, PetscAbs(A->cmap->bs), PetscAbs(A->rmap->bs))); 17652d61bbb3SSatish Balay *B = C; 17662d61bbb3SSatish Balay } else { 17679566063dSJacob Faibussowitsch PetscCall(MatHeaderMerge(A, &C)); 17682d61bbb3SSatish Balay } 17693ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 17702d61bbb3SSatish Balay } 17712d61bbb3SSatish Balay 1772ff6a9541SJacob Faibussowitsch static PetscErrorCode MatIsTranspose_SeqBAIJ(Mat A, Mat B, PetscReal tol, PetscBool *f) 1773d71ae5a4SJacob Faibussowitsch { 1774453d3561SHong Zhang Mat Btrans; 1775453d3561SHong Zhang 1776453d3561SHong Zhang PetscFunctionBegin; 1777453d3561SHong Zhang *f = PETSC_FALSE; 1778acd337a6SBarry Smith PetscCall(MatTranspose(A, MAT_INITIAL_MATRIX, &Btrans)); 17799566063dSJacob Faibussowitsch PetscCall(MatEqual_SeqBAIJ(B, Btrans, f)); 17809566063dSJacob Faibussowitsch PetscCall(MatDestroy(&Btrans)); 17813ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1782453d3561SHong Zhang } 1783453d3561SHong Zhang 
1784618cc2edSLisandro Dalcin /* Used for both SeqBAIJ and SeqSBAIJ matrices */ 1785d71ae5a4SJacob Faibussowitsch PetscErrorCode MatView_SeqBAIJ_Binary(Mat mat, PetscViewer viewer) 1786d71ae5a4SJacob Faibussowitsch { 1787b51a4376SLisandro Dalcin Mat_SeqBAIJ *A = (Mat_SeqBAIJ *)mat->data; 1788b51a4376SLisandro Dalcin PetscInt header[4], M, N, m, bs, nz, cnt, i, j, k, l; 1789b51a4376SLisandro Dalcin PetscInt *rowlens, *colidxs; 1790b51a4376SLisandro Dalcin PetscScalar *matvals; 17912593348eSBarry Smith 17923a40ed3dSBarry Smith PetscFunctionBegin; 17939566063dSJacob Faibussowitsch PetscCall(PetscViewerSetUp(viewer)); 17943b2fbd54SBarry Smith 1795b51a4376SLisandro Dalcin M = mat->rmap->N; 1796b51a4376SLisandro Dalcin N = mat->cmap->N; 1797b51a4376SLisandro Dalcin m = mat->rmap->n; 1798b51a4376SLisandro Dalcin bs = mat->rmap->bs; 1799b51a4376SLisandro Dalcin nz = bs * bs * A->nz; 18002593348eSBarry Smith 1801b51a4376SLisandro Dalcin /* write matrix header */ 1802b51a4376SLisandro Dalcin header[0] = MAT_FILE_CLASSID; 18039371c9d4SSatish Balay header[1] = M; 18049371c9d4SSatish Balay header[2] = N; 18059371c9d4SSatish Balay header[3] = nz; 18069566063dSJacob Faibussowitsch PetscCall(PetscViewerBinaryWrite(viewer, header, 4, PETSC_INT)); 18072593348eSBarry Smith 1808b51a4376SLisandro Dalcin /* store row lengths */ 18099566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(m, &rowlens)); 1810b51a4376SLisandro Dalcin for (cnt = 0, i = 0; i < A->mbs; i++) 18119371c9d4SSatish Balay for (j = 0; j < bs; j++) rowlens[cnt++] = bs * (A->i[i + 1] - A->i[i]); 18129566063dSJacob Faibussowitsch PetscCall(PetscViewerBinaryWrite(viewer, rowlens, m, PETSC_INT)); 18139566063dSJacob Faibussowitsch PetscCall(PetscFree(rowlens)); 1814b51a4376SLisandro Dalcin 1815b51a4376SLisandro Dalcin /* store column indices */ 18169566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(nz, &colidxs)); 1817b51a4376SLisandro Dalcin for (cnt = 0, i = 0; i < A->mbs; i++) 1818b51a4376SLisandro Dalcin for (k = 0; k < 
bs; k++) 1819b51a4376SLisandro Dalcin for (j = A->i[i]; j < A->i[i + 1]; j++) 18209371c9d4SSatish Balay for (l = 0; l < bs; l++) colidxs[cnt++] = bs * A->j[j] + l; 18215f80ce2aSJacob Faibussowitsch PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_LIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt_FMT, cnt, nz); 18229566063dSJacob Faibussowitsch PetscCall(PetscViewerBinaryWrite(viewer, colidxs, nz, PETSC_INT)); 18239566063dSJacob Faibussowitsch PetscCall(PetscFree(colidxs)); 18242593348eSBarry Smith 18252593348eSBarry Smith /* store nonzero values */ 18269566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(nz, &matvals)); 1827b51a4376SLisandro Dalcin for (cnt = 0, i = 0; i < A->mbs; i++) 1828b51a4376SLisandro Dalcin for (k = 0; k < bs; k++) 1829b51a4376SLisandro Dalcin for (j = A->i[i]; j < A->i[i + 1]; j++) 18309371c9d4SSatish Balay for (l = 0; l < bs; l++) matvals[cnt++] = A->a[bs * (bs * j + l) + k]; 18315f80ce2aSJacob Faibussowitsch PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_LIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt_FMT, cnt, nz); 18329566063dSJacob Faibussowitsch PetscCall(PetscViewerBinaryWrite(viewer, matvals, nz, PETSC_SCALAR)); 18339566063dSJacob Faibussowitsch PetscCall(PetscFree(matvals)); 1834ce6f0cecSBarry Smith 1835b51a4376SLisandro Dalcin /* write block size option to the viewer's .info file */ 18369566063dSJacob Faibussowitsch PetscCall(MatView_Binary_BlockSizes(mat, viewer)); 18373ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 18382593348eSBarry Smith } 18392593348eSBarry Smith 1840d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatView_SeqBAIJ_ASCII_structonly(Mat A, PetscViewer viewer) 1841d71ae5a4SJacob Faibussowitsch { 18427dc0baabSHong Zhang Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 18437dc0baabSHong Zhang PetscInt i, bs = A->rmap->bs, k; 18447dc0baabSHong Zhang 18457dc0baabSHong Zhang PetscFunctionBegin; 18469566063dSJacob Faibussowitsch 
PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_FALSE)); 18477dc0baabSHong Zhang for (i = 0; i < a->mbs; i++) { 18489566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, "row %" PetscInt_FMT "-%" PetscInt_FMT ":", i * bs, i * bs + bs - 1)); 184948a46eb9SPierre Jolivet for (k = a->i[i]; k < a->i[i + 1]; k++) PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT "-%" PetscInt_FMT ") ", bs * a->j[k], bs * a->j[k] + bs - 1)); 18509566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, "\n")); 18517dc0baabSHong Zhang } 18529566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_TRUE)); 18533ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 18547dc0baabSHong Zhang } 18557dc0baabSHong Zhang 1856d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A, PetscViewer viewer) 1857d71ae5a4SJacob Faibussowitsch { 1858b6490206SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 1859d0f46423SBarry Smith PetscInt i, j, bs = A->rmap->bs, k, l, bs2 = a->bs2; 1860f3ef73ceSBarry Smith PetscViewerFormat format; 18612593348eSBarry Smith 18623a40ed3dSBarry Smith PetscFunctionBegin; 18637dc0baabSHong Zhang if (A->structure_only) { 18649566063dSJacob Faibussowitsch PetscCall(MatView_SeqBAIJ_ASCII_structonly(A, viewer)); 18653ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 18667dc0baabSHong Zhang } 18677dc0baabSHong Zhang 18689566063dSJacob Faibussowitsch PetscCall(PetscViewerGetFormat(viewer, &format)); 1869456192e2SBarry Smith if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 18709566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " block size is %" PetscInt_FMT "\n", bs)); 1871fb9695e5SSatish Balay } else if (format == PETSC_VIEWER_ASCII_MATLAB) { 1872ade3a672SBarry Smith const char *matname; 1873bcd9e38bSBarry Smith Mat aij; 18749566063dSJacob Faibussowitsch PetscCall(MatConvert(A, MATSEQAIJ, MAT_INITIAL_MATRIX, &aij)); 
18759566063dSJacob Faibussowitsch PetscCall(PetscObjectGetName((PetscObject)A, &matname)); 18769566063dSJacob Faibussowitsch PetscCall(PetscObjectSetName((PetscObject)aij, matname)); 18779566063dSJacob Faibussowitsch PetscCall(MatView(aij, viewer)); 18789566063dSJacob Faibussowitsch PetscCall(MatDestroy(&aij)); 187904929863SHong Zhang } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 18803ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1881fb9695e5SSatish Balay } else if (format == PETSC_VIEWER_ASCII_COMMON) { 18829566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_FALSE)); 188344cd7ae7SLois Curfman McInnes for (i = 0; i < a->mbs; i++) { 188444cd7ae7SLois Curfman McInnes for (j = 0; j < bs; j++) { 18859566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, "row %" PetscInt_FMT ":", i * bs + j)); 188644cd7ae7SLois Curfman McInnes for (k = a->i[i]; k < a->i[i + 1]; k++) { 188744cd7ae7SLois Curfman McInnes for (l = 0; l < bs; l++) { 1888aa482453SBarry Smith #if defined(PETSC_USE_COMPLEX) 18890e6d2581SBarry Smith if (PetscImaginaryPart(a->a[bs2 * k + l * bs + j]) > 0.0 && PetscRealPart(a->a[bs2 * k + l * bs + j]) != 0.0) { 18909371c9d4SSatish Balay PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g + %gi) ", bs * a->j[k] + l, (double)PetscRealPart(a->a[bs2 * k + l * bs + j]), (double)PetscImaginaryPart(a->a[bs2 * k + l * bs + j]))); 18910e6d2581SBarry Smith } else if (PetscImaginaryPart(a->a[bs2 * k + l * bs + j]) < 0.0 && PetscRealPart(a->a[bs2 * k + l * bs + j]) != 0.0) { 18929371c9d4SSatish Balay PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g - %gi) ", bs * a->j[k] + l, (double)PetscRealPart(a->a[bs2 * k + l * bs + j]), -(double)PetscImaginaryPart(a->a[bs2 * k + l * bs + j]))); 18930e6d2581SBarry Smith } else if (PetscRealPart(a->a[bs2 * k + l * bs + j]) != 0.0) { 18949566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g) ", bs * 
a->j[k] + l, (double)PetscRealPart(a->a[bs2 * k + l * bs + j]))); 18950ef38995SBarry Smith } 189644cd7ae7SLois Curfman McInnes #else 189748a46eb9SPierre Jolivet if (a->a[bs2 * k + l * bs + j] != 0.0) PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g) ", bs * a->j[k] + l, (double)a->a[bs2 * k + l * bs + j])); 189844cd7ae7SLois Curfman McInnes #endif 189944cd7ae7SLois Curfman McInnes } 190044cd7ae7SLois Curfman McInnes } 19019566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, "\n")); 190244cd7ae7SLois Curfman McInnes } 190344cd7ae7SLois Curfman McInnes } 19049566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_TRUE)); 19050ef38995SBarry Smith } else { 19069566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_FALSE)); 1907b6490206SBarry Smith for (i = 0; i < a->mbs; i++) { 1908b6490206SBarry Smith for (j = 0; j < bs; j++) { 19099566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, "row %" PetscInt_FMT ":", i * bs + j)); 1910b6490206SBarry Smith for (k = a->i[i]; k < a->i[i + 1]; k++) { 1911b6490206SBarry Smith for (l = 0; l < bs; l++) { 1912aa482453SBarry Smith #if defined(PETSC_USE_COMPLEX) 19130e6d2581SBarry Smith if (PetscImaginaryPart(a->a[bs2 * k + l * bs + j]) > 0.0) { 19149371c9d4SSatish Balay PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g + %g i) ", bs * a->j[k] + l, (double)PetscRealPart(a->a[bs2 * k + l * bs + j]), (double)PetscImaginaryPart(a->a[bs2 * k + l * bs + j]))); 19150e6d2581SBarry Smith } else if (PetscImaginaryPart(a->a[bs2 * k + l * bs + j]) < 0.0) { 19169371c9d4SSatish Balay PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g - %g i) ", bs * a->j[k] + l, (double)PetscRealPart(a->a[bs2 * k + l * bs + j]), -(double)PetscImaginaryPart(a->a[bs2 * k + l * bs + j]))); 19170ef38995SBarry Smith } else { 19189566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g) ", bs * a->j[k] + 
l, (double)PetscRealPart(a->a[bs2 * k + l * bs + j]))); 191988685aaeSLois Curfman McInnes } 192088685aaeSLois Curfman McInnes #else 19219566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g) ", bs * a->j[k] + l, (double)a->a[bs2 * k + l * bs + j])); 192288685aaeSLois Curfman McInnes #endif 19232593348eSBarry Smith } 19242593348eSBarry Smith } 19259566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, "\n")); 19262593348eSBarry Smith } 19272593348eSBarry Smith } 19289566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_TRUE)); 1929b6490206SBarry Smith } 19309566063dSJacob Faibussowitsch PetscCall(PetscViewerFlush(viewer)); 19313ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 19322593348eSBarry Smith } 19332593348eSBarry Smith 19349804daf3SBarry Smith #include <petscdraw.h> 1935d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw, void *Aa) 1936d71ae5a4SJacob Faibussowitsch { 193777ed5343SBarry Smith Mat A = (Mat)Aa; 19383270192aSSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 1939d0f46423SBarry Smith PetscInt row, i, j, k, l, mbs = a->mbs, color, bs = A->rmap->bs, bs2 = a->bs2; 19400e6d2581SBarry Smith PetscReal xl, yl, xr, yr, x_l, x_r, y_l, y_r; 19413f1db9ecSBarry Smith MatScalar *aa; 1942b0a32e0cSBarry Smith PetscViewer viewer; 1943b3e7f47fSJed Brown PetscViewerFormat format; 19443270192aSSatish Balay 19453a40ed3dSBarry Smith PetscFunctionBegin; 19469566063dSJacob Faibussowitsch PetscCall(PetscObjectQuery((PetscObject)A, "Zoomviewer", (PetscObject *)&viewer)); 19479566063dSJacob Faibussowitsch PetscCall(PetscViewerGetFormat(viewer, &format)); 19489566063dSJacob Faibussowitsch PetscCall(PetscDrawGetCoordinates(draw, &xl, &yl, &xr, &yr)); 194977ed5343SBarry Smith 19503270192aSSatish Balay /* loop over matrix elements drawing boxes */ 1951b3e7f47fSJed Brown 1952b3e7f47fSJed Brown if (format != PETSC_VIEWER_DRAW_CONTOUR) { 
1953d0609cedSBarry Smith PetscDrawCollectiveBegin(draw); 1954383922c3SLisandro Dalcin /* Blue for negative, Cyan for zero and Red for positive */ 1955b0a32e0cSBarry Smith color = PETSC_DRAW_BLUE; 19563270192aSSatish Balay for (i = 0, row = 0; i < mbs; i++, row += bs) { 19573270192aSSatish Balay for (j = a->i[i]; j < a->i[i + 1]; j++) { 19589371c9d4SSatish Balay y_l = A->rmap->N - row - 1.0; 19599371c9d4SSatish Balay y_r = y_l + 1.0; 19609371c9d4SSatish Balay x_l = a->j[j] * bs; 19619371c9d4SSatish Balay x_r = x_l + 1.0; 19623270192aSSatish Balay aa = a->a + j * bs2; 19633270192aSSatish Balay for (k = 0; k < bs; k++) { 19643270192aSSatish Balay for (l = 0; l < bs; l++) { 19650e6d2581SBarry Smith if (PetscRealPart(*aa++) >= 0.) continue; 19669566063dSJacob Faibussowitsch PetscCall(PetscDrawRectangle(draw, x_l + k, y_l - l, x_r + k, y_r - l, color, color, color, color)); 19673270192aSSatish Balay } 19683270192aSSatish Balay } 19693270192aSSatish Balay } 19703270192aSSatish Balay } 1971b0a32e0cSBarry Smith color = PETSC_DRAW_CYAN; 19723270192aSSatish Balay for (i = 0, row = 0; i < mbs; i++, row += bs) { 19733270192aSSatish Balay for (j = a->i[i]; j < a->i[i + 1]; j++) { 19749371c9d4SSatish Balay y_l = A->rmap->N - row - 1.0; 19759371c9d4SSatish Balay y_r = y_l + 1.0; 19769371c9d4SSatish Balay x_l = a->j[j] * bs; 19779371c9d4SSatish Balay x_r = x_l + 1.0; 19783270192aSSatish Balay aa = a->a + j * bs2; 19793270192aSSatish Balay for (k = 0; k < bs; k++) { 19803270192aSSatish Balay for (l = 0; l < bs; l++) { 19810e6d2581SBarry Smith if (PetscRealPart(*aa++) != 0.) 
continue; 19829566063dSJacob Faibussowitsch PetscCall(PetscDrawRectangle(draw, x_l + k, y_l - l, x_r + k, y_r - l, color, color, color, color)); 19833270192aSSatish Balay } 19843270192aSSatish Balay } 19853270192aSSatish Balay } 19863270192aSSatish Balay } 1987b0a32e0cSBarry Smith color = PETSC_DRAW_RED; 19883270192aSSatish Balay for (i = 0, row = 0; i < mbs; i++, row += bs) { 19893270192aSSatish Balay for (j = a->i[i]; j < a->i[i + 1]; j++) { 19909371c9d4SSatish Balay y_l = A->rmap->N - row - 1.0; 19919371c9d4SSatish Balay y_r = y_l + 1.0; 19929371c9d4SSatish Balay x_l = a->j[j] * bs; 19939371c9d4SSatish Balay x_r = x_l + 1.0; 19943270192aSSatish Balay aa = a->a + j * bs2; 19953270192aSSatish Balay for (k = 0; k < bs; k++) { 19963270192aSSatish Balay for (l = 0; l < bs; l++) { 19970e6d2581SBarry Smith if (PetscRealPart(*aa++) <= 0.) continue; 19989566063dSJacob Faibussowitsch PetscCall(PetscDrawRectangle(draw, x_l + k, y_l - l, x_r + k, y_r - l, color, color, color, color)); 19993270192aSSatish Balay } 20003270192aSSatish Balay } 20013270192aSSatish Balay } 20023270192aSSatish Balay } 2003d0609cedSBarry Smith PetscDrawCollectiveEnd(draw); 2004b3e7f47fSJed Brown } else { 2005b3e7f47fSJed Brown /* use contour shading to indicate magnitude of values */ 2006b3e7f47fSJed Brown /* first determine max of all nonzero values */ 2007b05fc000SLisandro Dalcin PetscReal minv = 0.0, maxv = 0.0; 2008b3e7f47fSJed Brown PetscDraw popup; 2009b3e7f47fSJed Brown 2010b3e7f47fSJed Brown for (i = 0; i < a->nz * a->bs2; i++) { 2011b3e7f47fSJed Brown if (PetscAbsScalar(a->a[i]) > maxv) maxv = PetscAbsScalar(a->a[i]); 2012b3e7f47fSJed Brown } 2013383922c3SLisandro Dalcin if (minv >= maxv) maxv = minv + PETSC_SMALL; 20149566063dSJacob Faibussowitsch PetscCall(PetscDrawGetPopup(draw, &popup)); 20159566063dSJacob Faibussowitsch PetscCall(PetscDrawScalePopup(popup, 0.0, maxv)); 2016383922c3SLisandro Dalcin 2017d0609cedSBarry Smith PetscDrawCollectiveBegin(draw); 2018b3e7f47fSJed Brown for (i = 
0, row = 0; i < mbs; i++, row += bs) { 2019b3e7f47fSJed Brown for (j = a->i[i]; j < a->i[i + 1]; j++) { 20209371c9d4SSatish Balay y_l = A->rmap->N - row - 1.0; 20219371c9d4SSatish Balay y_r = y_l + 1.0; 20229371c9d4SSatish Balay x_l = a->j[j] * bs; 20239371c9d4SSatish Balay x_r = x_l + 1.0; 2024b3e7f47fSJed Brown aa = a->a + j * bs2; 2025b3e7f47fSJed Brown for (k = 0; k < bs; k++) { 2026b3e7f47fSJed Brown for (l = 0; l < bs; l++) { 2027383922c3SLisandro Dalcin MatScalar v = *aa++; 2028383922c3SLisandro Dalcin color = PetscDrawRealToColor(PetscAbsScalar(v), minv, maxv); 20299566063dSJacob Faibussowitsch PetscCall(PetscDrawRectangle(draw, x_l + k, y_l - l, x_r + k, y_r - l, color, color, color, color)); 2030b3e7f47fSJed Brown } 2031b3e7f47fSJed Brown } 2032b3e7f47fSJed Brown } 2033b3e7f47fSJed Brown } 2034d0609cedSBarry Smith PetscDrawCollectiveEnd(draw); 2035b3e7f47fSJed Brown } 20363ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 203777ed5343SBarry Smith } 20383270192aSSatish Balay 2039d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A, PetscViewer viewer) 2040d71ae5a4SJacob Faibussowitsch { 20410e6d2581SBarry Smith PetscReal xl, yl, xr, yr, w, h; 2042b0a32e0cSBarry Smith PetscDraw draw; 2043ace3abfcSBarry Smith PetscBool isnull; 20443270192aSSatish Balay 204577ed5343SBarry Smith PetscFunctionBegin; 20469566063dSJacob Faibussowitsch PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw)); 20479566063dSJacob Faibussowitsch PetscCall(PetscDrawIsNull(draw, &isnull)); 20483ba16761SJacob Faibussowitsch if (isnull) PetscFunctionReturn(PETSC_SUCCESS); 204977ed5343SBarry Smith 20509371c9d4SSatish Balay xr = A->cmap->n; 20519371c9d4SSatish Balay yr = A->rmap->N; 20529371c9d4SSatish Balay h = yr / 10.0; 20539371c9d4SSatish Balay w = xr / 10.0; 20549371c9d4SSatish Balay xr += w; 20559371c9d4SSatish Balay yr += h; 20569371c9d4SSatish Balay xl = -w; 20579371c9d4SSatish Balay yl = -h; 20589566063dSJacob Faibussowitsch 
PetscCall(PetscDrawSetCoordinates(draw, xl, yl, xr, yr)); 20599566063dSJacob Faibussowitsch PetscCall(PetscObjectCompose((PetscObject)A, "Zoomviewer", (PetscObject)viewer)); 20609566063dSJacob Faibussowitsch PetscCall(PetscDrawZoom(draw, MatView_SeqBAIJ_Draw_Zoom, A)); 20619566063dSJacob Faibussowitsch PetscCall(PetscObjectCompose((PetscObject)A, "Zoomviewer", NULL)); 20629566063dSJacob Faibussowitsch PetscCall(PetscDrawSave(draw)); 20633ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 20643270192aSSatish Balay } 20653270192aSSatish Balay 2066d71ae5a4SJacob Faibussowitsch PetscErrorCode MatView_SeqBAIJ(Mat A, PetscViewer viewer) 2067d71ae5a4SJacob Faibussowitsch { 2068ace3abfcSBarry Smith PetscBool iascii, isbinary, isdraw; 20692593348eSBarry Smith 20703a40ed3dSBarry Smith PetscFunctionBegin; 20719566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii)); 20729566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary)); 20739566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw)); 207432077d6dSBarry Smith if (iascii) { 20759566063dSJacob Faibussowitsch PetscCall(MatView_SeqBAIJ_ASCII(A, viewer)); 20760f5bd95cSBarry Smith } else if (isbinary) { 20779566063dSJacob Faibussowitsch PetscCall(MatView_SeqBAIJ_Binary(A, viewer)); 20780f5bd95cSBarry Smith } else if (isdraw) { 20799566063dSJacob Faibussowitsch PetscCall(MatView_SeqBAIJ_Draw(A, viewer)); 20805cd90555SBarry Smith } else { 2081a5e6ed63SBarry Smith Mat B; 20829566063dSJacob Faibussowitsch PetscCall(MatConvert(A, MATSEQAIJ, MAT_INITIAL_MATRIX, &B)); 20839566063dSJacob Faibussowitsch PetscCall(MatView(B, viewer)); 20849566063dSJacob Faibussowitsch PetscCall(MatDestroy(&B)); 20852593348eSBarry Smith } 20863ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 20872593348eSBarry Smith } 2088b6490206SBarry Smith 2089d71ae5a4SJacob 
Faibussowitsch PetscErrorCode MatGetValues_SeqBAIJ(Mat A, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], PetscScalar v[]) 2090d71ae5a4SJacob Faibussowitsch { 2091cd0e1443SSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 2092c1ac3661SBarry Smith PetscInt *rp, k, low, high, t, row, nrow, i, col, l, *aj = a->j; 2093c1ac3661SBarry Smith PetscInt *ai = a->i, *ailen = a->ilen; 2094d0f46423SBarry Smith PetscInt brow, bcol, ridx, cidx, bs = A->rmap->bs, bs2 = a->bs2; 209597e567efSBarry Smith MatScalar *ap, *aa = a->a; 2096cd0e1443SSatish Balay 20973a40ed3dSBarry Smith PetscFunctionBegin; 20982d61bbb3SSatish Balay for (k = 0; k < m; k++) { /* loop over rows */ 20999371c9d4SSatish Balay row = im[k]; 21009371c9d4SSatish Balay brow = row / bs; 21019371c9d4SSatish Balay if (row < 0) { 21029371c9d4SSatish Balay v += n; 21039371c9d4SSatish Balay continue; 21049371c9d4SSatish Balay } /* negative row */ 210554c59aa7SJacob Faibussowitsch PetscCheck(row < A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " too large", row); 21068e3a54c0SPierre Jolivet rp = PetscSafePointerPlusOffset(aj, ai[brow]); 21078e3a54c0SPierre Jolivet ap = PetscSafePointerPlusOffset(aa, bs2 * ai[brow]); 21082c3acbe9SBarry Smith nrow = ailen[brow]; 21092d61bbb3SSatish Balay for (l = 0; l < n; l++) { /* loop over columns */ 21109371c9d4SSatish Balay if (in[l] < 0) { 21119371c9d4SSatish Balay v++; 21129371c9d4SSatish Balay continue; 21139371c9d4SSatish Balay } /* negative column */ 211454c59aa7SJacob Faibussowitsch PetscCheck(in[l] < A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column %" PetscInt_FMT " too large", in[l]); 21152d61bbb3SSatish Balay col = in[l]; 21162d61bbb3SSatish Balay bcol = col / bs; 21172d61bbb3SSatish Balay cidx = col % bs; 21182d61bbb3SSatish Balay ridx = row % bs; 21192d61bbb3SSatish Balay high = nrow; 21202d61bbb3SSatish Balay low = 0; /* assume unsorted */ 21212d61bbb3SSatish Balay while (high - low > 5) { 2122cd0e1443SSatish 
Balay t = (low + high) / 2; 2123cd0e1443SSatish Balay if (rp[t] > bcol) high = t; 2124cd0e1443SSatish Balay else low = t; 2125cd0e1443SSatish Balay } 2126cd0e1443SSatish Balay for (i = low; i < high; i++) { 2127cd0e1443SSatish Balay if (rp[i] > bcol) break; 2128cd0e1443SSatish Balay if (rp[i] == bcol) { 21292d61bbb3SSatish Balay *v++ = ap[bs2 * i + bs * cidx + ridx]; 21302d61bbb3SSatish Balay goto finished; 2131cd0e1443SSatish Balay } 2132cd0e1443SSatish Balay } 213397e567efSBarry Smith *v++ = 0.0; 21342d61bbb3SSatish Balay finished:; 2135cd0e1443SSatish Balay } 2136cd0e1443SSatish Balay } 21373ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 2138cd0e1443SSatish Balay } 2139cd0e1443SSatish Balay 2140d71ae5a4SJacob Faibussowitsch PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat A, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode is) 2141d71ae5a4SJacob Faibussowitsch { 214292c4ed94SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 2143e2ee6c50SBarry Smith PetscInt *rp, k, low, high, t, ii, jj, row, nrow, i, col, l, rmax, N, lastcol = -1; 2144c1ac3661SBarry Smith PetscInt *imax = a->imax, *ai = a->i, *ailen = a->ilen; 2145d0f46423SBarry Smith PetscInt *aj = a->j, nonew = a->nonew, bs2 = a->bs2, bs = A->rmap->bs, stepval; 2146ace3abfcSBarry Smith PetscBool roworiented = a->roworiented; 2147dd6ea824SBarry Smith const PetscScalar *value = v; 21489d243f67SHong Zhang MatScalar *ap = NULL, *aa = a->a, *bap; 214992c4ed94SBarry Smith 21503a40ed3dSBarry Smith PetscFunctionBegin; 21510e324ae4SSatish Balay if (roworiented) { 21520e324ae4SSatish Balay stepval = (n - 1) * bs; 21530e324ae4SSatish Balay } else { 21540e324ae4SSatish Balay stepval = (m - 1) * bs; 21550e324ae4SSatish Balay } 215692c4ed94SBarry Smith for (k = 0; k < m; k++) { /* loop over added rows */ 215792c4ed94SBarry Smith row = im[k]; 21585ef9f2a5SBarry Smith if (row < 0) continue; 21596bdcaf15SBarry Smith PetscCheck(row < a->mbs, PETSC_COMM_SELF, 
PETSC_ERR_ARG_OUTOFRANGE, "Block row index too large %" PetscInt_FMT " max %" PetscInt_FMT, row, a->mbs - 1); 216092c4ed94SBarry Smith rp = aj + ai[row]; 21617dc0baabSHong Zhang if (!A->structure_only) ap = aa + bs2 * ai[row]; 216292c4ed94SBarry Smith rmax = imax[row]; 216392c4ed94SBarry Smith nrow = ailen[row]; 216492c4ed94SBarry Smith low = 0; 2165c71e6ed7SBarry Smith high = nrow; 216692c4ed94SBarry Smith for (l = 0; l < n; l++) { /* loop over added columns */ 21675ef9f2a5SBarry Smith if (in[l] < 0) continue; 21686bdcaf15SBarry Smith PetscCheck(in[l] < a->nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Block column index too large %" PetscInt_FMT " max %" PetscInt_FMT, in[l], a->nbs - 1); 216992c4ed94SBarry Smith col = in[l]; 21707dc0baabSHong Zhang if (!A->structure_only) { 217192c4ed94SBarry Smith if (roworiented) { 217253ef36baSBarry Smith value = v + (k * (stepval + bs) + l) * bs; 21730e324ae4SSatish Balay } else { 217453ef36baSBarry Smith value = v + (l * (stepval + bs) + k) * bs; 217592c4ed94SBarry Smith } 21767dc0baabSHong Zhang } 217726fbe8dcSKarl Rupp if (col <= lastcol) low = 0; 217826fbe8dcSKarl Rupp else high = nrow; 2179e2ee6c50SBarry Smith lastcol = col; 218092c4ed94SBarry Smith while (high - low > 7) { 218192c4ed94SBarry Smith t = (low + high) / 2; 218292c4ed94SBarry Smith if (rp[t] > col) high = t; 218392c4ed94SBarry Smith else low = t; 218492c4ed94SBarry Smith } 218592c4ed94SBarry Smith for (i = low; i < high; i++) { 218692c4ed94SBarry Smith if (rp[i] > col) break; 218792c4ed94SBarry Smith if (rp[i] == col) { 21887dc0baabSHong Zhang if (A->structure_only) goto noinsert2; 21898a84c255SSatish Balay bap = ap + bs2 * i; 21900e324ae4SSatish Balay if (roworiented) { 21918a84c255SSatish Balay if (is == ADD_VALUES) { 2192dd9472c6SBarry Smith for (ii = 0; ii < bs; ii++, value += stepval) { 2193ad540459SPierre Jolivet for (jj = ii; jj < bs2; jj += bs) bap[jj] += *value++; 2194dd9472c6SBarry Smith } 21950e324ae4SSatish Balay } else { 2196dd9472c6SBarry 
Smith for (ii = 0; ii < bs; ii++, value += stepval) { 2197ad540459SPierre Jolivet for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++; 2198dd9472c6SBarry Smith } 2199dd9472c6SBarry Smith } 22000e324ae4SSatish Balay } else { 22010e324ae4SSatish Balay if (is == ADD_VALUES) { 220253ef36baSBarry Smith for (ii = 0; ii < bs; ii++, value += bs + stepval) { 2203ad540459SPierre Jolivet for (jj = 0; jj < bs; jj++) bap[jj] += value[jj]; 220453ef36baSBarry Smith bap += bs; 2205dd9472c6SBarry Smith } 22060e324ae4SSatish Balay } else { 220753ef36baSBarry Smith for (ii = 0; ii < bs; ii++, value += bs + stepval) { 2208ad540459SPierre Jolivet for (jj = 0; jj < bs; jj++) bap[jj] = value[jj]; 220953ef36baSBarry Smith bap += bs; 22108a84c255SSatish Balay } 2211dd9472c6SBarry Smith } 2212dd9472c6SBarry Smith } 2213f1241b54SBarry Smith goto noinsert2; 221492c4ed94SBarry Smith } 221592c4ed94SBarry Smith } 221689280ab3SLois Curfman McInnes if (nonew == 1) goto noinsert2; 22175f80ce2aSJacob Faibussowitsch PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new blocked index new nonzero block (%" PetscInt_FMT ", %" PetscInt_FMT ") in the matrix", row, col); 22187dc0baabSHong Zhang if (A->structure_only) { 22197dc0baabSHong Zhang MatSeqXAIJReallocateAIJ_structure_only(A, a->mbs, bs2, nrow, row, col, rmax, ai, aj, rp, imax, nonew, MatScalar); 22207dc0baabSHong Zhang } else { 2221fef13f97SBarry Smith MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, row, col, rmax, aa, ai, aj, rp, ap, imax, nonew, MatScalar); 22227dc0baabSHong Zhang } 22239371c9d4SSatish Balay N = nrow++ - 1; 22249371c9d4SSatish Balay high++; 222592c4ed94SBarry Smith /* shift up all the later entries in this row */ 22269566063dSJacob Faibussowitsch PetscCall(PetscArraymove(rp + i + 1, rp + i, N - i + 1)); 222792c4ed94SBarry Smith rp[i] = col; 22287dc0baabSHong Zhang if (!A->structure_only) { 22299566063dSJacob Faibussowitsch PetscCall(PetscArraymove(ap + bs2 * (i + 1), ap + bs2 * i, bs2 * (N - i + 
1))); 22308a84c255SSatish Balay bap = ap + bs2 * i; 22310e324ae4SSatish Balay if (roworiented) { 2232dd9472c6SBarry Smith for (ii = 0; ii < bs; ii++, value += stepval) { 2233ad540459SPierre Jolivet for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++; 2234dd9472c6SBarry Smith } 22350e324ae4SSatish Balay } else { 2236dd9472c6SBarry Smith for (ii = 0; ii < bs; ii++, value += stepval) { 2237ad540459SPierre Jolivet for (jj = 0; jj < bs; jj++) *bap++ = *value++; 2238dd9472c6SBarry Smith } 2239dd9472c6SBarry Smith } 22407dc0baabSHong Zhang } 2241f1241b54SBarry Smith noinsert2:; 224292c4ed94SBarry Smith low = i; 224392c4ed94SBarry Smith } 224492c4ed94SBarry Smith ailen[row] = nrow; 224592c4ed94SBarry Smith } 22463ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 224792c4ed94SBarry Smith } 224826e093fcSHong Zhang 2249d71ae5a4SJacob Faibussowitsch PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A, MatAssemblyType mode) 2250d71ae5a4SJacob Faibussowitsch { 2251584200bdSSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 2252580bdb30SBarry Smith PetscInt fshift = 0, i, *ai = a->i, *aj = a->j, *imax = a->imax; 2253d0f46423SBarry Smith PetscInt m = A->rmap->N, *ip, N, *ailen = a->ilen; 2254c1ac3661SBarry Smith PetscInt mbs = a->mbs, bs2 = a->bs2, rmax = 0; 22553f1db9ecSBarry Smith MatScalar *aa = a->a, *ap; 22563447b6efSHong Zhang PetscReal ratio = 0.6; 2257584200bdSSatish Balay 22583a40ed3dSBarry Smith PetscFunctionBegin; 2259d32568d8SPierre Jolivet if (mode == MAT_FLUSH_ASSEMBLY || (A->was_assembled && A->ass_nonzerostate == A->nonzerostate)) PetscFunctionReturn(PETSC_SUCCESS); 2260584200bdSSatish Balay 226143ee02c3SBarry Smith if (m) rmax = ailen[0]; 2262584200bdSSatish Balay for (i = 1; i < mbs; i++) { 2263584200bdSSatish Balay /* move each row back by the amount of empty slots (fshift) before it*/ 2264584200bdSSatish Balay fshift += imax[i - 1] - ailen[i - 1]; 2265d402145bSBarry Smith rmax = PetscMax(rmax, ailen[i]); 2266584200bdSSatish Balay if (fshift) { 
2267580bdb30SBarry Smith ip = aj + ai[i]; 2268580bdb30SBarry Smith ap = aa + bs2 * ai[i]; 2269584200bdSSatish Balay N = ailen[i]; 22709566063dSJacob Faibussowitsch PetscCall(PetscArraymove(ip - fshift, ip, N)); 227148a46eb9SPierre Jolivet if (!A->structure_only) PetscCall(PetscArraymove(ap - bs2 * fshift, ap, bs2 * N)); 2272672ba085SHong Zhang } 2273584200bdSSatish Balay ai[i] = ai[i - 1] + ailen[i - 1]; 2274584200bdSSatish Balay } 2275584200bdSSatish Balay if (mbs) { 2276584200bdSSatish Balay fshift += imax[mbs - 1] - ailen[mbs - 1]; 2277584200bdSSatish Balay ai[mbs] = ai[mbs - 1] + ailen[mbs - 1]; 2278584200bdSSatish Balay } 22797c565772SBarry Smith 2280584200bdSSatish Balay /* reset ilen and imax for each row */ 22817c565772SBarry Smith a->nonzerorowcnt = 0; 2282672ba085SHong Zhang if (A->structure_only) { 22839566063dSJacob Faibussowitsch PetscCall(PetscFree2(a->imax, a->ilen)); 2284672ba085SHong Zhang } else { /* !A->structure_only */ 2285584200bdSSatish Balay for (i = 0; i < mbs; i++) { 2286584200bdSSatish Balay ailen[i] = imax[i] = ai[i + 1] - ai[i]; 22877c565772SBarry Smith a->nonzerorowcnt += ((ai[i + 1] - ai[i]) > 0); 2288584200bdSSatish Balay } 2289672ba085SHong Zhang } 2290a7c10996SSatish Balay a->nz = ai[mbs]; 2291584200bdSSatish Balay 2292584200bdSSatish Balay /* diagonals may have moved, so kill the diagonal pointers */ 2293b01c7715SBarry Smith a->idiagvalid = PETSC_FALSE; 2294ff6a9541SJacob Faibussowitsch if (fshift && a->diag) PetscCall(PetscFree(a->diag)); 22955f80ce2aSJacob Faibussowitsch if (fshift) PetscCheck(a->nounused != -1, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unused space detected in matrix: %" PetscInt_FMT " X %" PetscInt_FMT " block size %" PetscInt_FMT ", %" PetscInt_FMT " unneeded", m, A->cmap->n, A->rmap->bs, fshift * bs2); 22969566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "Matrix size: %" PetscInt_FMT " X %" PetscInt_FMT ", block size %" PetscInt_FMT "; storage space: %" PetscInt_FMT " unneeded, %" PetscInt_FMT " used\n", m, 
A->cmap->n, A->rmap->bs, fshift * bs2, a->nz * bs2)); 22979566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "Number of mallocs during MatSetValues is %" PetscInt_FMT "\n", a->reallocs)); 22989566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "Most nonzeros blocks in any row is %" PetscInt_FMT "\n", rmax)); 229926fbe8dcSKarl Rupp 23008e58a170SBarry Smith A->info.mallocs += a->reallocs; 2301e2f3b5e9SSatish Balay a->reallocs = 0; 23020e6d2581SBarry Smith A->info.nz_unneeded = (PetscReal)fshift * bs2; 2303647a6520SHong Zhang a->rmax = rmax; 2304cf4441caSHong Zhang 230548a46eb9SPierre Jolivet if (!A->structure_only) PetscCall(MatCheckCompressedRow(A, a->nonzerorowcnt, &a->compressedrow, a->i, mbs, ratio)); 23063ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 2307584200bdSSatish Balay } 2308584200bdSSatish Balay 2309bea157c4SSatish Balay /* 2310bea157c4SSatish Balay This function returns an array of flags which indicate the locations of contiguous 2311bea157c4SSatish Balay blocks that should be zeroed. for eg: if bs = 3 and is = [0,1,2,3,5,6,7,8,9] 2312a5b23f4aSJose E. Roman then the resulting sizes = [3,1,1,3,1] corresponding to sets [(0,1,2),(3),(5),(6,7,8),(9)] 2313bea157c4SSatish Balay Assume: sizes should be long enough to hold all the values. 2314bea157c4SSatish Balay */ 2315d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[], PetscInt n, PetscInt bs, PetscInt sizes[], PetscInt *bs_max) 2316d71ae5a4SJacob Faibussowitsch { 2317ff6a9541SJacob Faibussowitsch PetscInt j = 0; 23183a40ed3dSBarry Smith 2319433994e6SBarry Smith PetscFunctionBegin; 2320ff6a9541SJacob Faibussowitsch for (PetscInt i = 0; i < n; j++) { 2321ff6a9541SJacob Faibussowitsch PetscInt row = idx[i]; 2322a5b23f4aSJose E. 
Roman if (row % bs != 0) { /* Not the beginning of a block */ 2323bea157c4SSatish Balay sizes[j] = 1; 2324bea157c4SSatish Balay i++; 2325e4fda26cSSatish Balay } else if (i + bs > n) { /* complete block doesn't exist (at idx end) */ 2326bea157c4SSatish Balay sizes[j] = 1; /* Also makes sure at least 'bs' values exist for next else */ 2327bea157c4SSatish Balay i++; 23286aad120cSJose E. Roman } else { /* Beginning of the block, so check if the complete block exists */ 2329ff6a9541SJacob Faibussowitsch PetscBool flg = PETSC_TRUE; 2330ff6a9541SJacob Faibussowitsch for (PetscInt k = 1; k < bs; k++) { 2331bea157c4SSatish Balay if (row + k != idx[i + k]) { /* break in the block */ 2332bea157c4SSatish Balay flg = PETSC_FALSE; 2333bea157c4SSatish Balay break; 2334d9b7c43dSSatish Balay } 2335bea157c4SSatish Balay } 2336abc0a331SBarry Smith if (flg) { /* No break in the bs */ 2337bea157c4SSatish Balay sizes[j] = bs; 2338bea157c4SSatish Balay i += bs; 2339bea157c4SSatish Balay } else { 2340bea157c4SSatish Balay sizes[j] = 1; 2341bea157c4SSatish Balay i++; 2342bea157c4SSatish Balay } 2343bea157c4SSatish Balay } 2344bea157c4SSatish Balay } 2345bea157c4SSatish Balay *bs_max = j; 23463ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 2347d9b7c43dSSatish Balay } 2348d9b7c43dSSatish Balay 2349d71ae5a4SJacob Faibussowitsch PetscErrorCode MatZeroRows_SeqBAIJ(Mat A, PetscInt is_n, const PetscInt is_idx[], PetscScalar diag, Vec x, Vec b) 2350d71ae5a4SJacob Faibussowitsch { 2351d9b7c43dSSatish Balay Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)A->data; 2352f4df32b1SMatthew Knepley PetscInt i, j, k, count, *rows; 2353d0f46423SBarry Smith PetscInt bs = A->rmap->bs, bs2 = baij->bs2, *sizes, row, bs_max; 235487828ca2SBarry Smith PetscScalar zero = 0.0; 23553f1db9ecSBarry Smith MatScalar *aa; 235697b48c8fSBarry Smith const PetscScalar *xx; 235797b48c8fSBarry Smith PetscScalar *bb; 2358d9b7c43dSSatish Balay 23593a40ed3dSBarry Smith PetscFunctionBegin; 236097b48c8fSBarry Smith /* fix right 
hand side if needed */ 236197b48c8fSBarry Smith if (x && b) { 23629566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(x, &xx)); 23639566063dSJacob Faibussowitsch PetscCall(VecGetArray(b, &bb)); 2364ad540459SPierre Jolivet for (i = 0; i < is_n; i++) bb[is_idx[i]] = diag * xx[is_idx[i]]; 23659566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(x, &xx)); 23669566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(b, &bb)); 236797b48c8fSBarry Smith } 236897b48c8fSBarry Smith 2369d9b7c43dSSatish Balay /* Make a copy of the IS and sort it */ 2370bea157c4SSatish Balay /* allocate memory for rows,sizes */ 23719566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(is_n, &rows, 2 * is_n, &sizes)); 2372bea157c4SSatish Balay 2373563b5814SBarry Smith /* copy IS values to rows, and sort them */ 237426fbe8dcSKarl Rupp for (i = 0; i < is_n; i++) rows[i] = is_idx[i]; 23759566063dSJacob Faibussowitsch PetscCall(PetscSortInt(is_n, rows)); 237697b48c8fSBarry Smith 2377a9817697SBarry Smith if (baij->keepnonzeropattern) { 237826fbe8dcSKarl Rupp for (i = 0; i < is_n; i++) sizes[i] = 1; 2379dffd3267SBarry Smith bs_max = is_n; 2380dffd3267SBarry Smith } else { 23819566063dSJacob Faibussowitsch PetscCall(MatZeroRows_SeqBAIJ_Check_Blocks(rows, is_n, bs, sizes, &bs_max)); 2382e56f5c9eSBarry Smith A->nonzerostate++; 2383dffd3267SBarry Smith } 2384bea157c4SSatish Balay 2385bea157c4SSatish Balay for (i = 0, j = 0; i < bs_max; j += sizes[i], i++) { 2386bea157c4SSatish Balay row = rows[j]; 23875f80ce2aSJacob Faibussowitsch PetscCheck(row >= 0 && row <= A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "row %" PetscInt_FMT " out of range", row); 2388bea157c4SSatish Balay count = (baij->i[row / bs + 1] - baij->i[row / bs]) * bs; 2389b31fbe3bSSatish Balay aa = ((MatScalar *)(baij->a)) + baij->i[row / bs] * bs2 + (row % bs); 2390a9817697SBarry Smith if (sizes[i] == bs && !baij->keepnonzeropattern) { 2391d4a378daSJed Brown if (diag != (PetscScalar)0.0) { 2392bea157c4SSatish Balay if 
(baij->ilen[row / bs] > 0) { 2393bea157c4SSatish Balay baij->ilen[row / bs] = 1; 2394bea157c4SSatish Balay baij->j[baij->i[row / bs]] = row / bs; 239526fbe8dcSKarl Rupp 23969566063dSJacob Faibussowitsch PetscCall(PetscArrayzero(aa, count * bs)); 2397a07cd24cSSatish Balay } 2398563b5814SBarry Smith /* Now insert all the diagonal values for this bs */ 23999927e4dfSBarry Smith for (k = 0; k < bs; k++) PetscUseTypeMethod(A, setvalues, 1, rows + j + k, 1, rows + j + k, &diag, INSERT_VALUES); 2400f4df32b1SMatthew Knepley } else { /* (diag == 0.0) */ 2401bea157c4SSatish Balay baij->ilen[row / bs] = 0; 2402f4df32b1SMatthew Knepley } /* end (diag == 0.0) */ 2403bea157c4SSatish Balay } else { /* (sizes[i] != bs) */ 24046bdcaf15SBarry Smith PetscAssert(sizes[i] == 1, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Internal Error. Value should be 1"); 2405bea157c4SSatish Balay for (k = 0; k < count; k++) { 2406d9b7c43dSSatish Balay aa[0] = zero; 2407d9b7c43dSSatish Balay aa += bs; 2408d9b7c43dSSatish Balay } 24099927e4dfSBarry Smith if (diag != (PetscScalar)0.0) PetscUseTypeMethod(A, setvalues, 1, rows + j, 1, rows + j, &diag, INSERT_VALUES); 2410d9b7c43dSSatish Balay } 2411bea157c4SSatish Balay } 2412bea157c4SSatish Balay 24139566063dSJacob Faibussowitsch PetscCall(PetscFree2(rows, sizes)); 24149566063dSJacob Faibussowitsch PetscCall(MatAssemblyEnd_SeqBAIJ(A, MAT_FINAL_ASSEMBLY)); 24153ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 2416d9b7c43dSSatish Balay } 24171c351548SSatish Balay 2418ff6a9541SJacob Faibussowitsch static PetscErrorCode MatZeroRowsColumns_SeqBAIJ(Mat A, PetscInt is_n, const PetscInt is_idx[], PetscScalar diag, Vec x, Vec b) 2419d71ae5a4SJacob Faibussowitsch { 242097b48c8fSBarry Smith Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)A->data; 242197b48c8fSBarry Smith PetscInt i, j, k, count; 242297b48c8fSBarry Smith PetscInt bs = A->rmap->bs, bs2 = baij->bs2, row, col; 242397b48c8fSBarry Smith PetscScalar zero = 0.0; 242497b48c8fSBarry Smith MatScalar *aa; 
242597b48c8fSBarry Smith const PetscScalar *xx; 242697b48c8fSBarry Smith PetscScalar *bb; 242756777dd2SBarry Smith PetscBool *zeroed, vecs = PETSC_FALSE; 242897b48c8fSBarry Smith 242997b48c8fSBarry Smith PetscFunctionBegin; 243097b48c8fSBarry Smith /* fix right hand side if needed */ 243197b48c8fSBarry Smith if (x && b) { 24329566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(x, &xx)); 24339566063dSJacob Faibussowitsch PetscCall(VecGetArray(b, &bb)); 243456777dd2SBarry Smith vecs = PETSC_TRUE; 243597b48c8fSBarry Smith } 243697b48c8fSBarry Smith 243797b48c8fSBarry Smith /* zero the columns */ 24389566063dSJacob Faibussowitsch PetscCall(PetscCalloc1(A->rmap->n, &zeroed)); 243997b48c8fSBarry Smith for (i = 0; i < is_n; i++) { 24405f80ce2aSJacob Faibussowitsch PetscCheck(is_idx[i] >= 0 && is_idx[i] < A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "row %" PetscInt_FMT " out of range", is_idx[i]); 244197b48c8fSBarry Smith zeroed[is_idx[i]] = PETSC_TRUE; 244297b48c8fSBarry Smith } 244397b48c8fSBarry Smith for (i = 0; i < A->rmap->N; i++) { 244497b48c8fSBarry Smith if (!zeroed[i]) { 244597b48c8fSBarry Smith row = i / bs; 244697b48c8fSBarry Smith for (j = baij->i[row]; j < baij->i[row + 1]; j++) { 244797b48c8fSBarry Smith for (k = 0; k < bs; k++) { 244897b48c8fSBarry Smith col = bs * baij->j[j] + k; 244997b48c8fSBarry Smith if (zeroed[col]) { 245097b48c8fSBarry Smith aa = ((MatScalar *)(baij->a)) + j * bs2 + (i % bs) + bs * k; 245156777dd2SBarry Smith if (vecs) bb[i] -= aa[0] * xx[col]; 245297b48c8fSBarry Smith aa[0] = 0.0; 245397b48c8fSBarry Smith } 245497b48c8fSBarry Smith } 245597b48c8fSBarry Smith } 245656777dd2SBarry Smith } else if (vecs) bb[i] = diag * xx[i]; 245797b48c8fSBarry Smith } 24589566063dSJacob Faibussowitsch PetscCall(PetscFree(zeroed)); 245956777dd2SBarry Smith if (vecs) { 24609566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(x, &xx)); 24619566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(b, &bb)); 246256777dd2SBarry Smith } 
246397b48c8fSBarry Smith 246497b48c8fSBarry Smith /* zero the rows */ 246597b48c8fSBarry Smith for (i = 0; i < is_n; i++) { 246697b48c8fSBarry Smith row = is_idx[i]; 246797b48c8fSBarry Smith count = (baij->i[row / bs + 1] - baij->i[row / bs]) * bs; 246897b48c8fSBarry Smith aa = ((MatScalar *)(baij->a)) + baij->i[row / bs] * bs2 + (row % bs); 246997b48c8fSBarry Smith for (k = 0; k < count; k++) { 247097b48c8fSBarry Smith aa[0] = zero; 247197b48c8fSBarry Smith aa += bs; 247297b48c8fSBarry Smith } 2473dbbe0bcdSBarry Smith if (diag != (PetscScalar)0.0) PetscUseTypeMethod(A, setvalues, 1, &row, 1, &row, &diag, INSERT_VALUES); 247497b48c8fSBarry Smith } 24759566063dSJacob Faibussowitsch PetscCall(MatAssemblyEnd_SeqBAIJ(A, MAT_FINAL_ASSEMBLY)); 24763ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 247797b48c8fSBarry Smith } 247897b48c8fSBarry Smith 2479d71ae5a4SJacob Faibussowitsch PetscErrorCode MatSetValues_SeqBAIJ(Mat A, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode is) 2480d71ae5a4SJacob Faibussowitsch { 24812d61bbb3SSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 2482e2ee6c50SBarry Smith PetscInt *rp, k, low, high, t, ii, row, nrow, i, col, l, rmax, N, lastcol = -1; 2483c1ac3661SBarry Smith PetscInt *imax = a->imax, *ai = a->i, *ailen = a->ilen; 2484d0f46423SBarry Smith PetscInt *aj = a->j, nonew = a->nonew, bs = A->rmap->bs, brow, bcol; 2485c1ac3661SBarry Smith PetscInt ridx, cidx, bs2 = a->bs2; 2486ace3abfcSBarry Smith PetscBool roworiented = a->roworiented; 2487d8cdefa3SHong Zhang MatScalar *ap = NULL, value = 0.0, *aa = a->a, *bap; 24882d61bbb3SSatish Balay 24892d61bbb3SSatish Balay PetscFunctionBegin; 24902d61bbb3SSatish Balay for (k = 0; k < m; k++) { /* loop over added rows */ 2491085a36d4SBarry Smith row = im[k]; 2492085a36d4SBarry Smith brow = row / bs; 24935ef9f2a5SBarry Smith if (row < 0) continue; 24946bdcaf15SBarry Smith PetscCheck(row < A->rmap->N, PETSC_COMM_SELF, 
PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, row, A->rmap->N - 1); 24958e3a54c0SPierre Jolivet rp = PetscSafePointerPlusOffset(aj, ai[brow]); 24968e3a54c0SPierre Jolivet if (!A->structure_only) ap = PetscSafePointerPlusOffset(aa, bs2 * ai[brow]); 24972d61bbb3SSatish Balay rmax = imax[brow]; 24982d61bbb3SSatish Balay nrow = ailen[brow]; 24992d61bbb3SSatish Balay low = 0; 2500c71e6ed7SBarry Smith high = nrow; 25012d61bbb3SSatish Balay for (l = 0; l < n; l++) { /* loop over added columns */ 25025ef9f2a5SBarry Smith if (in[l] < 0) continue; 25036bdcaf15SBarry Smith PetscCheck(in[l] < A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[l], A->cmap->n - 1); 25049371c9d4SSatish Balay col = in[l]; 25059371c9d4SSatish Balay bcol = col / bs; 25069371c9d4SSatish Balay ridx = row % bs; 25079371c9d4SSatish Balay cidx = col % bs; 2508672ba085SHong Zhang if (!A->structure_only) { 25092d61bbb3SSatish Balay if (roworiented) { 25105ef9f2a5SBarry Smith value = v[l + k * n]; 25112d61bbb3SSatish Balay } else { 25122d61bbb3SSatish Balay value = v[k + l * m]; 25132d61bbb3SSatish Balay } 2514672ba085SHong Zhang } 25159371c9d4SSatish Balay if (col <= lastcol) low = 0; 25169371c9d4SSatish Balay else high = nrow; 2517e2ee6c50SBarry Smith lastcol = col; 25182d61bbb3SSatish Balay while (high - low > 7) { 25192d61bbb3SSatish Balay t = (low + high) / 2; 25202d61bbb3SSatish Balay if (rp[t] > bcol) high = t; 25212d61bbb3SSatish Balay else low = t; 25222d61bbb3SSatish Balay } 25232d61bbb3SSatish Balay for (i = low; i < high; i++) { 25242d61bbb3SSatish Balay if (rp[i] > bcol) break; 25252d61bbb3SSatish Balay if (rp[i] == bcol) { 25268e3a54c0SPierre Jolivet bap = PetscSafePointerPlusOffset(ap, bs2 * i + bs * cidx + ridx); 2527672ba085SHong Zhang if (!A->structure_only) { 25282d61bbb3SSatish Balay if (is == ADD_VALUES) *bap += value; 25292d61bbb3SSatish Balay else *bap = value; 2530672ba085SHong 
Zhang } 25312d61bbb3SSatish Balay goto noinsert1; 25322d61bbb3SSatish Balay } 25332d61bbb3SSatish Balay } 25342d61bbb3SSatish Balay if (nonew == 1) goto noinsert1; 25355f80ce2aSJacob Faibussowitsch PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero (%" PetscInt_FMT ", %" PetscInt_FMT ") in the matrix", row, col); 2536672ba085SHong Zhang if (A->structure_only) { 2537672ba085SHong Zhang MatSeqXAIJReallocateAIJ_structure_only(A, a->mbs, bs2, nrow, brow, bcol, rmax, ai, aj, rp, imax, nonew, MatScalar); 2538672ba085SHong Zhang } else { 2539fef13f97SBarry Smith MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, brow, bcol, rmax, aa, ai, aj, rp, ap, imax, nonew, MatScalar); 2540672ba085SHong Zhang } 25419371c9d4SSatish Balay N = nrow++ - 1; 25429371c9d4SSatish Balay high++; 25432d61bbb3SSatish Balay /* shift up all the later entries in this row */ 25449566063dSJacob Faibussowitsch PetscCall(PetscArraymove(rp + i + 1, rp + i, N - i + 1)); 25452d61bbb3SSatish Balay rp[i] = bcol; 2546580bdb30SBarry Smith if (!A->structure_only) { 25479566063dSJacob Faibussowitsch PetscCall(PetscArraymove(ap + bs2 * (i + 1), ap + bs2 * i, bs2 * (N - i + 1))); 25489566063dSJacob Faibussowitsch PetscCall(PetscArrayzero(ap + bs2 * i, bs2)); 2549580bdb30SBarry Smith ap[bs2 * i + bs * cidx + ridx] = value; 2550580bdb30SBarry Smith } 2551085a36d4SBarry Smith a->nz++; 2552e56f5c9eSBarry Smith A->nonzerostate++; 25532d61bbb3SSatish Balay noinsert1:; 25542d61bbb3SSatish Balay low = i; 25552d61bbb3SSatish Balay } 25562d61bbb3SSatish Balay ailen[brow] = nrow; 25572d61bbb3SSatish Balay } 25583ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 25592d61bbb3SSatish Balay } 25602d61bbb3SSatish Balay 2561ff6a9541SJacob Faibussowitsch static PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA, IS row, IS col, const MatFactorInfo *info) 2562d71ae5a4SJacob Faibussowitsch { 25632d61bbb3SSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)inA->data; 25642d61bbb3SSatish Balay Mat 
outA; 2565ace3abfcSBarry Smith PetscBool row_identity, col_identity; 25662d61bbb3SSatish Balay 25672d61bbb3SSatish Balay PetscFunctionBegin; 25685f80ce2aSJacob Faibussowitsch PetscCheck(info->levels == 0, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only levels = 0 supported for in-place ILU"); 25699566063dSJacob Faibussowitsch PetscCall(ISIdentity(row, &row_identity)); 25709566063dSJacob Faibussowitsch PetscCall(ISIdentity(col, &col_identity)); 25715f80ce2aSJacob Faibussowitsch PetscCheck(row_identity && col_identity, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Row and column permutations must be identity for in-place ILU"); 25722d61bbb3SSatish Balay 25732d61bbb3SSatish Balay outA = inA; 2574d5f3da31SBarry Smith inA->factortype = MAT_FACTOR_LU; 25759566063dSJacob Faibussowitsch PetscCall(PetscFree(inA->solvertype)); 25769566063dSJacob Faibussowitsch PetscCall(PetscStrallocpy(MATSOLVERPETSC, &inA->solvertype)); 25772d61bbb3SSatish Balay 25789566063dSJacob Faibussowitsch PetscCall(MatMarkDiagonal_SeqBAIJ(inA)); 2579cf242676SKris Buschelman 25809566063dSJacob Faibussowitsch PetscCall(PetscObjectReference((PetscObject)row)); 25819566063dSJacob Faibussowitsch PetscCall(ISDestroy(&a->row)); 2582c3122656SLisandro Dalcin a->row = row; 25839566063dSJacob Faibussowitsch PetscCall(PetscObjectReference((PetscObject)col)); 25849566063dSJacob Faibussowitsch PetscCall(ISDestroy(&a->col)); 2585c3122656SLisandro Dalcin a->col = col; 2586c38d4ed2SBarry Smith 2587c38d4ed2SBarry Smith /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */ 25889566063dSJacob Faibussowitsch PetscCall(ISDestroy(&a->icol)); 25899566063dSJacob Faibussowitsch PetscCall(ISInvertPermutation(col, PETSC_DECIDE, &a->icol)); 2590c38d4ed2SBarry Smith 25919566063dSJacob Faibussowitsch PetscCall(MatSeqBAIJSetNumericFactorization_inplace(inA, (PetscBool)(row_identity && col_identity))); 2592aa624791SPierre Jolivet if (!a->solve_work) PetscCall(PetscMalloc1(inA->rmap->N + inA->rmap->bs, &a->solve_work)); 
25939566063dSJacob Faibussowitsch PetscCall(MatLUFactorNumeric(outA, inA, info)); 25943ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 25952d61bbb3SSatish Balay } 2596d9b7c43dSSatish Balay 2597ff6a9541SJacob Faibussowitsch static PetscErrorCode MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat, const PetscInt *indices) 2598d71ae5a4SJacob Faibussowitsch { 259927a8da17SBarry Smith Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data; 260027a8da17SBarry Smith 260127a8da17SBarry Smith PetscFunctionBegin; 2602ff6a9541SJacob Faibussowitsch baij->nz = baij->maxnz; 2603ff6a9541SJacob Faibussowitsch PetscCall(PetscArraycpy(baij->j, indices, baij->nz)); 2604ff6a9541SJacob Faibussowitsch PetscCall(PetscArraycpy(baij->ilen, baij->imax, baij->mbs)); 26053ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 260627a8da17SBarry Smith } 260727a8da17SBarry Smith 260827a8da17SBarry Smith /*@ 2609*d8a51d2aSBarry Smith MatSeqBAIJSetColumnIndices - Set the column indices for all the block rows in the matrix. 261027a8da17SBarry Smith 261127a8da17SBarry Smith Input Parameters: 261211a5261eSBarry Smith + mat - the `MATSEQBAIJ` matrix 2613*d8a51d2aSBarry Smith - indices - the block column indices 261427a8da17SBarry Smith 261515091d37SBarry Smith Level: advanced 261615091d37SBarry Smith 261727a8da17SBarry Smith Notes: 261827a8da17SBarry Smith This can be called if you have precomputed the nonzero structure of the 261927a8da17SBarry Smith matrix and want to provide it to the matrix object to improve the performance 262011a5261eSBarry Smith of the `MatSetValues()` operation. 262127a8da17SBarry Smith 262227a8da17SBarry Smith You MUST have set the correct numbers of nonzeros per row in the call to 262311a5261eSBarry Smith `MatCreateSeqBAIJ()`, and the columns indices MUST be sorted. 
262427a8da17SBarry Smith 262511a5261eSBarry Smith MUST be called before any calls to `MatSetValues()` 262627a8da17SBarry Smith 26271cc06b55SBarry Smith .seealso: [](ch_matrices), `Mat`, `MATSEQBAIJ`, `MatSetValues()` 262827a8da17SBarry Smith @*/ 2629d71ae5a4SJacob Faibussowitsch PetscErrorCode MatSeqBAIJSetColumnIndices(Mat mat, PetscInt *indices) 2630d71ae5a4SJacob Faibussowitsch { 263127a8da17SBarry Smith PetscFunctionBegin; 26320700a824SBarry Smith PetscValidHeaderSpecific(mat, MAT_CLASSID, 1); 26334f572ea9SToby Isaac PetscAssertPointer(indices, 2); 2634810441c8SPierre Jolivet PetscUseMethod(mat, "MatSeqBAIJSetColumnIndices_C", (Mat, const PetscInt *), (mat, (const PetscInt *)indices)); 26353ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 263627a8da17SBarry Smith } 263727a8da17SBarry Smith 263866976f2fSJacob Faibussowitsch static PetscErrorCode MatGetRowMaxAbs_SeqBAIJ(Mat A, Vec v, PetscInt idx[]) 2639d71ae5a4SJacob Faibussowitsch { 2640273d9f13SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 2641c1ac3661SBarry Smith PetscInt i, j, n, row, bs, *ai, *aj, mbs; 2642273d9f13SBarry Smith PetscReal atmp; 264387828ca2SBarry Smith PetscScalar *x, zero = 0.0; 2644273d9f13SBarry Smith MatScalar *aa; 2645c1ac3661SBarry Smith PetscInt ncols, brow, krow, kcol; 2646273d9f13SBarry Smith 2647273d9f13SBarry Smith PetscFunctionBegin; 26485f80ce2aSJacob Faibussowitsch /* why is this not a macro???????????????????????????????????????????????????????????????? 
*/ 26495f80ce2aSJacob Faibussowitsch PetscCheck(!A->factortype, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Not for factored matrix"); 2650d0f46423SBarry Smith bs = A->rmap->bs; 2651273d9f13SBarry Smith aa = a->a; 2652273d9f13SBarry Smith ai = a->i; 2653273d9f13SBarry Smith aj = a->j; 2654273d9f13SBarry Smith mbs = a->mbs; 2655273d9f13SBarry Smith 26569566063dSJacob Faibussowitsch PetscCall(VecSet(v, zero)); 26579566063dSJacob Faibussowitsch PetscCall(VecGetArray(v, &x)); 26589566063dSJacob Faibussowitsch PetscCall(VecGetLocalSize(v, &n)); 26595f80ce2aSJacob Faibussowitsch PetscCheck(n == A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Nonconforming matrix and vector"); 2660273d9f13SBarry Smith for (i = 0; i < mbs; i++) { 26619371c9d4SSatish Balay ncols = ai[1] - ai[0]; 26629371c9d4SSatish Balay ai++; 2663273d9f13SBarry Smith brow = bs * i; 2664273d9f13SBarry Smith for (j = 0; j < ncols; j++) { 2665273d9f13SBarry Smith for (kcol = 0; kcol < bs; kcol++) { 2666273d9f13SBarry Smith for (krow = 0; krow < bs; krow++) { 26679371c9d4SSatish Balay atmp = PetscAbsScalar(*aa); 26689371c9d4SSatish Balay aa++; 2669273d9f13SBarry Smith row = brow + krow; /* row index */ 26709371c9d4SSatish Balay if (PetscAbsScalar(x[row]) < atmp) { 26719371c9d4SSatish Balay x[row] = atmp; 26729371c9d4SSatish Balay if (idx) idx[row] = bs * (*aj) + kcol; 26739371c9d4SSatish Balay } 2674273d9f13SBarry Smith } 2675273d9f13SBarry Smith } 2676273d9f13SBarry Smith aj++; 2677273d9f13SBarry Smith } 2678273d9f13SBarry Smith } 26799566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(v, &x)); 26803ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 2681273d9f13SBarry Smith } 2682273d9f13SBarry Smith 2683eede4a3fSMark Adams static PetscErrorCode MatGetRowSumAbs_SeqBAIJ(Mat A, Vec v) 2684eede4a3fSMark Adams { 2685eede4a3fSMark Adams Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 2686eede4a3fSMark Adams PetscInt i, j, n, row, bs, *ai, mbs; 2687eede4a3fSMark Adams PetscReal atmp; 2688eede4a3fSMark 
Adams PetscScalar *x, zero = 0.0; 2689eede4a3fSMark Adams MatScalar *aa; 2690eede4a3fSMark Adams PetscInt ncols, brow, krow, kcol; 2691eede4a3fSMark Adams 2692eede4a3fSMark Adams PetscFunctionBegin; 2693eede4a3fSMark Adams PetscCheck(!A->factortype, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Not for factored matrix"); 2694eede4a3fSMark Adams bs = A->rmap->bs; 2695eede4a3fSMark Adams aa = a->a; 2696eede4a3fSMark Adams ai = a->i; 2697eede4a3fSMark Adams mbs = a->mbs; 2698eede4a3fSMark Adams 2699eede4a3fSMark Adams PetscCall(VecSet(v, zero)); 2700eede4a3fSMark Adams PetscCall(VecGetArrayWrite(v, &x)); 2701eede4a3fSMark Adams PetscCall(VecGetLocalSize(v, &n)); 2702eede4a3fSMark Adams PetscCheck(n == A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Nonconforming matrix and vector"); 2703eede4a3fSMark Adams for (i = 0; i < mbs; i++) { 2704eede4a3fSMark Adams ncols = ai[1] - ai[0]; 2705eede4a3fSMark Adams ai++; 2706eede4a3fSMark Adams brow = bs * i; 2707eede4a3fSMark Adams for (j = 0; j < ncols; j++) { 2708eede4a3fSMark Adams for (kcol = 0; kcol < bs; kcol++) { 2709eede4a3fSMark Adams for (krow = 0; krow < bs; krow++) { 2710eede4a3fSMark Adams atmp = PetscAbsScalar(*aa); 2711eede4a3fSMark Adams aa++; 2712eede4a3fSMark Adams row = brow + krow; /* row index */ 2713eede4a3fSMark Adams x[row] += atmp; 2714eede4a3fSMark Adams } 2715eede4a3fSMark Adams } 2716eede4a3fSMark Adams } 2717eede4a3fSMark Adams } 2718eede4a3fSMark Adams PetscCall(VecRestoreArrayWrite(v, &x)); 2719eede4a3fSMark Adams PetscFunctionReturn(PETSC_SUCCESS); 2720eede4a3fSMark Adams } 2721eede4a3fSMark Adams 272266976f2fSJacob Faibussowitsch static PetscErrorCode MatCopy_SeqBAIJ(Mat A, Mat B, MatStructure str) 2723d71ae5a4SJacob Faibussowitsch { 27243c896bc6SHong Zhang PetscFunctionBegin; 27253c896bc6SHong Zhang /* If the two matrices have the same copy implementation, use fast copy. 
*/ 27263c896bc6SHong Zhang if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) { 27273c896bc6SHong Zhang Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 27283c896bc6SHong Zhang Mat_SeqBAIJ *b = (Mat_SeqBAIJ *)B->data; 2729d88c0aacSHong Zhang PetscInt ambs = a->mbs, bmbs = b->mbs, abs = A->rmap->bs, bbs = B->rmap->bs, bs2 = abs * abs; 27303c896bc6SHong Zhang 27315f80ce2aSJacob Faibussowitsch PetscCheck(a->i[ambs] == b->i[bmbs], PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Number of nonzero blocks in matrices A %" PetscInt_FMT " and B %" PetscInt_FMT " are different", a->i[ambs], b->i[bmbs]); 27325f80ce2aSJacob Faibussowitsch PetscCheck(abs == bbs, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Block size A %" PetscInt_FMT " and B %" PetscInt_FMT " are different", abs, bbs); 27339566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(b->a, a->a, bs2 * a->i[ambs])); 27349566063dSJacob Faibussowitsch PetscCall(PetscObjectStateIncrease((PetscObject)B)); 27353c896bc6SHong Zhang } else { 27369566063dSJacob Faibussowitsch PetscCall(MatCopy_Basic(A, B, str)); 27373c896bc6SHong Zhang } 27383ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 27393c896bc6SHong Zhang } 27403c896bc6SHong Zhang 2741d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatSeqBAIJGetArray_SeqBAIJ(Mat A, PetscScalar *array[]) 2742d71ae5a4SJacob Faibussowitsch { 2743f2a5309cSSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 27446e111a19SKarl Rupp 2745f2a5309cSSatish Balay PetscFunctionBegin; 2746f2a5309cSSatish Balay *array = a->a; 27473ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 2748f2a5309cSSatish Balay } 2749f2a5309cSSatish Balay 2750d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatSeqBAIJRestoreArray_SeqBAIJ(Mat A, PetscScalar *array[]) 2751d71ae5a4SJacob Faibussowitsch { 2752f2a5309cSSatish Balay PetscFunctionBegin; 2753cda14afcSprj- *array = NULL; 27543ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 2755f2a5309cSSatish Balay } 
2756f2a5309cSSatish Balay 2757d71ae5a4SJacob Faibussowitsch PetscErrorCode MatAXPYGetPreallocation_SeqBAIJ(Mat Y, Mat X, PetscInt *nnz) 2758d71ae5a4SJacob Faibussowitsch { 2759b264fe52SHong Zhang PetscInt bs = Y->rmap->bs, mbs = Y->rmap->N / bs; 276052768537SHong Zhang Mat_SeqBAIJ *x = (Mat_SeqBAIJ *)X->data; 276152768537SHong Zhang Mat_SeqBAIJ *y = (Mat_SeqBAIJ *)Y->data; 276252768537SHong Zhang 276352768537SHong Zhang PetscFunctionBegin; 276452768537SHong Zhang /* Set the number of nonzeros in the new matrix */ 27659566063dSJacob Faibussowitsch PetscCall(MatAXPYGetPreallocation_SeqX_private(mbs, x->i, x->j, y->i, y->j, nnz)); 27663ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 276752768537SHong Zhang } 276852768537SHong Zhang 2769d71ae5a4SJacob Faibussowitsch PetscErrorCode MatAXPY_SeqBAIJ(Mat Y, PetscScalar a, Mat X, MatStructure str) 2770d71ae5a4SJacob Faibussowitsch { 277142ee4b1aSHong Zhang Mat_SeqBAIJ *x = (Mat_SeqBAIJ *)X->data, *y = (Mat_SeqBAIJ *)Y->data; 277231ce2d13SHong Zhang PetscInt bs = Y->rmap->bs, bs2 = bs * bs; 2773e838b9e7SJed Brown PetscBLASInt one = 1; 277442ee4b1aSHong Zhang 277542ee4b1aSHong Zhang PetscFunctionBegin; 2776134adf20SPierre Jolivet if (str == UNKNOWN_NONZERO_PATTERN || (PetscDefined(USE_DEBUG) && str == SAME_NONZERO_PATTERN)) { 2777134adf20SPierre Jolivet PetscBool e = x->nz == y->nz && x->mbs == y->mbs && bs == X->rmap->bs ? 
PETSC_TRUE : PETSC_FALSE; 2778134adf20SPierre Jolivet if (e) { 27799566063dSJacob Faibussowitsch PetscCall(PetscArraycmp(x->i, y->i, x->mbs + 1, &e)); 2780134adf20SPierre Jolivet if (e) { 27819566063dSJacob Faibussowitsch PetscCall(PetscArraycmp(x->j, y->j, x->i[x->mbs], &e)); 2782134adf20SPierre Jolivet if (e) str = SAME_NONZERO_PATTERN; 2783134adf20SPierre Jolivet } 2784134adf20SPierre Jolivet } 278554c59aa7SJacob Faibussowitsch if (!e) PetscCheck(str != SAME_NONZERO_PATTERN, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "MatStructure is not SAME_NONZERO_PATTERN"); 2786134adf20SPierre Jolivet } 278742ee4b1aSHong Zhang if (str == SAME_NONZERO_PATTERN) { 2788f4df32b1SMatthew Knepley PetscScalar alpha = a; 2789c5df96a5SBarry Smith PetscBLASInt bnz; 27909566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast(x->nz * bs2, &bnz)); 2791792fecdfSBarry Smith PetscCallBLAS("BLASaxpy", BLASaxpy_(&bnz, &alpha, x->a, &one, y->a, &one)); 27929566063dSJacob Faibussowitsch PetscCall(PetscObjectStateIncrease((PetscObject)Y)); 2793ab784542SHong Zhang } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 27949566063dSJacob Faibussowitsch PetscCall(MatAXPY_Basic(Y, a, X, str)); 279542ee4b1aSHong Zhang } else { 279652768537SHong Zhang Mat B; 279752768537SHong Zhang PetscInt *nnz; 279854c59aa7SJacob Faibussowitsch PetscCheck(bs == X->rmap->bs, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrices must have same block size"); 27999566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(Y->rmap->N, &nnz)); 28009566063dSJacob Faibussowitsch PetscCall(MatCreate(PetscObjectComm((PetscObject)Y), &B)); 28019566063dSJacob Faibussowitsch PetscCall(PetscObjectSetName((PetscObject)B, ((PetscObject)Y)->name)); 28029566063dSJacob Faibussowitsch PetscCall(MatSetSizes(B, Y->rmap->n, Y->cmap->n, Y->rmap->N, Y->cmap->N)); 28039566063dSJacob Faibussowitsch PetscCall(MatSetBlockSizesFromMats(B, Y, Y)); 28049566063dSJacob Faibussowitsch PetscCall(MatSetType(B, 
(MatType)((PetscObject)Y)->type_name)); 28059566063dSJacob Faibussowitsch PetscCall(MatAXPYGetPreallocation_SeqBAIJ(Y, X, nnz)); 28069566063dSJacob Faibussowitsch PetscCall(MatSeqBAIJSetPreallocation(B, bs, 0, nnz)); 28079566063dSJacob Faibussowitsch PetscCall(MatAXPY_BasicWithPreallocation(B, Y, a, X, str)); 28089566063dSJacob Faibussowitsch PetscCall(MatHeaderMerge(Y, &B)); 28099566063dSJacob Faibussowitsch PetscCall(PetscFree(nnz)); 281042ee4b1aSHong Zhang } 28113ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 281242ee4b1aSHong Zhang } 281342ee4b1aSHong Zhang 2814d71ae5a4SJacob Faibussowitsch PETSC_INTERN PetscErrorCode MatConjugate_SeqBAIJ(Mat A) 2815d71ae5a4SJacob Faibussowitsch { 2816ff6a9541SJacob Faibussowitsch #if PetscDefined(USE_COMPLEX) 28172726fb6dSPierre Jolivet Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 28182726fb6dSPierre Jolivet PetscInt i, nz = a->bs2 * a->i[a->mbs]; 28192726fb6dSPierre Jolivet MatScalar *aa = a->a; 28202726fb6dSPierre Jolivet 28212726fb6dSPierre Jolivet PetscFunctionBegin; 28222726fb6dSPierre Jolivet for (i = 0; i < nz; i++) aa[i] = PetscConj(aa[i]); 28233ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 2824ff6a9541SJacob Faibussowitsch #else 2825ff6a9541SJacob Faibussowitsch (void)A; 2826ff6a9541SJacob Faibussowitsch return PETSC_SUCCESS; 2827ff6a9541SJacob Faibussowitsch #endif 28282726fb6dSPierre Jolivet } 28292726fb6dSPierre Jolivet 2830ff6a9541SJacob Faibussowitsch static PetscErrorCode MatRealPart_SeqBAIJ(Mat A) 2831d71ae5a4SJacob Faibussowitsch { 2832ff6a9541SJacob Faibussowitsch #if PetscDefined(USE_COMPLEX) 283399cafbc1SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 283499cafbc1SBarry Smith PetscInt i, nz = a->bs2 * a->i[a->mbs]; 2835dd6ea824SBarry Smith MatScalar *aa = a->a; 283699cafbc1SBarry Smith 283799cafbc1SBarry Smith PetscFunctionBegin; 283899cafbc1SBarry Smith for (i = 0; i < nz; i++) aa[i] = PetscRealPart(aa[i]); 28393ba16761SJacob Faibussowitsch 
PetscFunctionReturn(PETSC_SUCCESS); 2840ff6a9541SJacob Faibussowitsch #else 2841ff6a9541SJacob Faibussowitsch (void)A; 2842ff6a9541SJacob Faibussowitsch return PETSC_SUCCESS; 2843ff6a9541SJacob Faibussowitsch #endif 284499cafbc1SBarry Smith } 284599cafbc1SBarry Smith 2846ff6a9541SJacob Faibussowitsch static PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A) 2847d71ae5a4SJacob Faibussowitsch { 2848ff6a9541SJacob Faibussowitsch #if PetscDefined(USE_COMPLEX) 284999cafbc1SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 285099cafbc1SBarry Smith PetscInt i, nz = a->bs2 * a->i[a->mbs]; 2851dd6ea824SBarry Smith MatScalar *aa = a->a; 285299cafbc1SBarry Smith 285399cafbc1SBarry Smith PetscFunctionBegin; 285499cafbc1SBarry Smith for (i = 0; i < nz; i++) aa[i] = PetscImaginaryPart(aa[i]); 28553ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 2856ff6a9541SJacob Faibussowitsch #else 2857ff6a9541SJacob Faibussowitsch (void)A; 2858ff6a9541SJacob Faibussowitsch return PETSC_SUCCESS; 2859ff6a9541SJacob Faibussowitsch #endif 286099cafbc1SBarry Smith } 286199cafbc1SBarry Smith 28623acb8795SBarry Smith /* 28632479783cSJose E. 
Roman Code almost identical to MatGetColumnIJ_SeqAIJ() should share common code 28643acb8795SBarry Smith */ 2865ff6a9541SJacob Faibussowitsch static PetscErrorCode MatGetColumnIJ_SeqBAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *nn, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done) 2866d71ae5a4SJacob Faibussowitsch { 28673acb8795SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 28683acb8795SBarry Smith PetscInt bs = A->rmap->bs, i, *collengths, *cia, *cja, n = A->cmap->n / bs, m = A->rmap->n / bs; 28693acb8795SBarry Smith PetscInt nz = a->i[m], row, *jj, mr, col; 28703acb8795SBarry Smith 28713acb8795SBarry Smith PetscFunctionBegin; 28723acb8795SBarry Smith *nn = n; 28733ba16761SJacob Faibussowitsch if (!ia) PetscFunctionReturn(PETSC_SUCCESS); 28745f80ce2aSJacob Faibussowitsch PetscCheck(!symmetric, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not for BAIJ matrices"); 28759566063dSJacob Faibussowitsch PetscCall(PetscCalloc1(n, &collengths)); 28769566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(n + 1, &cia)); 28779566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(nz, &cja)); 28783acb8795SBarry Smith jj = a->j; 2879ad540459SPierre Jolivet for (i = 0; i < nz; i++) collengths[jj[i]]++; 28803acb8795SBarry Smith cia[0] = oshift; 2881ad540459SPierre Jolivet for (i = 0; i < n; i++) cia[i + 1] = cia[i] + collengths[i]; 28829566063dSJacob Faibussowitsch PetscCall(PetscArrayzero(collengths, n)); 28833acb8795SBarry Smith jj = a->j; 28843acb8795SBarry Smith for (row = 0; row < m; row++) { 28853acb8795SBarry Smith mr = a->i[row + 1] - a->i[row]; 28863acb8795SBarry Smith for (i = 0; i < mr; i++) { 28873acb8795SBarry Smith col = *jj++; 288826fbe8dcSKarl Rupp 28893acb8795SBarry Smith cja[cia[col] + collengths[col]++ - oshift] = row + oshift; 28903acb8795SBarry Smith } 28913acb8795SBarry Smith } 28929566063dSJacob Faibussowitsch PetscCall(PetscFree(collengths)); 28939371c9d4SSatish Balay *ia = cia; 28949371c9d4SSatish Balay *ja = cja; 
28953ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 28963acb8795SBarry Smith } 28973acb8795SBarry Smith 2898ff6a9541SJacob Faibussowitsch static PetscErrorCode MatRestoreColumnIJ_SeqBAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *n, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done) 2899d71ae5a4SJacob Faibussowitsch { 29003acb8795SBarry Smith PetscFunctionBegin; 29013ba16761SJacob Faibussowitsch if (!ia) PetscFunctionReturn(PETSC_SUCCESS); 29029566063dSJacob Faibussowitsch PetscCall(PetscFree(*ia)); 29039566063dSJacob Faibussowitsch PetscCall(PetscFree(*ja)); 29043ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 29053acb8795SBarry Smith } 29063acb8795SBarry Smith 2907525d23c0SHong Zhang /* 2908525d23c0SHong Zhang MatGetColumnIJ_SeqBAIJ_Color() and MatRestoreColumnIJ_SeqBAIJ_Color() are customized from 2909525d23c0SHong Zhang MatGetColumnIJ_SeqBAIJ() and MatRestoreColumnIJ_SeqBAIJ() by adding an output 2910040ebd07SHong Zhang spidx[], index of a->a, to be used in MatTransposeColoringCreate() and MatFDColoringCreate() 2911525d23c0SHong Zhang */ 2912d71ae5a4SJacob Faibussowitsch PetscErrorCode MatGetColumnIJ_SeqBAIJ_Color(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *nn, const PetscInt *ia[], const PetscInt *ja[], PetscInt *spidx[], PetscBool *done) 2913d71ae5a4SJacob Faibussowitsch { 2914525d23c0SHong Zhang Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 2915c0349474SHong Zhang PetscInt i, *collengths, *cia, *cja, n = a->nbs, m = a->mbs; 2916525d23c0SHong Zhang PetscInt nz = a->i[m], row, *jj, mr, col; 2917525d23c0SHong Zhang PetscInt *cspidx; 2918f6d58c54SBarry Smith 2919f6d58c54SBarry Smith PetscFunctionBegin; 2920525d23c0SHong Zhang *nn = n; 29213ba16761SJacob Faibussowitsch if (!ia) PetscFunctionReturn(PETSC_SUCCESS); 2922f6d58c54SBarry Smith 29239566063dSJacob Faibussowitsch PetscCall(PetscCalloc1(n, &collengths)); 29249566063dSJacob Faibussowitsch 
PetscCall(PetscMalloc1(n + 1, &cia)); 29259566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(nz, &cja)); 29269566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(nz, &cspidx)); 2927525d23c0SHong Zhang jj = a->j; 2928ad540459SPierre Jolivet for (i = 0; i < nz; i++) collengths[jj[i]]++; 2929525d23c0SHong Zhang cia[0] = oshift; 2930ad540459SPierre Jolivet for (i = 0; i < n; i++) cia[i + 1] = cia[i] + collengths[i]; 29319566063dSJacob Faibussowitsch PetscCall(PetscArrayzero(collengths, n)); 2932525d23c0SHong Zhang jj = a->j; 2933525d23c0SHong Zhang for (row = 0; row < m; row++) { 2934525d23c0SHong Zhang mr = a->i[row + 1] - a->i[row]; 2935525d23c0SHong Zhang for (i = 0; i < mr; i++) { 2936525d23c0SHong Zhang col = *jj++; 2937525d23c0SHong Zhang cspidx[cia[col] + collengths[col] - oshift] = a->i[row] + i; /* index of a->j */ 2938525d23c0SHong Zhang cja[cia[col] + collengths[col]++ - oshift] = row + oshift; 2939525d23c0SHong Zhang } 2940525d23c0SHong Zhang } 29419566063dSJacob Faibussowitsch PetscCall(PetscFree(collengths)); 2942071fcb05SBarry Smith *ia = cia; 2943071fcb05SBarry Smith *ja = cja; 2944525d23c0SHong Zhang *spidx = cspidx; 29453ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 2946f6d58c54SBarry Smith } 2947f6d58c54SBarry Smith 2948d71ae5a4SJacob Faibussowitsch PetscErrorCode MatRestoreColumnIJ_SeqBAIJ_Color(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *n, const PetscInt *ia[], const PetscInt *ja[], PetscInt *spidx[], PetscBool *done) 2949d71ae5a4SJacob Faibussowitsch { 2950525d23c0SHong Zhang PetscFunctionBegin; 29519566063dSJacob Faibussowitsch PetscCall(MatRestoreColumnIJ_SeqBAIJ(A, oshift, symmetric, inodecompressed, n, ia, ja, done)); 29529566063dSJacob Faibussowitsch PetscCall(PetscFree(*spidx)); 29533ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 2954f6d58c54SBarry Smith } 295599cafbc1SBarry Smith 295666976f2fSJacob Faibussowitsch static PetscErrorCode MatShift_SeqBAIJ(Mat Y, 
PetscScalar a) 2957d71ae5a4SJacob Faibussowitsch { 29587d68702bSBarry Smith Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)Y->data; 29597d68702bSBarry Smith 29607d68702bSBarry Smith PetscFunctionBegin; 296148a46eb9SPierre Jolivet if (!Y->preallocated || !aij->nz) PetscCall(MatSeqBAIJSetPreallocation(Y, Y->rmap->bs, 1, NULL)); 29629566063dSJacob Faibussowitsch PetscCall(MatShift_Basic(Y, a)); 29633ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 29647d68702bSBarry Smith } 29657d68702bSBarry Smith 296617ea310bSPierre Jolivet PetscErrorCode MatEliminateZeros_SeqBAIJ(Mat A, PetscBool keep) 296717ea310bSPierre Jolivet { 296817ea310bSPierre Jolivet Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 296917ea310bSPierre Jolivet PetscInt fshift = 0, fshift_prev = 0, i, *ai = a->i, *aj = a->j, *imax = a->imax, j, k; 297017ea310bSPierre Jolivet PetscInt m = A->rmap->N, *ailen = a->ilen; 297117ea310bSPierre Jolivet PetscInt mbs = a->mbs, bs2 = a->bs2, rmax = 0; 297217ea310bSPierre Jolivet MatScalar *aa = a->a, *ap; 297317ea310bSPierre Jolivet PetscBool zero; 297417ea310bSPierre Jolivet 297517ea310bSPierre Jolivet PetscFunctionBegin; 297617ea310bSPierre Jolivet PetscCheck(A->assembled, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Cannot eliminate zeros for unassembled matrix"); 297717ea310bSPierre Jolivet if (m) rmax = ailen[0]; 297817ea310bSPierre Jolivet for (i = 1; i <= mbs; i++) { 297917ea310bSPierre Jolivet for (k = ai[i - 1]; k < ai[i]; k++) { 298017ea310bSPierre Jolivet zero = PETSC_TRUE; 298117ea310bSPierre Jolivet ap = aa + bs2 * k; 298217ea310bSPierre Jolivet for (j = 0; j < bs2 && zero; j++) { 298317ea310bSPierre Jolivet if (ap[j] != 0.0) zero = PETSC_FALSE; 298417ea310bSPierre Jolivet } 298517ea310bSPierre Jolivet if (zero && (aj[k] != i - 1 || !keep)) fshift++; 298617ea310bSPierre Jolivet else { 298717ea310bSPierre Jolivet if (zero && aj[k] == i - 1) PetscCall(PetscInfo(A, "Keep the diagonal block at row %" PetscInt_FMT "\n", i - 1)); 298817ea310bSPierre Jolivet aj[k - 
fshift] = aj[k]; 298917ea310bSPierre Jolivet PetscCall(PetscArraymove(ap - bs2 * fshift, ap, bs2)); 299017ea310bSPierre Jolivet } 299117ea310bSPierre Jolivet } 299217ea310bSPierre Jolivet ai[i - 1] -= fshift_prev; 299317ea310bSPierre Jolivet fshift_prev = fshift; 299417ea310bSPierre Jolivet ailen[i - 1] = imax[i - 1] = ai[i] - fshift - ai[i - 1]; 299517ea310bSPierre Jolivet a->nonzerorowcnt += ((ai[i] - fshift - ai[i - 1]) > 0); 299617ea310bSPierre Jolivet rmax = PetscMax(rmax, ailen[i - 1]); 299717ea310bSPierre Jolivet } 299817ea310bSPierre Jolivet if (fshift) { 299917ea310bSPierre Jolivet if (mbs) { 300017ea310bSPierre Jolivet ai[mbs] -= fshift; 300117ea310bSPierre Jolivet a->nz = ai[mbs]; 300217ea310bSPierre Jolivet } 300317ea310bSPierre Jolivet PetscCall(PetscInfo(A, "Matrix size: %" PetscInt_FMT " X %" PetscInt_FMT "; zeros eliminated: %" PetscInt_FMT "; nonzeros left: %" PetscInt_FMT "\n", m, A->cmap->n, fshift, a->nz)); 300417ea310bSPierre Jolivet A->nonzerostate++; 300517ea310bSPierre Jolivet A->info.nz_unneeded += (PetscReal)fshift; 300617ea310bSPierre Jolivet a->rmax = rmax; 300717ea310bSPierre Jolivet PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY)); 300817ea310bSPierre Jolivet PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY)); 300917ea310bSPierre Jolivet } 301017ea310bSPierre Jolivet PetscFunctionReturn(PETSC_SUCCESS); 301117ea310bSPierre Jolivet } 301217ea310bSPierre Jolivet 3013dec0b466SHong Zhang static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ, 3014cc2dc46cSBarry Smith MatGetRow_SeqBAIJ, 3015cc2dc46cSBarry Smith MatRestoreRow_SeqBAIJ, 3016cc2dc46cSBarry Smith MatMult_SeqBAIJ_N, 301797304618SKris Buschelman /* 4*/ MatMultAdd_SeqBAIJ_N, 30187c922b88SBarry Smith MatMultTranspose_SeqBAIJ, 30197c922b88SBarry Smith MatMultTransposeAdd_SeqBAIJ, 3020f4259b30SLisandro Dalcin NULL, 3021f4259b30SLisandro Dalcin NULL, 3022f4259b30SLisandro Dalcin NULL, 3023f4259b30SLisandro Dalcin /* 10*/ NULL, 3024cc2dc46cSBarry Smith MatLUFactor_SeqBAIJ, 
3025f4259b30SLisandro Dalcin NULL, 3026f4259b30SLisandro Dalcin NULL, 3027f2501298SSatish Balay MatTranspose_SeqBAIJ, 302897304618SKris Buschelman /* 15*/ MatGetInfo_SeqBAIJ, 3029cc2dc46cSBarry Smith MatEqual_SeqBAIJ, 3030cc2dc46cSBarry Smith MatGetDiagonal_SeqBAIJ, 3031cc2dc46cSBarry Smith MatDiagonalScale_SeqBAIJ, 3032cc2dc46cSBarry Smith MatNorm_SeqBAIJ, 3033f4259b30SLisandro Dalcin /* 20*/ NULL, 3034cc2dc46cSBarry Smith MatAssemblyEnd_SeqBAIJ, 3035cc2dc46cSBarry Smith MatSetOption_SeqBAIJ, 3036cc2dc46cSBarry Smith MatZeroEntries_SeqBAIJ, 3037d519adbfSMatthew Knepley /* 24*/ MatZeroRows_SeqBAIJ, 3038f4259b30SLisandro Dalcin NULL, 3039f4259b30SLisandro Dalcin NULL, 3040f4259b30SLisandro Dalcin NULL, 3041f4259b30SLisandro Dalcin NULL, 304226cec326SBarry Smith /* 29*/ MatSetUp_Seq_Hash, 3043f4259b30SLisandro Dalcin NULL, 3044f4259b30SLisandro Dalcin NULL, 3045f4259b30SLisandro Dalcin NULL, 3046f4259b30SLisandro Dalcin NULL, 3047d519adbfSMatthew Knepley /* 34*/ MatDuplicate_SeqBAIJ, 3048f4259b30SLisandro Dalcin NULL, 3049f4259b30SLisandro Dalcin NULL, 3050cc2dc46cSBarry Smith MatILUFactor_SeqBAIJ, 3051f4259b30SLisandro Dalcin NULL, 3052d519adbfSMatthew Knepley /* 39*/ MatAXPY_SeqBAIJ, 30537dae84e0SHong Zhang MatCreateSubMatrices_SeqBAIJ, 3054cc2dc46cSBarry Smith MatIncreaseOverlap_SeqBAIJ, 3055cc2dc46cSBarry Smith MatGetValues_SeqBAIJ, 30563c896bc6SHong Zhang MatCopy_SeqBAIJ, 3057f4259b30SLisandro Dalcin /* 44*/ NULL, 3058cc2dc46cSBarry Smith MatScale_SeqBAIJ, 30597d68702bSBarry Smith MatShift_SeqBAIJ, 3060f4259b30SLisandro Dalcin NULL, 306197b48c8fSBarry Smith MatZeroRowsColumns_SeqBAIJ, 3062f4259b30SLisandro Dalcin /* 49*/ NULL, 30633b2fbd54SBarry Smith MatGetRowIJ_SeqBAIJ, 306492c4ed94SBarry Smith MatRestoreRowIJ_SeqBAIJ, 30653acb8795SBarry Smith MatGetColumnIJ_SeqBAIJ, 30663acb8795SBarry Smith MatRestoreColumnIJ_SeqBAIJ, 306793dfae19SHong Zhang /* 54*/ MatFDColoringCreate_SeqXAIJ, 3068f4259b30SLisandro Dalcin NULL, 3069f4259b30SLisandro Dalcin NULL, 
3070090001bdSToby Isaac NULL, 3071d3825aa8SBarry Smith MatSetValuesBlocked_SeqBAIJ, 30727dae84e0SHong Zhang /* 59*/ MatCreateSubMatrix_SeqBAIJ, 3073b9b97703SBarry Smith MatDestroy_SeqBAIJ, 3074b9b97703SBarry Smith MatView_SeqBAIJ, 3075f4259b30SLisandro Dalcin NULL, 3076f4259b30SLisandro Dalcin NULL, 3077f4259b30SLisandro Dalcin /* 64*/ NULL, 3078f4259b30SLisandro Dalcin NULL, 3079f4259b30SLisandro Dalcin NULL, 3080f4259b30SLisandro Dalcin NULL, 3081f4259b30SLisandro Dalcin NULL, 3082d519adbfSMatthew Knepley /* 69*/ MatGetRowMaxAbs_SeqBAIJ, 3083f4259b30SLisandro Dalcin NULL, 3084c87e5d42SMatthew Knepley MatConvert_Basic, 3085f4259b30SLisandro Dalcin NULL, 3086f4259b30SLisandro Dalcin NULL, 3087f4259b30SLisandro Dalcin /* 74*/ NULL, 3088f6d58c54SBarry Smith MatFDColoringApply_BAIJ, 3089f4259b30SLisandro Dalcin NULL, 3090f4259b30SLisandro Dalcin NULL, 3091f4259b30SLisandro Dalcin NULL, 3092f4259b30SLisandro Dalcin /* 79*/ NULL, 3093f4259b30SLisandro Dalcin NULL, 3094f4259b30SLisandro Dalcin NULL, 3095f4259b30SLisandro Dalcin NULL, 30965bba2384SShri Abhyankar MatLoad_SeqBAIJ, 3097f4259b30SLisandro Dalcin /* 84*/ NULL, 3098f4259b30SLisandro Dalcin NULL, 3099f4259b30SLisandro Dalcin NULL, 3100f4259b30SLisandro Dalcin NULL, 3101f4259b30SLisandro Dalcin NULL, 3102f4259b30SLisandro Dalcin /* 89*/ NULL, 3103f4259b30SLisandro Dalcin NULL, 3104f4259b30SLisandro Dalcin NULL, 3105f4259b30SLisandro Dalcin NULL, 3106f4259b30SLisandro Dalcin NULL, 3107f4259b30SLisandro Dalcin /* 94*/ NULL, 3108f4259b30SLisandro Dalcin NULL, 3109f4259b30SLisandro Dalcin NULL, 3110f4259b30SLisandro Dalcin NULL, 3111f4259b30SLisandro Dalcin NULL, 3112f4259b30SLisandro Dalcin /* 99*/ NULL, 3113f4259b30SLisandro Dalcin NULL, 3114f4259b30SLisandro Dalcin NULL, 31152726fb6dSPierre Jolivet MatConjugate_SeqBAIJ, 3116f4259b30SLisandro Dalcin NULL, 3117f4259b30SLisandro Dalcin /*104*/ NULL, 311899cafbc1SBarry Smith MatRealPart_SeqBAIJ, 31192af78befSBarry Smith MatImaginaryPart_SeqBAIJ, 3120f4259b30SLisandro 
Dalcin NULL, 3121f4259b30SLisandro Dalcin NULL, 3122f4259b30SLisandro Dalcin /*109*/ NULL, 3123f4259b30SLisandro Dalcin NULL, 3124f4259b30SLisandro Dalcin NULL, 3125f4259b30SLisandro Dalcin NULL, 3126547795f9SHong Zhang MatMissingDiagonal_SeqBAIJ, 3127f4259b30SLisandro Dalcin /*114*/ NULL, 3128f4259b30SLisandro Dalcin NULL, 3129f4259b30SLisandro Dalcin NULL, 3130f4259b30SLisandro Dalcin NULL, 3131f4259b30SLisandro Dalcin NULL, 3132f4259b30SLisandro Dalcin /*119*/ NULL, 3133f4259b30SLisandro Dalcin NULL, 3134547795f9SHong Zhang MatMultHermitianTranspose_SeqBAIJ, 3135d6037b41SHong Zhang MatMultHermitianTransposeAdd_SeqBAIJ, 3136f4259b30SLisandro Dalcin NULL, 3137f4259b30SLisandro Dalcin /*124*/ NULL, 3138857cbf51SRichard Tran Mills MatGetColumnReductions_SeqBAIJ, 31393964eb88SJed Brown MatInvertBlockDiagonal_SeqBAIJ, 3140f4259b30SLisandro Dalcin NULL, 3141f4259b30SLisandro Dalcin NULL, 3142f4259b30SLisandro Dalcin /*129*/ NULL, 3143f4259b30SLisandro Dalcin NULL, 3144f4259b30SLisandro Dalcin NULL, 3145f4259b30SLisandro Dalcin NULL, 3146f4259b30SLisandro Dalcin NULL, 3147f4259b30SLisandro Dalcin /*134*/ NULL, 3148f4259b30SLisandro Dalcin NULL, 3149f4259b30SLisandro Dalcin NULL, 3150f4259b30SLisandro Dalcin NULL, 3151f4259b30SLisandro Dalcin NULL, 315246533700Sstefano_zampini /*139*/ MatSetBlockSizes_Default, 3153f4259b30SLisandro Dalcin NULL, 3154f4259b30SLisandro Dalcin NULL, 3155bdf6f3fcSHong Zhang MatFDColoringSetUp_SeqXAIJ, 3156f4259b30SLisandro Dalcin NULL, 315786e85357SHong Zhang /*144*/ MatCreateMPIMatConcatenateSeqMat_SeqBAIJ, 3158d70f29a3SPierre Jolivet MatDestroySubMatrices_SeqBAIJ, 3159d70f29a3SPierre Jolivet NULL, 316099a7f59eSMark Adams NULL, 316199a7f59eSMark Adams NULL, 31627fb60732SBarry Smith NULL, 31637fb60732SBarry Smith /*150*/ NULL, 3164eede4a3fSMark Adams MatEliminateZeros_SeqBAIJ, 3165eede4a3fSMark Adams MatGetRowSumAbs_SeqBAIJ}; 31662593348eSBarry Smith 3167ff6a9541SJacob Faibussowitsch static PetscErrorCode MatStoreValues_SeqBAIJ(Mat mat) 
3168d71ae5a4SJacob Faibussowitsch { 31693e90b805SBarry Smith Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data; 31708ece6314SShri Abhyankar PetscInt nz = aij->i[aij->mbs] * aij->bs2; 31713e90b805SBarry Smith 31723e90b805SBarry Smith PetscFunctionBegin; 31735f80ce2aSJacob Faibussowitsch PetscCheck(aij->nonew == 1, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first"); 31743e90b805SBarry Smith 31753e90b805SBarry Smith /* allocate space for values if not already there */ 3176ff6a9541SJacob Faibussowitsch if (!aij->saved_values) PetscCall(PetscMalloc1(nz + 1, &aij->saved_values)); 31773e90b805SBarry Smith 31783e90b805SBarry Smith /* copy values over */ 31799566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(aij->saved_values, aij->a, nz)); 31803ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 31813e90b805SBarry Smith } 31823e90b805SBarry Smith 3183ff6a9541SJacob Faibussowitsch static PetscErrorCode MatRetrieveValues_SeqBAIJ(Mat mat) 3184d71ae5a4SJacob Faibussowitsch { 31853e90b805SBarry Smith Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data; 31868ece6314SShri Abhyankar PetscInt nz = aij->i[aij->mbs] * aij->bs2; 31873e90b805SBarry Smith 31883e90b805SBarry Smith PetscFunctionBegin; 31895f80ce2aSJacob Faibussowitsch PetscCheck(aij->nonew == 1, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first"); 31905f80ce2aSJacob Faibussowitsch PetscCheck(aij->saved_values, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Must call MatStoreValues(A);first"); 31913e90b805SBarry Smith 31923e90b805SBarry Smith /* copy values over */ 31939566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(aij->a, aij->saved_values, nz)); 31943ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 31953e90b805SBarry Smith } 31963e90b805SBarry Smith 3197cc2e6a90SBarry Smith PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType, MatReuse, Mat *); 3198cc2e6a90SBarry Smith PETSC_INTERN 
PetscErrorCode MatConvert_SeqBAIJ_SeqSBAIJ(Mat, MatType, MatReuse, Mat *); 3199273d9f13SBarry Smith 3200f9663b93SPierre Jolivet PetscErrorCode MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B, PetscInt bs, PetscInt nz, const PetscInt nnz[]) 3201d71ae5a4SJacob Faibussowitsch { 3202ad79cf63SBarry Smith Mat_SeqBAIJ *b = (Mat_SeqBAIJ *)B->data; 3203535b19f3SBarry Smith PetscInt i, mbs, nbs, bs2; 32048afaa268SBarry Smith PetscBool flg = PETSC_FALSE, skipallocation = PETSC_FALSE, realalloc = PETSC_FALSE; 3205a23d5eceSKris Buschelman 3206a23d5eceSKris Buschelman PetscFunctionBegin; 3207ad79cf63SBarry Smith if (B->hash_active) { 3208ad79cf63SBarry Smith PetscInt bs; 3209aea10558SJacob Faibussowitsch B->ops[0] = b->cops; 3210ad79cf63SBarry Smith PetscCall(PetscHMapIJVDestroy(&b->ht)); 3211ad79cf63SBarry Smith PetscCall(MatGetBlockSize(B, &bs)); 3212ad79cf63SBarry Smith if (bs > 1) PetscCall(PetscHSetIJDestroy(&b->bht)); 3213ad79cf63SBarry Smith PetscCall(PetscFree(b->dnz)); 3214ad79cf63SBarry Smith PetscCall(PetscFree(b->bdnz)); 3215ad79cf63SBarry Smith B->hash_active = PETSC_FALSE; 3216ad79cf63SBarry Smith } 32172576faa2SJed Brown if (nz >= 0 || nnz) realalloc = PETSC_TRUE; 3218ab93d7beSBarry Smith if (nz == MAT_SKIP_ALLOCATION) { 3219ab93d7beSBarry Smith skipallocation = PETSC_TRUE; 3220ab93d7beSBarry Smith nz = 0; 3221ab93d7beSBarry Smith } 32228c07d4e3SBarry Smith 32239566063dSJacob Faibussowitsch PetscCall(MatSetBlockSize(B, PetscAbs(bs))); 32249566063dSJacob Faibussowitsch PetscCall(PetscLayoutSetUp(B->rmap)); 32259566063dSJacob Faibussowitsch PetscCall(PetscLayoutSetUp(B->cmap)); 32269566063dSJacob Faibussowitsch PetscCall(PetscLayoutGetBlockSize(B->rmap, &bs)); 3227899cda47SBarry Smith 3228899cda47SBarry Smith B->preallocated = PETSC_TRUE; 3229899cda47SBarry Smith 3230d0f46423SBarry Smith mbs = B->rmap->n / bs; 3231d0f46423SBarry Smith nbs = B->cmap->n / bs; 3232a23d5eceSKris Buschelman bs2 = bs * bs; 3233a23d5eceSKris Buschelman 32345f80ce2aSJacob Faibussowitsch 
PetscCheck(mbs * bs == B->rmap->n && nbs * bs == B->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Number rows %" PetscInt_FMT ", cols %" PetscInt_FMT " must be divisible by blocksize %" PetscInt_FMT, B->rmap->N, B->cmap->n, bs); 3235a23d5eceSKris Buschelman 3236a23d5eceSKris Buschelman if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 32375f80ce2aSJacob Faibussowitsch PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "nz cannot be less than 0: value %" PetscInt_FMT, nz); 3238a23d5eceSKris Buschelman if (nnz) { 3239a23d5eceSKris Buschelman for (i = 0; i < mbs; i++) { 32405f80ce2aSJacob Faibussowitsch PetscCheck(nnz[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "nnz cannot be less than 0: local row %" PetscInt_FMT " value %" PetscInt_FMT, i, nnz[i]); 32415f80ce2aSJacob Faibussowitsch PetscCheck(nnz[i] <= nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "nnz cannot be greater than block row length: local row %" PetscInt_FMT " value %" PetscInt_FMT " rowlength %" PetscInt_FMT, i, nnz[i], nbs); 3242a23d5eceSKris Buschelman } 3243a23d5eceSKris Buschelman } 3244a23d5eceSKris Buschelman 3245d0609cedSBarry Smith PetscOptionsBegin(PetscObjectComm((PetscObject)B), NULL, "Optimize options for SEQBAIJ matrix 2 ", "Mat"); 32469566063dSJacob Faibussowitsch PetscCall(PetscOptionsBool("-mat_no_unroll", "Do not optimize for block size (slow)", NULL, flg, &flg, NULL)); 3247d0609cedSBarry Smith PetscOptionsEnd(); 32488c07d4e3SBarry Smith 3249a23d5eceSKris Buschelman if (!flg) { 3250a23d5eceSKris Buschelman switch (bs) { 3251a23d5eceSKris Buschelman case 1: 3252a23d5eceSKris Buschelman B->ops->mult = MatMult_SeqBAIJ_1; 3253a23d5eceSKris Buschelman B->ops->multadd = MatMultAdd_SeqBAIJ_1; 3254a23d5eceSKris Buschelman break; 3255a23d5eceSKris Buschelman case 2: 3256a23d5eceSKris Buschelman B->ops->mult = MatMult_SeqBAIJ_2; 3257a23d5eceSKris Buschelman B->ops->multadd = MatMultAdd_SeqBAIJ_2; 3258a23d5eceSKris Buschelman break; 3259a23d5eceSKris Buschelman case 3: 
3260a23d5eceSKris Buschelman B->ops->mult = MatMult_SeqBAIJ_3; 3261a23d5eceSKris Buschelman B->ops->multadd = MatMultAdd_SeqBAIJ_3; 3262a23d5eceSKris Buschelman break; 3263a23d5eceSKris Buschelman case 4: 3264a23d5eceSKris Buschelman B->ops->mult = MatMult_SeqBAIJ_4; 3265a23d5eceSKris Buschelman B->ops->multadd = MatMultAdd_SeqBAIJ_4; 3266a23d5eceSKris Buschelman break; 3267a23d5eceSKris Buschelman case 5: 3268a23d5eceSKris Buschelman B->ops->mult = MatMult_SeqBAIJ_5; 3269a23d5eceSKris Buschelman B->ops->multadd = MatMultAdd_SeqBAIJ_5; 3270a23d5eceSKris Buschelman break; 3271a23d5eceSKris Buschelman case 6: 3272a23d5eceSKris Buschelman B->ops->mult = MatMult_SeqBAIJ_6; 3273a23d5eceSKris Buschelman B->ops->multadd = MatMultAdd_SeqBAIJ_6; 3274a23d5eceSKris Buschelman break; 3275a23d5eceSKris Buschelman case 7: 3276a23d5eceSKris Buschelman B->ops->mult = MatMult_SeqBAIJ_7; 3277a23d5eceSKris Buschelman B->ops->multadd = MatMultAdd_SeqBAIJ_7; 3278a23d5eceSKris Buschelman break; 32799371c9d4SSatish Balay case 9: { 32806679dcc1SBarry Smith PetscInt version = 1; 32819566063dSJacob Faibussowitsch PetscCall(PetscOptionsGetInt(NULL, ((PetscObject)B)->prefix, "-mat_baij_mult_version", &version, NULL)); 32826679dcc1SBarry Smith switch (version) { 32835f70456aSHong Zhang #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX2__) && defined(__FMA__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES) 32846679dcc1SBarry Smith case 1: 328596e086a2SDaniel Kokron B->ops->mult = MatMult_SeqBAIJ_9_AVX2; 328696e086a2SDaniel Kokron B->ops->multadd = MatMultAdd_SeqBAIJ_9_AVX2; 32879566063dSJacob Faibussowitsch PetscCall(PetscInfo((PetscObject)B, "Using AVX2 for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", bs)); 32886679dcc1SBarry Smith break; 32896679dcc1SBarry Smith #endif 32906679dcc1SBarry Smith default: 329196e086a2SDaniel Kokron B->ops->mult = MatMult_SeqBAIJ_N; 329296e086a2SDaniel Kokron B->ops->multadd = MatMultAdd_SeqBAIJ_N; 
32939566063dSJacob Faibussowitsch PetscCall(PetscInfo((PetscObject)B, "Using BLAS for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", bs)); 329496e086a2SDaniel Kokron break; 32956679dcc1SBarry Smith } 32966679dcc1SBarry Smith break; 32976679dcc1SBarry Smith } 3298ebada01fSBarry Smith case 11: 3299ebada01fSBarry Smith B->ops->mult = MatMult_SeqBAIJ_11; 3300ebada01fSBarry Smith B->ops->multadd = MatMultAdd_SeqBAIJ_11; 3301ebada01fSBarry Smith break; 33029371c9d4SSatish Balay case 12: { 33036679dcc1SBarry Smith PetscInt version = 1; 33049566063dSJacob Faibussowitsch PetscCall(PetscOptionsGetInt(NULL, ((PetscObject)B)->prefix, "-mat_baij_mult_version", &version, NULL)); 33056679dcc1SBarry Smith switch (version) { 33066679dcc1SBarry Smith case 1: 33076679dcc1SBarry Smith B->ops->mult = MatMult_SeqBAIJ_12_ver1; 33086679dcc1SBarry Smith B->ops->multadd = MatMultAdd_SeqBAIJ_12_ver1; 33099566063dSJacob Faibussowitsch PetscCall(PetscInfo((PetscObject)B, "Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", version, bs)); 33108ab949d8SShri Abhyankar break; 33116679dcc1SBarry Smith case 2: 33126679dcc1SBarry Smith B->ops->mult = MatMult_SeqBAIJ_12_ver2; 33136679dcc1SBarry Smith B->ops->multadd = MatMultAdd_SeqBAIJ_12_ver2; 33149566063dSJacob Faibussowitsch PetscCall(PetscInfo((PetscObject)B, "Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", version, bs)); 33156679dcc1SBarry Smith break; 33166679dcc1SBarry Smith #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX2__) && defined(__FMA__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES) 33176679dcc1SBarry Smith case 3: 33186679dcc1SBarry Smith B->ops->mult = MatMult_SeqBAIJ_12_AVX2; 33196679dcc1SBarry Smith B->ops->multadd = MatMultAdd_SeqBAIJ_12_ver1; 33209566063dSJacob Faibussowitsch PetscCall(PetscInfo((PetscObject)B, "Using AVX2 for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", bs)); 
33216679dcc1SBarry Smith break; 33226679dcc1SBarry Smith #endif 3323a23d5eceSKris Buschelman default: 3324a23d5eceSKris Buschelman B->ops->mult = MatMult_SeqBAIJ_N; 3325a23d5eceSKris Buschelman B->ops->multadd = MatMultAdd_SeqBAIJ_N; 33269566063dSJacob Faibussowitsch PetscCall(PetscInfo((PetscObject)B, "Using BLAS for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", bs)); 33276679dcc1SBarry Smith break; 33286679dcc1SBarry Smith } 33296679dcc1SBarry Smith break; 33306679dcc1SBarry Smith } 33319371c9d4SSatish Balay case 15: { 33326679dcc1SBarry Smith PetscInt version = 1; 33339566063dSJacob Faibussowitsch PetscCall(PetscOptionsGetInt(NULL, ((PetscObject)B)->prefix, "-mat_baij_mult_version", &version, NULL)); 33346679dcc1SBarry Smith switch (version) { 33356679dcc1SBarry Smith case 1: 33366679dcc1SBarry Smith B->ops->mult = MatMult_SeqBAIJ_15_ver1; 33379566063dSJacob Faibussowitsch PetscCall(PetscInfo((PetscObject)B, "Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", version, bs)); 33386679dcc1SBarry Smith break; 33396679dcc1SBarry Smith case 2: 33406679dcc1SBarry Smith B->ops->mult = MatMult_SeqBAIJ_15_ver2; 33419566063dSJacob Faibussowitsch PetscCall(PetscInfo((PetscObject)B, "Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", version, bs)); 33426679dcc1SBarry Smith break; 33436679dcc1SBarry Smith case 3: 33446679dcc1SBarry Smith B->ops->mult = MatMult_SeqBAIJ_15_ver3; 33459566063dSJacob Faibussowitsch PetscCall(PetscInfo((PetscObject)B, "Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", version, bs)); 33466679dcc1SBarry Smith break; 33476679dcc1SBarry Smith case 4: 33486679dcc1SBarry Smith B->ops->mult = MatMult_SeqBAIJ_15_ver4; 33499566063dSJacob Faibussowitsch PetscCall(PetscInfo((PetscObject)B, "Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", version, bs)); 33506679dcc1SBarry Smith break; 33516679dcc1SBarry 
Smith default: 33526679dcc1SBarry Smith B->ops->mult = MatMult_SeqBAIJ_N; 33539566063dSJacob Faibussowitsch PetscCall(PetscInfo((PetscObject)B, "Using BLAS for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", bs)); 33546679dcc1SBarry Smith break; 33556679dcc1SBarry Smith } 33566679dcc1SBarry Smith B->ops->multadd = MatMultAdd_SeqBAIJ_N; 33576679dcc1SBarry Smith break; 33586679dcc1SBarry Smith } 33596679dcc1SBarry Smith default: 33606679dcc1SBarry Smith B->ops->mult = MatMult_SeqBAIJ_N; 33616679dcc1SBarry Smith B->ops->multadd = MatMultAdd_SeqBAIJ_N; 33629566063dSJacob Faibussowitsch PetscCall(PetscInfo((PetscObject)B, "Using BLAS for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", bs)); 3363a23d5eceSKris Buschelman break; 3364a23d5eceSKris Buschelman } 3365a23d5eceSKris Buschelman } 3366e48d15efSToby Isaac B->ops->sor = MatSOR_SeqBAIJ; 3367a23d5eceSKris Buschelman b->mbs = mbs; 3368a23d5eceSKris Buschelman b->nbs = nbs; 3369ab93d7beSBarry Smith if (!skipallocation) { 33702ee49352SLisandro Dalcin if (!b->imax) { 33719566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(mbs, &b->imax, mbs, &b->ilen)); 337226fbe8dcSKarl Rupp 33734fd072dbSBarry Smith b->free_imax_ilen = PETSC_TRUE; 33742ee49352SLisandro Dalcin } 3375ab93d7beSBarry Smith /* b->ilen will count nonzeros in each block row so far. 
*/ 337626fbe8dcSKarl Rupp for (i = 0; i < mbs; i++) b->ilen[i] = 0; 3377a23d5eceSKris Buschelman if (!nnz) { 3378a23d5eceSKris Buschelman if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 3379c62bd62aSJed Brown else if (nz < 0) nz = 1; 33805d2a9ed1SStefano Zampini nz = PetscMin(nz, nbs); 3381a23d5eceSKris Buschelman for (i = 0; i < mbs; i++) b->imax[i] = nz; 33829566063dSJacob Faibussowitsch PetscCall(PetscIntMultError(nz, mbs, &nz)); 3383a23d5eceSKris Buschelman } else { 3384c73702f5SBarry Smith PetscInt64 nz64 = 0; 33859371c9d4SSatish Balay for (i = 0; i < mbs; i++) { 33869371c9d4SSatish Balay b->imax[i] = nnz[i]; 33879371c9d4SSatish Balay nz64 += nnz[i]; 33889371c9d4SSatish Balay } 33899566063dSJacob Faibussowitsch PetscCall(PetscIntCast(nz64, &nz)); 3390a23d5eceSKris Buschelman } 3391a23d5eceSKris Buschelman 3392a23d5eceSKris Buschelman /* allocate the matrix space */ 33939566063dSJacob Faibussowitsch PetscCall(MatSeqXAIJFreeAIJ(B, &b->a, &b->j, &b->i)); 3394672ba085SHong Zhang if (B->structure_only) { 33959566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(nz, &b->j)); 33969566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(B->rmap->N + 1, &b->i)); 3397672ba085SHong Zhang } else { 33986679dcc1SBarry Smith PetscInt nzbs2 = 0; 33999566063dSJacob Faibussowitsch PetscCall(PetscIntMultError(nz, bs2, &nzbs2)); 34009566063dSJacob Faibussowitsch PetscCall(PetscMalloc3(nzbs2, &b->a, nz, &b->j, B->rmap->N + 1, &b->i)); 34019566063dSJacob Faibussowitsch PetscCall(PetscArrayzero(b->a, nz * bs2)); 3402672ba085SHong Zhang } 34039566063dSJacob Faibussowitsch PetscCall(PetscArrayzero(b->j, nz)); 340426fbe8dcSKarl Rupp 3405672ba085SHong Zhang if (B->structure_only) { 3406672ba085SHong Zhang b->singlemalloc = PETSC_FALSE; 3407672ba085SHong Zhang b->free_a = PETSC_FALSE; 3408672ba085SHong Zhang } else { 3409a23d5eceSKris Buschelman b->singlemalloc = PETSC_TRUE; 3410672ba085SHong Zhang b->free_a = PETSC_TRUE; 3411672ba085SHong Zhang } 3412672ba085SHong Zhang b->free_ij = 
PETSC_TRUE; 3413672ba085SHong Zhang 3414a23d5eceSKris Buschelman b->i[0] = 0; 3415ad540459SPierre Jolivet for (i = 1; i < mbs + 1; i++) b->i[i] = b->i[i - 1] + b->imax[i - 1]; 3416672ba085SHong Zhang 3417e811da20SHong Zhang } else { 3418e6b907acSBarry Smith b->free_a = PETSC_FALSE; 3419e6b907acSBarry Smith b->free_ij = PETSC_FALSE; 3420ab93d7beSBarry Smith } 3421a23d5eceSKris Buschelman 3422a23d5eceSKris Buschelman b->bs2 = bs2; 3423a23d5eceSKris Buschelman b->mbs = mbs; 3424a23d5eceSKris Buschelman b->nz = 0; 3425b32cb4a7SJed Brown b->maxnz = nz; 3426b32cb4a7SJed Brown B->info.nz_unneeded = (PetscReal)b->maxnz * bs2; 3427cb7b82ddSBarry Smith B->was_assembled = PETSC_FALSE; 3428cb7b82ddSBarry Smith B->assembled = PETSC_FALSE; 34299566063dSJacob Faibussowitsch if (realalloc) PetscCall(MatSetOption(B, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_TRUE)); 34303ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 3431a23d5eceSKris Buschelman } 3432a23d5eceSKris Buschelman 343366976f2fSJacob Faibussowitsch static PetscErrorCode MatSeqBAIJSetPreallocationCSR_SeqBAIJ(Mat B, PetscInt bs, const PetscInt ii[], const PetscInt jj[], const PetscScalar V[]) 3434d71ae5a4SJacob Faibussowitsch { 3435725b52f3SLisandro Dalcin PetscInt i, m, nz, nz_max = 0, *nnz; 3436f4259b30SLisandro Dalcin PetscScalar *values = NULL; 3437d47bf9aaSJed Brown PetscBool roworiented = ((Mat_SeqBAIJ *)B->data)->roworiented; 3438725b52f3SLisandro Dalcin 3439725b52f3SLisandro Dalcin PetscFunctionBegin; 34405f80ce2aSJacob Faibussowitsch PetscCheck(bs >= 1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Invalid block size specified, must be positive but it is %" PetscInt_FMT, bs); 34419566063dSJacob Faibussowitsch PetscCall(PetscLayoutSetBlockSize(B->rmap, bs)); 34429566063dSJacob Faibussowitsch PetscCall(PetscLayoutSetBlockSize(B->cmap, bs)); 34439566063dSJacob Faibussowitsch PetscCall(PetscLayoutSetUp(B->rmap)); 34449566063dSJacob Faibussowitsch PetscCall(PetscLayoutSetUp(B->cmap)); 34459566063dSJacob 
Faibussowitsch PetscCall(PetscLayoutGetBlockSize(B->rmap, &bs)); 3446d0f46423SBarry Smith m = B->rmap->n / bs; 3447725b52f3SLisandro Dalcin 34485f80ce2aSJacob Faibussowitsch PetscCheck(ii[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %" PetscInt_FMT, ii[0]); 34499566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(m + 1, &nnz)); 3450725b52f3SLisandro Dalcin for (i = 0; i < m; i++) { 3451cf12db73SBarry Smith nz = ii[i + 1] - ii[i]; 34525f80ce2aSJacob Faibussowitsch PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Local row %" PetscInt_FMT " has a negative number of columns %" PetscInt_FMT, i, nz); 3453725b52f3SLisandro Dalcin nz_max = PetscMax(nz_max, nz); 3454725b52f3SLisandro Dalcin nnz[i] = nz; 3455725b52f3SLisandro Dalcin } 34569566063dSJacob Faibussowitsch PetscCall(MatSeqBAIJSetPreallocation(B, bs, 0, nnz)); 34579566063dSJacob Faibussowitsch PetscCall(PetscFree(nnz)); 3458725b52f3SLisandro Dalcin 3459725b52f3SLisandro Dalcin values = (PetscScalar *)V; 346048a46eb9SPierre Jolivet if (!values) PetscCall(PetscCalloc1(bs * bs * (nz_max + 1), &values)); 3461725b52f3SLisandro Dalcin for (i = 0; i < m; i++) { 3462cf12db73SBarry Smith PetscInt ncols = ii[i + 1] - ii[i]; 3463cf12db73SBarry Smith const PetscInt *icols = jj + ii[i]; 3464bb80cfbbSStefano Zampini if (bs == 1 || !roworiented) { 3465cf12db73SBarry Smith const PetscScalar *svals = values + (V ? (bs * bs * ii[i]) : 0); 34669566063dSJacob Faibussowitsch PetscCall(MatSetValuesBlocked_SeqBAIJ(B, 1, &i, ncols, icols, svals, INSERT_VALUES)); 34673adadaf3SJed Brown } else { 34683adadaf3SJed Brown PetscInt j; 34693adadaf3SJed Brown for (j = 0; j < ncols; j++) { 34703adadaf3SJed Brown const PetscScalar *svals = values + (V ? 
(bs * bs * (ii[i] + j)) : 0); 34719566063dSJacob Faibussowitsch PetscCall(MatSetValuesBlocked_SeqBAIJ(B, 1, &i, 1, &icols[j], svals, INSERT_VALUES)); 34723adadaf3SJed Brown } 34733adadaf3SJed Brown } 3474725b52f3SLisandro Dalcin } 34759566063dSJacob Faibussowitsch if (!V) PetscCall(PetscFree(values)); 34769566063dSJacob Faibussowitsch PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 34779566063dSJacob Faibussowitsch PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 34789566063dSJacob Faibussowitsch PetscCall(MatSetOption(B, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE)); 34793ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 3480725b52f3SLisandro Dalcin } 3481725b52f3SLisandro Dalcin 3482cda14afcSprj- /*@C 348311a5261eSBarry Smith MatSeqBAIJGetArray - gives read/write access to the array where the data for a `MATSEQBAIJ` matrix is stored 3484cda14afcSprj- 3485cda14afcSprj- Not Collective 3486cda14afcSprj- 3487cda14afcSprj- Input Parameter: 3488fe59aa6dSJacob Faibussowitsch . A - a `MATSEQBAIJ` matrix 3489cda14afcSprj- 3490cda14afcSprj- Output Parameter: 3491cda14afcSprj- . 
/*@C
  MatSeqBAIJGetArray - gives read/write access to the array where the data for a `MATSEQBAIJ` matrix is stored

  Not Collective

  Input Parameter:
. A - a `MATSEQBAIJ` matrix

  Output Parameter:
. array - pointer to the data

  Level: intermediate

  Note:
  The request is forwarded to the function composed on `A` under the name "MatSeqBAIJGetArray_C".

.seealso: [](ch_matrices), `Mat`, `MATSEQBAIJ`, `MatSeqBAIJRestoreArray()`, `MatSeqAIJGetArray()`, `MatSeqAIJRestoreArray()`
@*/
PetscErrorCode MatSeqBAIJGetArray(Mat A, PetscScalar **array)
{
  PetscFunctionBegin;
  /* dispatch to the type-specific implementation registered on the object */
  PetscUseMethod(A, "MatSeqBAIJGetArray_C", (Mat, PetscScalar **), (A, array));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@C
  MatSeqBAIJRestoreArray - returns access to the array where the data for a `MATSEQBAIJ` matrix is stored obtained by `MatSeqBAIJGetArray()`

  Not Collective

  Input Parameters:
+ A     - a `MATSEQBAIJ` matrix
- array - pointer to the data

  Level: intermediate

  Note:
  The request is forwarded to the function composed on `A` under the name "MatSeqBAIJRestoreArray_C".

.seealso: [](ch_matrices), `Mat`, `MatSeqBAIJGetArray()`, `MatSeqAIJGetArray()`, `MatSeqAIJRestoreArray()`
@*/
PetscErrorCode MatSeqBAIJRestoreArray(Mat A, PetscScalar **array)
{
  PetscFunctionBegin;
  /* dispatch to the type-specific implementation registered on the object */
  PetscUseMethod(A, "MatSeqBAIJRestoreArray_C", (Mat, PetscScalar **), (A, array));
  PetscFunctionReturn(PETSC_SUCCESS);
}
35270bad9183SKris Buschelman 35280bad9183SKris Buschelman Options Database Keys: 352920f4b53cSBarry Smith + -mat_type seqbaij - sets the matrix type to `MATSEQBAIJ` during a call to `MatSetFromOptions()` 35306679dcc1SBarry Smith - -mat_baij_mult_version version - indicate the version of the matrix-vector product to use (0 often indicates using BLAS) 35310bad9183SKris Buschelman 35320bad9183SKris Buschelman Level: beginner 35330cd7f59aSBarry Smith 35340cd7f59aSBarry Smith Notes: 353511a5261eSBarry Smith `MatSetOptions`(,`MAT_STRUCTURE_ONLY`,`PETSC_TRUE`) may be called for this matrix type. In this no 353611a5261eSBarry Smith space is allocated for the nonzero entries and any entries passed with `MatSetValues()` are ignored 35370bad9183SKris Buschelman 35382ef1f0ffSBarry Smith Run with `-info` to see what version of the matrix-vector product is being used 35396679dcc1SBarry Smith 35401cc06b55SBarry Smith .seealso: [](ch_matrices), `Mat`, `MatCreateSeqBAIJ()` 35410bad9183SKris Buschelman M*/

PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBSTRM(Mat, MatType, MatReuse, Mat *);

/*
  MatCreate_SeqBAIJ - Type constructor for MATSEQBAIJ.

  Checks that the communicator of B has exactly one rank, allocates the Mat_SeqBAIJ
  implementation struct, installs the operation table, composes the type-specific
  "..._C" methods on B and finally sets the type name to MATSEQBAIJ.
*/
PETSC_EXTERN PetscErrorCode MatCreate_SeqBAIJ(Mat B)
{
  PetscMPIInt  size;
  Mat_SeqBAIJ *b;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size));
  PetscCheck(size == 1, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Comm must be of size 1");

  PetscCall(PetscNew(&b));
  B->data   = (void *)b;
  B->ops[0] = MatOps_Values;

  b->row          = NULL;
  b->col          = NULL;
  b->icol         = NULL;
  b->reallocs     = 0;
  b->saved_values = NULL;

  b->roworiented = PETSC_TRUE;
  b->nonew       = 0;
  b->diag        = NULL;
  B->spptr       = NULL;
  /* maxnz and bs2 have not been assigned yet; this relies on PetscNew() returning zeroed memory */
  B->info.nz_unneeded   = (PetscReal)b->maxnz * b->bs2;
  b->keepnonzeropattern = PETSC_FALSE;

  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSeqBAIJGetArray_C", MatSeqBAIJGetArray_SeqBAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSeqBAIJRestoreArray_C", MatSeqBAIJRestoreArray_SeqBAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatStoreValues_C", MatStoreValues_SeqBAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatRetrieveValues_C", MatRetrieveValues_SeqBAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSeqBAIJSetColumnIndices_C", MatSeqBAIJSetColumnIndices_SeqBAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqbaij_seqaij_C", MatConvert_SeqBAIJ_SeqAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqbaij_seqsbaij_C", MatConvert_SeqBAIJ_SeqSBAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSeqBAIJSetPreallocation_C", MatSeqBAIJSetPreallocation_SeqBAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSeqBAIJSetPreallocationCSR_C", MatSeqBAIJSetPreallocationCSR_SeqBAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatIsTranspose_C", MatIsTranspose_SeqBAIJ));
#if defined(PETSC_HAVE_HYPRE)
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqbaij_hypre_C", MatConvert_AIJ_HYPRE));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqbaij_is_C", MatConvert_XAIJ_IS));
  PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATSEQBAIJ));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  MatDuplicateNoCreate_SeqBAIJ - Fills the already created matrix C with a duplicate of A.

  Input Parameters:
+ C              - the matrix to fill, its data must already be a Mat_SeqBAIJ
. A              - the assembled matrix to duplicate
. cpvalues       - `MAT_COPY_VALUES` copies the numerical values, `MAT_SHARE_NONZERO_PATTERN` makes C use
                   the imax, ilen, i, j and diag arrays of A directly (C then keeps a reference to A and
                   only owns its value array), any other option copies the structure and zeros the values
- mallocmatspace - if `PETSC_FALSE` the i, j and a arrays of C are not set up by this routine

  Notes:
  Errors if A is not assembled or if a->i[mbs] does not equal a->nz.

  Work arrays (solve_work, mult_work, sor_work, sor_workt) are not duplicated; they are reset to NULL.
*/
PetscErrorCode MatDuplicateNoCreate_SeqBAIJ(Mat C, Mat A, MatDuplicateOption cpvalues, PetscBool mallocmatspace)
{
  Mat_SeqBAIJ *c = (Mat_SeqBAIJ *)C->data, *a = (Mat_SeqBAIJ *)A->data;
  PetscInt     i, mbs = a->mbs, nz = a->nz, bs2 = a->bs2;

  PetscFunctionBegin;
  PetscCheck(A->assembled, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONGSTATE, "Cannot duplicate unassembled matrix");
  PetscCheck(a->i[mbs] == nz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Corrupt matrix");

  /* per block row capacity (imax) and length (ilen): shared with A or copied */
  if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
    c->imax           = a->imax;
    c->ilen           = a->ilen;
    c->free_imax_ilen = PETSC_FALSE;
  } else {
    PetscCall(PetscMalloc2(mbs, &c->imax, mbs, &c->ilen));
    for (i = 0; i < mbs; i++) {
      c->imax[i] = a->imax[i];
      c->ilen[i] = a->ilen[i];
    }
    c->free_imax_ilen = PETSC_TRUE;
  }

  /* allocate the matrix space */
  if (mallocmatspace) {
    if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
      /* only the values are private to C (zero initialized); the index arrays belong to A */
      PetscCall(PetscCalloc1(bs2 * nz, &c->a));

      c->i            = a->i;
      c->j            = a->j;
      c->singlemalloc = PETSC_FALSE;
      c->free_a       = PETSC_TRUE;
      c->free_ij      = PETSC_FALSE;
      c->parent       = A;
      C->preallocated = PETSC_TRUE;
      C->assembled    = PETSC_TRUE;

      /* C points into A's arrays: hold a reference to A and make new nonzero locations an error in both */
      PetscCall(PetscObjectReference((PetscObject)A));
      PetscCall(MatSetOption(A, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
      PetscCall(MatSetOption(C, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
    } else {
      PetscCall(PetscMalloc3(bs2 * nz, &c->a, nz, &c->j, mbs + 1, &c->i));

      c->singlemalloc = PETSC_TRUE;
      c->free_a       = PETSC_TRUE;
      c->free_ij      = PETSC_TRUE;

      PetscCall(PetscArraycpy(c->i, a->i, mbs + 1));
      if (mbs > 0) {
        PetscCall(PetscArraycpy(c->j, a->j, nz));
        if (cpvalues == MAT_COPY_VALUES) {
          PetscCall(PetscArraycpy(c->a, a->a, bs2 * nz));
        } else {
          PetscCall(PetscArrayzero(c->a, bs2 * nz));
        }
      }
      C->preallocated = PETSC_TRUE;
      C->assembled    = PETSC_TRUE;
    }
  }

  /* must stay after the MatSetOption() calls above so that C ends up with the nonew setting of A */
  c->roworiented = a->roworiented;
  c->nonew       = a->nonew;

  PetscCall(PetscLayoutReference(A->rmap, &C->rmap));
  PetscCall(PetscLayoutReference(A->cmap, &C->cmap));

  c->bs2 = a->bs2;
  c->mbs = a->mbs;
  c->nbs = a->nbs;

  if (a->diag) {
    if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
      c->diag      = a->diag;
      c->free_diag = PETSC_FALSE;
    } else {
      PetscCall(PetscMalloc1(mbs + 1, &c->diag));
      for (i = 0; i < mbs; i++) c->diag[i] = a->diag[i];
      c->free_diag = PETSC_TRUE;
    }
  } else c->diag = NULL;

  c->nz         = a->nz;
  c->maxnz      = a->nz; /* Since we allocate exactly the right amount */
  c->solve_work = NULL;
  c->mult_work  = NULL;
  c->sor_workt  = NULL;
  c->sor_work   = NULL;

  c->compressedrow.use   = a->compressedrow.use;
  c->compressedrow.nrows = a->compressedrow.nrows;
  if (a->compressedrow.use) {
    i = a->compressedrow.nrows;
    PetscCall(PetscMalloc2(i + 1, &c->compressedrow.i, i + 1, &c->compressedrow.rindex));
    PetscCall(PetscArraycpy(c->compressedrow.i, a->compressedrow.i, i + 1));
    PetscCall(PetscArraycpy(c->compressedrow.rindex, a->compressedrow.rindex, i));
  } else {
    c->compressedrow.use    = PETSC_FALSE;
    c->compressedrow.i      = NULL;
    c->compressedrow.rindex = NULL;
  }
  c->nonzerorowcnt = a->nonzerorowcnt;
  C->nonzerostate  = A->nonzerostate;

  /* C gets the same composed "..._C" methods as A */
  PetscCall(PetscFunctionListDuplicate(((PetscObject)A)->qlist, &((PetscObject)C)->qlist));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  MatDuplicate_SeqBAIJ - Creates a new MATSEQBAIJ matrix *B with the sizes of A and fills it with
  MatDuplicateNoCreate_SeqBAIJ(), always allocating the matrix space.
*/
PetscErrorCode MatDuplicate_SeqBAIJ(Mat A, MatDuplicateOption cpvalues, Mat *B)
{
  PetscFunctionBegin;
  PetscCall(MatCreate(PetscObjectComm((PetscObject)A), B));
  PetscCall(MatSetSizes(*B, A->rmap->N, A->cmap->n, A->rmap->N, A->cmap->n));
  PetscCall(MatSetType(*B, MATSEQBAIJ));
  PetscCall(MatDuplicateNoCreate_SeqBAIJ(*B, A, cpvalues, PETSC_TRUE));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  MatLoad_SeqBAIJ_Binary - Loads a matrix from a binary viewer. Used for both SeqBAIJ and SeqSBAIJ matrices.

  The data is read in this order: a 4 entry integer header (MAT_FILE_CLASSID, M, N, nz), the M row
  lengths, the nz column indices and the nz values. The rows are stored point-wise (not by block);
  the block nonzero counts needed for the preallocation are computed from the point-wise column indices.
*/
PetscErrorCode MatLoad_SeqBAIJ_Binary(Mat mat, PetscViewer viewer)
{
  PetscInt     header[4], M, N, nz, bs, m, n, mbs, nbs, rows, cols, sum, i, j, k;
  PetscInt    *rowidxs, *colidxs;
  PetscScalar *matvals;

  PetscFunctionBegin;
  PetscCall(PetscViewerSetUp(viewer));

  /* read matrix header */
  PetscCall(PetscViewerBinaryRead(viewer, header, 4, NULL, PETSC_INT));
  PetscCheck(header[0] == MAT_FILE_CLASSID, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Not a matrix object in file");
  M  = header[1];
  N  = header[2];
  nz = header[3];
  PetscCheck(M >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix row size (%" PetscInt_FMT ") in file is negative", M);
  PetscCheck(N >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix column size (%" PetscInt_FMT ") in file is negative", N);
  PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix stored in special format on disk, cannot load as SeqBAIJ");

  /* set block sizes from the viewer's .info file */
  PetscCall(MatLoad_Binary_BlockSizes(mat, viewer));
  /* set local and global sizes if not set already */
  if (mat->rmap->n < 0) mat->rmap->n = M;
  if (mat->cmap->n < 0) mat->cmap->n = N;
  if (mat->rmap->N < 0) mat->rmap->N = M;
  if (mat->cmap->N < 0) mat->cmap->N = N;
  PetscCall(PetscLayoutSetUp(mat->rmap));
  PetscCall(PetscLayoutSetUp(mat->cmap));

  /* check if the matrix sizes are correct */
  PetscCall(MatGetSize(mat, &rows, &cols));
  PetscCheck(M == rows && N == cols, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%" PetscInt_FMT ", %" PetscInt_FMT ") than the input matrix (%" PetscInt_FMT ", %" PetscInt_FMT ")", M, N, rows, cols);
  PetscCall(MatGetBlockSize(mat, &bs));
  PetscCall(MatGetLocalSize(mat, &m, &n));
  mbs = m / bs;
  nbs = n / bs;

  /* read in the row lengths and turn them into row offsets (prefix sum) */
  PetscCall(PetscMalloc1(m + 1, &rowidxs));
  PetscCall(PetscViewerBinaryRead(viewer, rowidxs + 1, m, NULL, PETSC_INT));
  rowidxs[0] = 0;
  for (i = 0; i < m; i++) rowidxs[i + 1] += rowidxs[i];
  sum = rowidxs[m];
  PetscCheck(sum == nz, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Inconsistent matrix data in file: nonzeros = %" PetscInt_FMT ", sum-row-lengths = %" PetscInt_FMT, nz, sum);

  /* read in column indices and nonzero values */
  PetscCall(PetscMalloc2(rowidxs[m], &colidxs, nz, &matvals));
  PetscCall(PetscViewerBinaryRead(viewer, colidxs, rowidxs[m], NULL, PETSC_INT));
  PetscCall(PetscViewerBinaryRead(viewer, matvals, rowidxs[m], NULL, PETSC_SCALAR));

  { /* preallocate matrix storage */
    PetscBT   bt; /* helper bit set to count nonzeros */
    PetscInt *nnz;
    PetscBool sbaij;

    PetscCall(PetscBTCreate(nbs, &bt));
    PetscCall(PetscCalloc1(mbs, &nnz));
    PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATSEQSBAIJ, &sbaij));
    for (i = 0; i < mbs; i++) {
      /* one bit per block column; a block column is counted the first time it shows up in block row i */
      PetscCall(PetscBTMemzero(nbs, bt));
      for (k = 0; k < bs; k++) {
        PetscInt row = bs * i + k;
        for (j = rowidxs[row]; j < rowidxs[row + 1]; j++) {
          PetscInt col = colidxs[j];
          /* for SBAIJ only entries with col >= row are counted */
          if (!sbaij || col >= row)
            if (!PetscBTLookupSet(bt, col / bs)) nnz[i]++;
        }
      }
    }
    PetscCall(PetscBTDestroy(&bt));
    /* both preallocation routines are called since mat may be either SeqBAIJ or SeqSBAIJ */
    PetscCall(MatSeqBAIJSetPreallocation(mat, bs, 0, nnz));
    PetscCall(MatSeqSBAIJSetPreallocation(mat, bs, 0, nnz));
    PetscCall(PetscFree(nnz));
  }

  /* store matrix values, one point row at a time */
  for (i = 0; i < m; i++) {
    PetscInt row = i, s = rowidxs[i], e = rowidxs[i + 1];
    PetscUseTypeMethod(mat, setvalues, 1, &row, e - s, colidxs + s, matvals + s, INSERT_VALUES);
  }

  PetscCall(PetscFree(rowidxs));
  PetscCall(PetscFree2(colidxs, matvals));
  PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  MatLoad_SeqBAIJ - Loads a matrix from a viewer; only binary viewers (PETSCVIEWERBINARY) are
  supported, any other viewer type generates a PETSC_ERR_SUP error.
*/
PetscErrorCode MatLoad_SeqBAIJ(Mat mat, PetscViewer viewer)
{
  PetscBool isbinary;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
  PetscCheck(isbinary, PetscObjectComm((PetscObject)viewer), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)mat)->type_name);
  PetscCall(MatLoad_SeqBAIJ_Binary(mat, viewer));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@C
  MatCreateSeqBAIJ - Creates a sparse matrix in `MATSEQBAIJ` (block
  compressed row) format.  For good matrix assembly performance the
  user should preallocate the matrix storage by setting the parameter `nz`
  (or the array `nnz`).

  Collective

  Input Parameters:
+ comm - MPI communicator, set to `PETSC_COMM_SELF`
. bs   - size of block, the blocks are ALWAYS square. One can use `MatSetBlockSizes()` to set a different row and column blocksize but the row
         blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with `MatCreateVecs()`
. m    - number of rows
. n    - number of columns
. nz   - number of nonzero blocks per block row (same for all rows)
- nnz  - array containing the number of nonzero blocks in the various block rows
         (possibly different for each block row) or `NULL`

  Output Parameter:
. A - the matrix

  Options Database Keys:
+ -mat_no_unroll  - uses code that does not unroll the loops in the block calculations (much slower)
- -mat_block_size - size of the blocks to use

  Level: intermediate

  Notes:
  It is recommended that one use `MatCreateFromOptions()` or the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`,
  MatXXXXSetPreallocation() paradigm instead of this routine directly.
  [MatXXXXSetPreallocation() is, for example, `MatSeqBAIJSetPreallocation()`]

  The number of rows and columns must be divisible by blocksize.

  If the `nnz` parameter is given then the `nz` parameter is ignored

  A nonzero block is any block that has 1 or more nonzeros in it

  The `MATSEQBAIJ` format is fully compatible with standard Fortran
  storage.  That is, the stored row and column indices can begin at
  either one (as in Fortran) or zero.

  Specify the preallocated storage with either `nz` or `nnz` (not both).
  Set `nz` = `PETSC_DEFAULT` and `nnz` = `NULL` for PETSc to control dynamic memory
  allocation.  See [Sparse Matrices](sec_matsparse) for details.

.seealso: [](ch_matrices), `Mat`, [Sparse Matrices](sec_matsparse), `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatCreateBAIJ()`
@*/
PetscErrorCode MatCreateSeqBAIJ(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt nz, const PetscInt nnz[], Mat *A)
{
  PetscFunctionBegin;
  PetscCall(MatCreate(comm, A));
  PetscCall(MatSetSizes(*A, m, n, m, n));
  PetscCall(MatSetType(*A, MATSEQBAIJ));
  /* MatSeqBAIJSetPreallocation() takes a const array, so nnz is passed through without a cast */
  PetscCall(MatSeqBAIJSetPreallocation(*A, bs, nz, nnz));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@C
  MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros
  per row in the matrix. For good matrix assembly performance the
  user should preallocate the matrix storage by setting the parameter `nz`
  (or the array `nnz`).

  Collective

  Input Parameters:
+ B   - the matrix
. bs  - size of block, the blocks are ALWAYS square. One can use `MatSetBlockSizes()` to set a different row and column blocksize but the row
        blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with `MatCreateVecs()`
. nz  - number of block nonzeros per block row (same for all rows)
- nnz - array containing the number of block nonzeros in the various block rows
        (possibly different for each block row) or `NULL`

  Options Database Keys:
+ -mat_no_unroll  - uses code that does not unroll the loops in the block calculations (much slower)
- -mat_block_size - size of the blocks to use

  Level: intermediate

  Notes:
  If the `nnz` parameter is given then the `nz` parameter is ignored

  You can call `MatGetInfo()` to get information on how effective the preallocation was;
  for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
  You can also run with the option `-info` and look for messages with the string
  malloc in them to see if additional memory allocation was needed.

  The `MATSEQBAIJ` format is fully compatible with standard Fortran
  storage.  That is, the stored row and column indices can begin at
  either one (as in Fortran) or zero.

  Specify the preallocated storage with either `nz` or `nnz` (not both).
  Set `nz` = `PETSC_DEFAULT` and `nnz` = `NULL` for PETSc to control dynamic memory
  allocation.  See [Sparse Matrices](sec_matsparse) for details.

.seealso: [](ch_matrices), `Mat`, [Sparse Matrices](sec_matsparse), `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatCreateBAIJ()`, `MatGetInfo()`
@*/
PetscErrorCode MatSeqBAIJSetPreallocation(Mat B, PetscInt bs, PetscInt nz, const PetscInt nnz[])
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
  PetscValidType(B, 1);
  PetscValidLogicalCollectiveInt(B, bs, 2);
  /* dispatches to the method composed on B under this name */
  PetscTryMethod(B, "MatSeqBAIJSetPreallocation_C", (Mat, PetscInt, PetscInt, const PetscInt[]), (B, bs, nz, nnz));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@C
  MatSeqBAIJSetPreallocationCSR - Creates a sparse sequential matrix in `MATSEQBAIJ` format using the given nonzero structure and (optional) numerical values

  Collective

  Input Parameters:
+ B  - the matrix
. bs - the blocksize
. i  - the indices into `j` for the start of each local row (indices start with zero)
. j  - the column indices for each local row (indices start with zero) these must be sorted for each row
- v  - optional values in the matrix, use `NULL` if not provided

  Level: advanced

  Notes:
  The `i`,`j`,`v` values are COPIED with this routine; to avoid the copy use `MatCreateSeqBAIJWithArrays()`

  The order of the entries in values is specified by the `MatOption` `MAT_ROW_ORIENTED`.  For example, C programs
  may want to use the default `MAT_ROW_ORIENTED` of `PETSC_TRUE` and use an array v[nnz][bs][bs] where the second index is
  over rows within a block and the last index is over columns within a block row.  Fortran programs will likely set
  `MAT_ROW_ORIENTED` of `PETSC_FALSE` and use a Fortran array v(bs,bs,nnz) in which the first index is over rows within a
  block column and the second index is over columns within a block.

  Though this routine has Preallocation() in the name it also sets the exact nonzero locations of the matrix entries and usually the numerical values as well

.seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatCreateSeqBAIJ()`, `MatSetValues()`, `MatSeqBAIJSetPreallocation()`, `MATSEQBAIJ`
@*/
PetscErrorCode MatSeqBAIJSetPreallocationCSR(Mat B, PetscInt bs, const PetscInt i[], const PetscInt j[], const PetscScalar v[])
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
  PetscValidType(B, 1);
  PetscValidLogicalCollectiveInt(B, bs, 2);
  /* dispatches to the method composed on B under this name */
  PetscTryMethod(B, "MatSeqBAIJSetPreallocationCSR_C", (Mat, PetscInt, const PetscInt[], const PetscInt[], const PetscScalar[]), (B, bs, i, j, v));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  MatCreateSeqBAIJWithArrays - Creates a `MATSEQBAIJ` matrix using matrix elements provided by the user.

  Collective

  Input Parameters:
+ comm - must be an MPI communicator of size 1
. bs   - size of block
. m    - number of rows
. n    - number of columns
. i    - row offsets; that is i[0] = 0, i[row] = i[row-1] + number of elements in block row row-1 of the matrix
. j    - column indices
- a    - matrix values

  Output Parameter:
. mat - the matrix

  Level: advanced

  Notes:
  The `i`, `j`, and `a` arrays are not copied by this routine, the user must free these arrays
  once the matrix is destroyed

  You cannot set new nonzero locations into this matrix, that will generate an error.

  The `i` and `j` indices are 0 based

  Currently only `bs` = 1 is supported, any other block size generates an error.

  When block size is greater than 1 the matrix values must be stored using the `MATSEQBAIJ` storage format

  The order of the entries in values is the same as the block compressed sparse row storage format; that is, it is
  the same as a three dimensional array in Fortran values(bs,bs,nnz) that contains the first column of the first
  block, followed by the second column of the first block etc etc.  That is, the blocks are contiguous in memory
  with column-major ordering within blocks.

.seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatCreateBAIJ()`, `MatCreateSeqBAIJ()`
@*/
PetscErrorCode MatCreateSeqBAIJWithArrays(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt i[], PetscInt j[], PetscScalar a[], Mat *mat)
{
  Mat_SeqBAIJ *baij;

  PetscFunctionBegin;
  PetscCheck(bs == 1, PETSC_COMM_SELF, PETSC_ERR_SUP, "block size %" PetscInt_FMT " > 1 is not supported yet", bs);
  if (m > 0) PetscCheck(i[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0");

  PetscCall(MatCreate(comm, mat));
  PetscCall(MatSetSizes(*mat, m, n, m, n));
  PetscCall(MatSetType(*mat, MATSEQBAIJ));
  /* MAT_SKIP_ALLOCATION: the user arrays are installed below instead of allocating storage */
  PetscCall(MatSeqBAIJSetPreallocation(*mat, bs, MAT_SKIP_ALLOCATION, NULL));
  baij = (Mat_SeqBAIJ *)(*mat)->data;
  PetscCall(PetscMalloc2(m, &baij->imax, m, &baij->ilen));

  baij->i = i;
  baij->j = j;
  baij->a = a;

  baij->singlemalloc   = PETSC_FALSE;
  baij->nonew          = -1; /* this indicates that inserting a new value in the matrix that generates a new nonzero is an error */
  baij->free_a         = PETSC_FALSE; /* a, i and j stay owned by the caller */
  baij->free_ij        = PETSC_FALSE;
  baij->free_imax_ilen = PETSC_TRUE;

  for (PetscInt ii = 0; ii < m; ii++) {
    const PetscInt row_len = i[ii + 1] - i[ii];

    PetscCheck(row_len >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Negative row length in i (row indices) row = %" PetscInt_FMT " length = %" PetscInt_FMT, ii, row_len);
    baij->ilen[ii] = baij->imax[ii] = row_len;
  }
  if (PetscDefined(USE_DEBUG)) {
    for (PetscInt ii = 0; ii < baij->i[m]; ii++) {
      PetscCheck(j[ii] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Negative column index at location = %" PetscInt_FMT " index = %" PetscInt_FMT, ii, j[ii]);
      PetscCheck(j[ii] <= n - 1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column index too large at location = %" PetscInt_FMT " index = %" PetscInt_FMT, ii, j[ii]);
    }
  }

  PetscCall(MatAssemblyBegin(*mat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(*mat, MAT_FINAL_ASSEMBLY));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  MatCreateMPIMatConcatenateSeqMat_SeqBAIJ - Thin wrapper that forwards all arguments unchanged to
  MatCreateMPIMatConcatenateSeqMat_MPIBAIJ().
*/
PetscErrorCode MatCreateMPIMatConcatenateSeqMat_SeqBAIJ(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat)
{
  PetscFunctionBegin;
  PetscCall(MatCreateMPIMatConcatenateSeqMat_MPIBAIJ(comm, inmat, n, scall, outmat));
  PetscFunctionReturn(PETSC_SUCCESS);
}