12593348eSBarry Smith /* 2b6490206SBarry Smith Defines the basic matrix operations for the BAIJ (compressed row) 32593348eSBarry Smith matrix storage format. 42593348eSBarry Smith */ 5c6db04a5SJed Brown #include <../src/mat/impls/baij/seq/baij.h> /*I "petscmat.h" I*/ 6c6db04a5SJed Brown #include <petscblaslapack.h> 7af0996ceSBarry Smith #include <petsc/private/kernels/blockinvert.h> 8af0996ceSBarry Smith #include <petsc/private/kernels/blockmatmult.h> 943516a2dSKris Buschelman 1026cec326SBarry Smith /* defines MatSetValues_Seq_Hash(), MatAssemblyEnd_Seq_Hash(), MatSetUp_Seq_Hash() */ 1126cec326SBarry Smith #define TYPE BAIJ 1226cec326SBarry Smith #define TYPE_BS 1326cec326SBarry Smith #include "../src/mat/impls/aij/seq/seqhashmatsetvalues.h" 1426cec326SBarry Smith #undef TYPE_BS 1526cec326SBarry Smith #define TYPE_BS _BS 1626cec326SBarry Smith #define TYPE_BS_ON 1726cec326SBarry Smith #include "../src/mat/impls/aij/seq/seqhashmatsetvalues.h" 1826cec326SBarry Smith #undef TYPE_BS 1926cec326SBarry Smith #include "../src/mat/impls/aij/seq/seqhashmat.h" 2026cec326SBarry Smith #undef TYPE 2126cec326SBarry Smith #undef TYPE_BS_ON 2226cec326SBarry Smith 237ea3e4caSstefano_zampini #if defined(PETSC_HAVE_HYPRE) 247ea3e4caSstefano_zampini PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat, MatType, MatReuse, Mat *); 257ea3e4caSstefano_zampini #endif 267ea3e4caSstefano_zampini 27b5b72c8aSIrina Sokolova #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 28fd9d3c67SJed Brown PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBAIJMKL(Mat, MatType, MatReuse, Mat *); 29b5b72c8aSIrina Sokolova #endif 30c9225affSStefano Zampini PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat, MatType, MatReuse, Mat *); 31b5b72c8aSIrina Sokolova 32*421480d9SBarry Smith MatGetDiagonalMarkers(SeqBAIJ, A->rmap->bs) 33*421480d9SBarry Smith 34ff6a9541SJacob Faibussowitsch static PetscErrorCode MatGetColumnReductions_SeqBAIJ(Mat A, PetscInt type, PetscReal *reductions) 35d71ae5a4SJacob Faibussowitsch { 
369463ebdaSPierre Jolivet Mat_SeqBAIJ *a_aij = (Mat_SeqBAIJ *)A->data; 37ff6a9541SJacob Faibussowitsch PetscInt m, n, ib, jb, bs = A->rmap->bs; 389463ebdaSPierre Jolivet MatScalar *a_val = a_aij->a; 399463ebdaSPierre Jolivet 409463ebdaSPierre Jolivet PetscFunctionBegin; 419566063dSJacob Faibussowitsch PetscCall(MatGetSize(A, &m, &n)); 42ff6a9541SJacob Faibussowitsch PetscCall(PetscArrayzero(reductions, n)); 439463ebdaSPierre Jolivet if (type == NORM_2) { 44ff6a9541SJacob Faibussowitsch for (PetscInt i = a_aij->i[0]; i < a_aij->i[A->rmap->n / bs]; i++) { 459463ebdaSPierre Jolivet for (jb = 0; jb < bs; jb++) { 469463ebdaSPierre Jolivet for (ib = 0; ib < bs; ib++) { 47857cbf51SRichard Tran Mills reductions[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val * *a_val); 489463ebdaSPierre Jolivet a_val++; 499463ebdaSPierre Jolivet } 509463ebdaSPierre Jolivet } 519463ebdaSPierre Jolivet } 529463ebdaSPierre Jolivet } else if (type == NORM_1) { 53ff6a9541SJacob Faibussowitsch for (PetscInt i = a_aij->i[0]; i < a_aij->i[A->rmap->n / bs]; i++) { 549463ebdaSPierre Jolivet for (jb = 0; jb < bs; jb++) { 559463ebdaSPierre Jolivet for (ib = 0; ib < bs; ib++) { 56857cbf51SRichard Tran Mills reductions[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val); 579463ebdaSPierre Jolivet a_val++; 589463ebdaSPierre Jolivet } 599463ebdaSPierre Jolivet } 609463ebdaSPierre Jolivet } 619463ebdaSPierre Jolivet } else if (type == NORM_INFINITY) { 62ff6a9541SJacob Faibussowitsch for (PetscInt i = a_aij->i[0]; i < a_aij->i[A->rmap->n / bs]; i++) { 639463ebdaSPierre Jolivet for (jb = 0; jb < bs; jb++) { 649463ebdaSPierre Jolivet for (ib = 0; ib < bs; ib++) { 656497c311SBarry Smith PetscInt col = A->cmap->rstart + a_aij->j[i] * bs + jb; 66857cbf51SRichard Tran Mills reductions[col] = PetscMax(PetscAbsScalar(*a_val), reductions[col]); 679463ebdaSPierre Jolivet a_val++; 689463ebdaSPierre Jolivet } 699463ebdaSPierre Jolivet } 709463ebdaSPierre Jolivet } 71857cbf51SRichard 
Tran Mills } else if (type == REDUCTION_SUM_REALPART || type == REDUCTION_MEAN_REALPART) { 72ff6a9541SJacob Faibussowitsch for (PetscInt i = a_aij->i[0]; i < a_aij->i[A->rmap->n / bs]; i++) { 73857cbf51SRichard Tran Mills for (jb = 0; jb < bs; jb++) { 74857cbf51SRichard Tran Mills for (ib = 0; ib < bs; ib++) { 75857cbf51SRichard Tran Mills reductions[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscRealPart(*a_val); 76857cbf51SRichard Tran Mills a_val++; 77857cbf51SRichard Tran Mills } 78857cbf51SRichard Tran Mills } 79857cbf51SRichard Tran Mills } 80857cbf51SRichard Tran Mills } else if (type == REDUCTION_SUM_IMAGINARYPART || type == REDUCTION_MEAN_IMAGINARYPART) { 81ff6a9541SJacob Faibussowitsch for (PetscInt i = a_aij->i[0]; i < a_aij->i[A->rmap->n / bs]; i++) { 82857cbf51SRichard Tran Mills for (jb = 0; jb < bs; jb++) { 83857cbf51SRichard Tran Mills for (ib = 0; ib < bs; ib++) { 84857cbf51SRichard Tran Mills reductions[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscImaginaryPart(*a_val); 85857cbf51SRichard Tran Mills a_val++; 86857cbf51SRichard Tran Mills } 87857cbf51SRichard Tran Mills } 88857cbf51SRichard Tran Mills } 89857cbf51SRichard Tran Mills } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Unknown reduction type"); 909463ebdaSPierre Jolivet if (type == NORM_2) { 91ff6a9541SJacob Faibussowitsch for (PetscInt i = 0; i < n; i++) reductions[i] = PetscSqrtReal(reductions[i]); 92857cbf51SRichard Tran Mills } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) { 93ff6a9541SJacob Faibussowitsch for (PetscInt i = 0; i < n; i++) reductions[i] /= m; 949463ebdaSPierre Jolivet } 953ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 969463ebdaSPierre Jolivet } 979463ebdaSPierre Jolivet 9866976f2fSJacob Faibussowitsch static PetscErrorCode MatInvertBlockDiagonal_SeqBAIJ(Mat A, const PetscScalar **values) 99d71ae5a4SJacob Faibussowitsch { 100b01c7715SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 
101*421480d9SBarry Smith PetscInt i, bs = A->rmap->bs, mbs = a->mbs, ipvt[5], bs2 = bs * bs, *v_pivots; 1027f0c90edSBarry Smith MatScalar *v = a->a, *odiag, *diag, work[25], *v_work; 10362bba022SBarry Smith PetscReal shift = 0.0; 1041a9391e3SHong Zhang PetscBool allowzeropivot, zeropivotdetected = PETSC_FALSE; 105*421480d9SBarry Smith const PetscInt *adiag; 106b01c7715SBarry Smith 107b01c7715SBarry Smith PetscFunctionBegin; 108a455e926SHong Zhang allowzeropivot = PetscNot(A->erroriffailure); 109a455e926SHong Zhang 1109797317bSBarry Smith if (a->idiagvalid) { 1119797317bSBarry Smith if (values) *values = a->idiag; 1123ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1139797317bSBarry Smith } 114*421480d9SBarry Smith PetscCall(MatGetDiagonalMarkers_SeqBAIJ(A, &adiag, NULL)); 1153a7d0413SPierre Jolivet if (!a->idiag) PetscCall(PetscMalloc1(bs2 * mbs, &a->idiag)); 116b01c7715SBarry Smith diag = a->idiag; 117bbead8a2SBarry Smith if (values) *values = a->idiag; 118b01c7715SBarry Smith /* factor and invert each block */ 119521d7252SBarry Smith switch (bs) { 120ab040260SJed Brown case 1: 121ab040260SJed Brown for (i = 0; i < mbs; i++) { 122*421480d9SBarry Smith odiag = v + 1 * adiag[i]; 123ab040260SJed Brown diag[0] = odiag[0]; 124ec1892c8SHong Zhang 125ec1892c8SHong Zhang if (PetscAbsScalar(diag[0] + shift) < PETSC_MACHINE_EPSILON) { 126966bd95aSPierre Jolivet PetscCheck(allowzeropivot, PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Zero pivot, row %" PetscInt_FMT " pivot value %g tolerance %g", i, (double)PetscAbsScalar(diag[0]), (double)PETSC_MACHINE_EPSILON); 1277b6c816cSBarry Smith A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 1287b6c816cSBarry Smith A->factorerror_zeropivot_value = PetscAbsScalar(diag[0]); 1297b6c816cSBarry Smith A->factorerror_zeropivot_row = i; 1309566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "Zero pivot, row %" PetscInt_FMT "\n", i)); 131ec1892c8SHong Zhang } 132ec1892c8SHong Zhang 133d4a378daSJed Brown diag[0] = 
(PetscScalar)1.0 / (diag[0] + shift); 134ab040260SJed Brown diag += 1; 135ab040260SJed Brown } 136ab040260SJed Brown break; 137b01c7715SBarry Smith case 2: 138b01c7715SBarry Smith for (i = 0; i < mbs; i++) { 139*421480d9SBarry Smith odiag = v + 4 * adiag[i]; 1409371c9d4SSatish Balay diag[0] = odiag[0]; 1419371c9d4SSatish Balay diag[1] = odiag[1]; 1429371c9d4SSatish Balay diag[2] = odiag[2]; 1439371c9d4SSatish Balay diag[3] = odiag[3]; 1449566063dSJacob Faibussowitsch PetscCall(PetscKernel_A_gets_inverse_A_2(diag, shift, allowzeropivot, &zeropivotdetected)); 1457b6c816cSBarry Smith if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 146b01c7715SBarry Smith diag += 4; 147b01c7715SBarry Smith } 148b01c7715SBarry Smith break; 149b01c7715SBarry Smith case 3: 150b01c7715SBarry Smith for (i = 0; i < mbs; i++) { 151*421480d9SBarry Smith odiag = v + 9 * adiag[i]; 1529371c9d4SSatish Balay diag[0] = odiag[0]; 1539371c9d4SSatish Balay diag[1] = odiag[1]; 1549371c9d4SSatish Balay diag[2] = odiag[2]; 1559371c9d4SSatish Balay diag[3] = odiag[3]; 1569371c9d4SSatish Balay diag[4] = odiag[4]; 1579371c9d4SSatish Balay diag[5] = odiag[5]; 1589371c9d4SSatish Balay diag[6] = odiag[6]; 1599371c9d4SSatish Balay diag[7] = odiag[7]; 160b01c7715SBarry Smith diag[8] = odiag[8]; 1619566063dSJacob Faibussowitsch PetscCall(PetscKernel_A_gets_inverse_A_3(diag, shift, allowzeropivot, &zeropivotdetected)); 1627b6c816cSBarry Smith if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 163b01c7715SBarry Smith diag += 9; 164b01c7715SBarry Smith } 165b01c7715SBarry Smith break; 166b01c7715SBarry Smith case 4: 167b01c7715SBarry Smith for (i = 0; i < mbs; i++) { 168*421480d9SBarry Smith odiag = v + 16 * adiag[i]; 1699566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(diag, odiag, 16)); 1709566063dSJacob Faibussowitsch PetscCall(PetscKernel_A_gets_inverse_A_4(diag, shift, allowzeropivot, &zeropivotdetected)); 1717b6c816cSBarry Smith if (zeropivotdetected) 
A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 172b01c7715SBarry Smith diag += 16; 173b01c7715SBarry Smith } 174b01c7715SBarry Smith break; 175b01c7715SBarry Smith case 5: 176b01c7715SBarry Smith for (i = 0; i < mbs; i++) { 177*421480d9SBarry Smith odiag = v + 25 * adiag[i]; 1789566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(diag, odiag, 25)); 1799566063dSJacob Faibussowitsch PetscCall(PetscKernel_A_gets_inverse_A_5(diag, ipvt, work, shift, allowzeropivot, &zeropivotdetected)); 1807b6c816cSBarry Smith if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 181b01c7715SBarry Smith diag += 25; 182b01c7715SBarry Smith } 183b01c7715SBarry Smith break; 184d49b2adcSBarry Smith case 6: 185d49b2adcSBarry Smith for (i = 0; i < mbs; i++) { 186*421480d9SBarry Smith odiag = v + 36 * adiag[i]; 1879566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(diag, odiag, 36)); 1889566063dSJacob Faibussowitsch PetscCall(PetscKernel_A_gets_inverse_A_6(diag, shift, allowzeropivot, &zeropivotdetected)); 1897b6c816cSBarry Smith if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 190d49b2adcSBarry Smith diag += 36; 191d49b2adcSBarry Smith } 192d49b2adcSBarry Smith break; 193de80f912SBarry Smith case 7: 194de80f912SBarry Smith for (i = 0; i < mbs; i++) { 195*421480d9SBarry Smith odiag = v + 49 * adiag[i]; 1969566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(diag, odiag, 49)); 1979566063dSJacob Faibussowitsch PetscCall(PetscKernel_A_gets_inverse_A_7(diag, shift, allowzeropivot, &zeropivotdetected)); 1987b6c816cSBarry Smith if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 199de80f912SBarry Smith diag += 49; 200de80f912SBarry Smith } 201de80f912SBarry Smith break; 202b01c7715SBarry Smith default: 2039566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(bs, &v_work, bs, &v_pivots)); 204de80f912SBarry Smith for (i = 0; i < mbs; i++) { 205*421480d9SBarry Smith odiag = v + bs2 * adiag[i]; 2069566063dSJacob Faibussowitsch 
PetscCall(PetscArraycpy(diag, odiag, bs2)); 2079566063dSJacob Faibussowitsch PetscCall(PetscKernel_A_gets_inverse_A(bs, diag, v_pivots, v_work, allowzeropivot, &zeropivotdetected)); 2087b6c816cSBarry Smith if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 209de80f912SBarry Smith diag += bs2; 210de80f912SBarry Smith } 2119566063dSJacob Faibussowitsch PetscCall(PetscFree2(v_work, v_pivots)); 212b01c7715SBarry Smith } 213b01c7715SBarry Smith a->idiagvalid = PETSC_TRUE; 2143ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 215b01c7715SBarry Smith } 216b01c7715SBarry Smith 21766976f2fSJacob Faibussowitsch static PetscErrorCode MatSOR_SeqBAIJ(Mat A, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx) 218d71ae5a4SJacob Faibussowitsch { 2196d3beeddSMatthew Knepley Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 220e48d15efSToby Isaac PetscScalar *x, *work, *w, *workt, *t; 221e48d15efSToby Isaac const MatScalar *v, *aa = a->a, *idiag; 222e48d15efSToby Isaac const PetscScalar *b, *xb; 2235455b99fSToby Isaac PetscScalar s[7], xw[7] = {0}; /* avoid some compilers thinking xw is uninitialized */ 224e48d15efSToby Isaac PetscInt m = a->mbs, i, i2, nz, bs = A->rmap->bs, bs2 = bs * bs, k, j, idx, it; 225c1ac3661SBarry Smith const PetscInt *diag, *ai = a->i, *aj = a->j, *vi; 226b01c7715SBarry Smith 227b01c7715SBarry Smith PetscFunctionBegin; 228b01c7715SBarry Smith its = its * lits; 2295f80ce2aSJacob Faibussowitsch PetscCheck(!(flag & SOR_EISENSTAT), PETSC_COMM_SELF, PETSC_ERR_SUP, "No support yet for Eisenstat"); 2305f80ce2aSJacob Faibussowitsch PetscCheck(its > 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Relaxation requires global its %" PetscInt_FMT " and local its %" PetscInt_FMT " both positive", its, lits); 2315f80ce2aSJacob Faibussowitsch PetscCheck(!fshift, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for diagonal shift"); 2325f80ce2aSJacob Faibussowitsch PetscCheck(omega == 1.0, PETSC_COMM_SELF, 
PETSC_ERR_SUP, "No support for non-trivial relaxation factor"); 2335f80ce2aSJacob Faibussowitsch PetscCheck(!(flag & SOR_APPLY_UPPER) && !(flag & SOR_APPLY_LOWER), PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for applying upper or lower triangular parts"); 234b01c7715SBarry Smith 2359566063dSJacob Faibussowitsch if (!a->idiagvalid) PetscCall(MatInvertBlockDiagonal(A, NULL)); 236b01c7715SBarry Smith 2373ba16761SJacob Faibussowitsch if (!m) PetscFunctionReturn(PETSC_SUCCESS); 238b01c7715SBarry Smith diag = a->diag; 239b01c7715SBarry Smith idiag = a->idiag; 240de80f912SBarry Smith k = PetscMax(A->rmap->n, A->cmap->n); 24148a46eb9SPierre Jolivet if (!a->mult_work) PetscCall(PetscMalloc1(k + 1, &a->mult_work)); 24248a46eb9SPierre Jolivet if (!a->sor_workt) PetscCall(PetscMalloc1(k, &a->sor_workt)); 24348a46eb9SPierre Jolivet if (!a->sor_work) PetscCall(PetscMalloc1(bs, &a->sor_work)); 2443475c22fSBarry Smith work = a->mult_work; 2453475c22fSBarry Smith t = a->sor_workt; 246de80f912SBarry Smith w = a->sor_work; 247de80f912SBarry Smith 2489566063dSJacob Faibussowitsch PetscCall(VecGetArray(xx, &x)); 2499566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(bb, &b)); 250de80f912SBarry Smith 251de80f912SBarry Smith if (flag & SOR_ZERO_INITIAL_GUESS) { 252de80f912SBarry Smith if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) { 253e48d15efSToby Isaac switch (bs) { 254e48d15efSToby Isaac case 1: 255e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_1(x, idiag, b); 256e48d15efSToby Isaac t[0] = b[0]; 257e48d15efSToby Isaac i2 = 1; 258e48d15efSToby Isaac idiag += 1; 259e48d15efSToby Isaac for (i = 1; i < m; i++) { 260e48d15efSToby Isaac v = aa + ai[i]; 261e48d15efSToby Isaac vi = aj + ai[i]; 262e48d15efSToby Isaac nz = diag[i] - ai[i]; 263e48d15efSToby Isaac s[0] = b[i2]; 264e48d15efSToby Isaac for (j = 0; j < nz; j++) { 265e48d15efSToby Isaac xw[0] = x[vi[j]]; 266e48d15efSToby Isaac PetscKernel_v_gets_v_minus_A_times_w_1(s, (v + j), xw); 267e48d15efSToby Isaac } 
268e48d15efSToby Isaac t[i2] = s[0]; 269e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_1(xw, idiag, s); 270e48d15efSToby Isaac x[i2] = xw[0]; 271e48d15efSToby Isaac idiag += 1; 272e48d15efSToby Isaac i2 += 1; 273e48d15efSToby Isaac } 274e48d15efSToby Isaac break; 275e48d15efSToby Isaac case 2: 276e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_2(x, idiag, b); 2779371c9d4SSatish Balay t[0] = b[0]; 2789371c9d4SSatish Balay t[1] = b[1]; 279e48d15efSToby Isaac i2 = 2; 280e48d15efSToby Isaac idiag += 4; 281e48d15efSToby Isaac for (i = 1; i < m; i++) { 282e48d15efSToby Isaac v = aa + 4 * ai[i]; 283e48d15efSToby Isaac vi = aj + ai[i]; 284e48d15efSToby Isaac nz = diag[i] - ai[i]; 2859371c9d4SSatish Balay s[0] = b[i2]; 2869371c9d4SSatish Balay s[1] = b[i2 + 1]; 287e48d15efSToby Isaac for (j = 0; j < nz; j++) { 288e48d15efSToby Isaac idx = 2 * vi[j]; 289e48d15efSToby Isaac it = 4 * j; 2909371c9d4SSatish Balay xw[0] = x[idx]; 2919371c9d4SSatish Balay xw[1] = x[1 + idx]; 292e48d15efSToby Isaac PetscKernel_v_gets_v_minus_A_times_w_2(s, (v + it), xw); 293e48d15efSToby Isaac } 2949371c9d4SSatish Balay t[i2] = s[0]; 2959371c9d4SSatish Balay t[i2 + 1] = s[1]; 296e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_2(xw, idiag, s); 2979371c9d4SSatish Balay x[i2] = xw[0]; 2989371c9d4SSatish Balay x[i2 + 1] = xw[1]; 299e48d15efSToby Isaac idiag += 4; 300e48d15efSToby Isaac i2 += 2; 301e48d15efSToby Isaac } 302e48d15efSToby Isaac break; 303e48d15efSToby Isaac case 3: 304e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_3(x, idiag, b); 3059371c9d4SSatish Balay t[0] = b[0]; 3069371c9d4SSatish Balay t[1] = b[1]; 3079371c9d4SSatish Balay t[2] = b[2]; 308e48d15efSToby Isaac i2 = 3; 309e48d15efSToby Isaac idiag += 9; 310e48d15efSToby Isaac for (i = 1; i < m; i++) { 311e48d15efSToby Isaac v = aa + 9 * ai[i]; 312e48d15efSToby Isaac vi = aj + ai[i]; 313e48d15efSToby Isaac nz = diag[i] - ai[i]; 3149371c9d4SSatish Balay s[0] = b[i2]; 3159371c9d4SSatish Balay s[1] = b[i2 + 1]; 3169371c9d4SSatish 
Balay s[2] = b[i2 + 2]; 317e48d15efSToby Isaac while (nz--) { 318e48d15efSToby Isaac idx = 3 * (*vi++); 3199371c9d4SSatish Balay xw[0] = x[idx]; 3209371c9d4SSatish Balay xw[1] = x[1 + idx]; 3219371c9d4SSatish Balay xw[2] = x[2 + idx]; 322e48d15efSToby Isaac PetscKernel_v_gets_v_minus_A_times_w_3(s, v, xw); 323e48d15efSToby Isaac v += 9; 324e48d15efSToby Isaac } 3259371c9d4SSatish Balay t[i2] = s[0]; 3269371c9d4SSatish Balay t[i2 + 1] = s[1]; 3279371c9d4SSatish Balay t[i2 + 2] = s[2]; 328e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_3(xw, idiag, s); 3299371c9d4SSatish Balay x[i2] = xw[0]; 3309371c9d4SSatish Balay x[i2 + 1] = xw[1]; 3319371c9d4SSatish Balay x[i2 + 2] = xw[2]; 332e48d15efSToby Isaac idiag += 9; 333e48d15efSToby Isaac i2 += 3; 334e48d15efSToby Isaac } 335e48d15efSToby Isaac break; 336e48d15efSToby Isaac case 4: 337e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_4(x, idiag, b); 3389371c9d4SSatish Balay t[0] = b[0]; 3399371c9d4SSatish Balay t[1] = b[1]; 3409371c9d4SSatish Balay t[2] = b[2]; 3419371c9d4SSatish Balay t[3] = b[3]; 342e48d15efSToby Isaac i2 = 4; 343e48d15efSToby Isaac idiag += 16; 344e48d15efSToby Isaac for (i = 1; i < m; i++) { 345e48d15efSToby Isaac v = aa + 16 * ai[i]; 346e48d15efSToby Isaac vi = aj + ai[i]; 347e48d15efSToby Isaac nz = diag[i] - ai[i]; 3489371c9d4SSatish Balay s[0] = b[i2]; 3499371c9d4SSatish Balay s[1] = b[i2 + 1]; 3509371c9d4SSatish Balay s[2] = b[i2 + 2]; 3519371c9d4SSatish Balay s[3] = b[i2 + 3]; 352e48d15efSToby Isaac while (nz--) { 353e48d15efSToby Isaac idx = 4 * (*vi++); 3549371c9d4SSatish Balay xw[0] = x[idx]; 3559371c9d4SSatish Balay xw[1] = x[1 + idx]; 3569371c9d4SSatish Balay xw[2] = x[2 + idx]; 3579371c9d4SSatish Balay xw[3] = x[3 + idx]; 358e48d15efSToby Isaac PetscKernel_v_gets_v_minus_A_times_w_4(s, v, xw); 359e48d15efSToby Isaac v += 16; 360e48d15efSToby Isaac } 3619371c9d4SSatish Balay t[i2] = s[0]; 3629371c9d4SSatish Balay t[i2 + 1] = s[1]; 3639371c9d4SSatish Balay t[i2 + 2] = s[2]; 
3649371c9d4SSatish Balay t[i2 + 3] = s[3]; 365e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_4(xw, idiag, s); 3669371c9d4SSatish Balay x[i2] = xw[0]; 3679371c9d4SSatish Balay x[i2 + 1] = xw[1]; 3689371c9d4SSatish Balay x[i2 + 2] = xw[2]; 3699371c9d4SSatish Balay x[i2 + 3] = xw[3]; 370e48d15efSToby Isaac idiag += 16; 371e48d15efSToby Isaac i2 += 4; 372e48d15efSToby Isaac } 373e48d15efSToby Isaac break; 374e48d15efSToby Isaac case 5: 375e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_5(x, idiag, b); 3769371c9d4SSatish Balay t[0] = b[0]; 3779371c9d4SSatish Balay t[1] = b[1]; 3789371c9d4SSatish Balay t[2] = b[2]; 3799371c9d4SSatish Balay t[3] = b[3]; 3809371c9d4SSatish Balay t[4] = b[4]; 381e48d15efSToby Isaac i2 = 5; 382e48d15efSToby Isaac idiag += 25; 383e48d15efSToby Isaac for (i = 1; i < m; i++) { 384e48d15efSToby Isaac v = aa + 25 * ai[i]; 385e48d15efSToby Isaac vi = aj + ai[i]; 386e48d15efSToby Isaac nz = diag[i] - ai[i]; 3879371c9d4SSatish Balay s[0] = b[i2]; 3889371c9d4SSatish Balay s[1] = b[i2 + 1]; 3899371c9d4SSatish Balay s[2] = b[i2 + 2]; 3909371c9d4SSatish Balay s[3] = b[i2 + 3]; 3919371c9d4SSatish Balay s[4] = b[i2 + 4]; 392e48d15efSToby Isaac while (nz--) { 393e48d15efSToby Isaac idx = 5 * (*vi++); 3949371c9d4SSatish Balay xw[0] = x[idx]; 3959371c9d4SSatish Balay xw[1] = x[1 + idx]; 3969371c9d4SSatish Balay xw[2] = x[2 + idx]; 3979371c9d4SSatish Balay xw[3] = x[3 + idx]; 3989371c9d4SSatish Balay xw[4] = x[4 + idx]; 399e48d15efSToby Isaac PetscKernel_v_gets_v_minus_A_times_w_5(s, v, xw); 400e48d15efSToby Isaac v += 25; 401e48d15efSToby Isaac } 4029371c9d4SSatish Balay t[i2] = s[0]; 4039371c9d4SSatish Balay t[i2 + 1] = s[1]; 4049371c9d4SSatish Balay t[i2 + 2] = s[2]; 4059371c9d4SSatish Balay t[i2 + 3] = s[3]; 4069371c9d4SSatish Balay t[i2 + 4] = s[4]; 407e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_5(xw, idiag, s); 4089371c9d4SSatish Balay x[i2] = xw[0]; 4099371c9d4SSatish Balay x[i2 + 1] = xw[1]; 4109371c9d4SSatish Balay x[i2 + 2] = xw[2]; 
4119371c9d4SSatish Balay x[i2 + 3] = xw[3]; 4129371c9d4SSatish Balay x[i2 + 4] = xw[4]; 413e48d15efSToby Isaac idiag += 25; 414e48d15efSToby Isaac i2 += 5; 415e48d15efSToby Isaac } 416e48d15efSToby Isaac break; 417e48d15efSToby Isaac case 6: 418e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_6(x, idiag, b); 4199371c9d4SSatish Balay t[0] = b[0]; 4209371c9d4SSatish Balay t[1] = b[1]; 4219371c9d4SSatish Balay t[2] = b[2]; 4229371c9d4SSatish Balay t[3] = b[3]; 4239371c9d4SSatish Balay t[4] = b[4]; 4249371c9d4SSatish Balay t[5] = b[5]; 425e48d15efSToby Isaac i2 = 6; 426e48d15efSToby Isaac idiag += 36; 427e48d15efSToby Isaac for (i = 1; i < m; i++) { 428e48d15efSToby Isaac v = aa + 36 * ai[i]; 429e48d15efSToby Isaac vi = aj + ai[i]; 430e48d15efSToby Isaac nz = diag[i] - ai[i]; 4319371c9d4SSatish Balay s[0] = b[i2]; 4329371c9d4SSatish Balay s[1] = b[i2 + 1]; 4339371c9d4SSatish Balay s[2] = b[i2 + 2]; 4349371c9d4SSatish Balay s[3] = b[i2 + 3]; 4359371c9d4SSatish Balay s[4] = b[i2 + 4]; 4369371c9d4SSatish Balay s[5] = b[i2 + 5]; 437e48d15efSToby Isaac while (nz--) { 438e48d15efSToby Isaac idx = 6 * (*vi++); 4399371c9d4SSatish Balay xw[0] = x[idx]; 4409371c9d4SSatish Balay xw[1] = x[1 + idx]; 4419371c9d4SSatish Balay xw[2] = x[2 + idx]; 4429371c9d4SSatish Balay xw[3] = x[3 + idx]; 4439371c9d4SSatish Balay xw[4] = x[4 + idx]; 4449371c9d4SSatish Balay xw[5] = x[5 + idx]; 445e48d15efSToby Isaac PetscKernel_v_gets_v_minus_A_times_w_6(s, v, xw); 446e48d15efSToby Isaac v += 36; 447e48d15efSToby Isaac } 4489371c9d4SSatish Balay t[i2] = s[0]; 4499371c9d4SSatish Balay t[i2 + 1] = s[1]; 4509371c9d4SSatish Balay t[i2 + 2] = s[2]; 4519371c9d4SSatish Balay t[i2 + 3] = s[3]; 4529371c9d4SSatish Balay t[i2 + 4] = s[4]; 4539371c9d4SSatish Balay t[i2 + 5] = s[5]; 454e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_6(xw, idiag, s); 4559371c9d4SSatish Balay x[i2] = xw[0]; 4569371c9d4SSatish Balay x[i2 + 1] = xw[1]; 4579371c9d4SSatish Balay x[i2 + 2] = xw[2]; 4589371c9d4SSatish Balay x[i2 + 3] 
= xw[3]; 4599371c9d4SSatish Balay x[i2 + 4] = xw[4]; 4609371c9d4SSatish Balay x[i2 + 5] = xw[5]; 461e48d15efSToby Isaac idiag += 36; 462e48d15efSToby Isaac i2 += 6; 463e48d15efSToby Isaac } 464e48d15efSToby Isaac break; 465e48d15efSToby Isaac case 7: 466e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_7(x, idiag, b); 4679371c9d4SSatish Balay t[0] = b[0]; 4689371c9d4SSatish Balay t[1] = b[1]; 4699371c9d4SSatish Balay t[2] = b[2]; 4709371c9d4SSatish Balay t[3] = b[3]; 4719371c9d4SSatish Balay t[4] = b[4]; 4729371c9d4SSatish Balay t[5] = b[5]; 4739371c9d4SSatish Balay t[6] = b[6]; 474e48d15efSToby Isaac i2 = 7; 475e48d15efSToby Isaac idiag += 49; 476e48d15efSToby Isaac for (i = 1; i < m; i++) { 477e48d15efSToby Isaac v = aa + 49 * ai[i]; 478e48d15efSToby Isaac vi = aj + ai[i]; 479e48d15efSToby Isaac nz = diag[i] - ai[i]; 4809371c9d4SSatish Balay s[0] = b[i2]; 4819371c9d4SSatish Balay s[1] = b[i2 + 1]; 4829371c9d4SSatish Balay s[2] = b[i2 + 2]; 4839371c9d4SSatish Balay s[3] = b[i2 + 3]; 4849371c9d4SSatish Balay s[4] = b[i2 + 4]; 4859371c9d4SSatish Balay s[5] = b[i2 + 5]; 4869371c9d4SSatish Balay s[6] = b[i2 + 6]; 487e48d15efSToby Isaac while (nz--) { 488e48d15efSToby Isaac idx = 7 * (*vi++); 4899371c9d4SSatish Balay xw[0] = x[idx]; 4909371c9d4SSatish Balay xw[1] = x[1 + idx]; 4919371c9d4SSatish Balay xw[2] = x[2 + idx]; 4929371c9d4SSatish Balay xw[3] = x[3 + idx]; 4939371c9d4SSatish Balay xw[4] = x[4 + idx]; 4949371c9d4SSatish Balay xw[5] = x[5 + idx]; 4959371c9d4SSatish Balay xw[6] = x[6 + idx]; 496e48d15efSToby Isaac PetscKernel_v_gets_v_minus_A_times_w_7(s, v, xw); 497e48d15efSToby Isaac v += 49; 498e48d15efSToby Isaac } 4999371c9d4SSatish Balay t[i2] = s[0]; 5009371c9d4SSatish Balay t[i2 + 1] = s[1]; 5019371c9d4SSatish Balay t[i2 + 2] = s[2]; 5029371c9d4SSatish Balay t[i2 + 3] = s[3]; 5039371c9d4SSatish Balay t[i2 + 4] = s[4]; 5049371c9d4SSatish Balay t[i2 + 5] = s[5]; 5059371c9d4SSatish Balay t[i2 + 6] = s[6]; 506e48d15efSToby Isaac 
PetscKernel_v_gets_A_times_w_7(xw, idiag, s); 5079371c9d4SSatish Balay x[i2] = xw[0]; 5089371c9d4SSatish Balay x[i2 + 1] = xw[1]; 5099371c9d4SSatish Balay x[i2 + 2] = xw[2]; 5109371c9d4SSatish Balay x[i2 + 3] = xw[3]; 5119371c9d4SSatish Balay x[i2 + 4] = xw[4]; 5129371c9d4SSatish Balay x[i2 + 5] = xw[5]; 5139371c9d4SSatish Balay x[i2 + 6] = xw[6]; 514e48d15efSToby Isaac idiag += 49; 515e48d15efSToby Isaac i2 += 7; 516e48d15efSToby Isaac } 517e48d15efSToby Isaac break; 518e48d15efSToby Isaac default: 51996b95a6bSBarry Smith PetscKernel_w_gets_Ar_times_v(bs, bs, b, idiag, x); 5209566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(t, b, bs)); 521de80f912SBarry Smith i2 = bs; 522de80f912SBarry Smith idiag += bs2; 523de80f912SBarry Smith for (i = 1; i < m; i++) { 524de80f912SBarry Smith v = aa + bs2 * ai[i]; 525de80f912SBarry Smith vi = aj + ai[i]; 526de80f912SBarry Smith nz = diag[i] - ai[i]; 527de80f912SBarry Smith 5289566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(w, b + i2, bs)); 529de80f912SBarry Smith /* copy all rows of x that are needed into contiguous space */ 530de80f912SBarry Smith workt = work; 531de80f912SBarry Smith for (j = 0; j < nz; j++) { 5329566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(workt, x + bs * (*vi++), bs)); 533de80f912SBarry Smith workt += bs; 534de80f912SBarry Smith } 53596b95a6bSBarry Smith PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, v, work); 5369566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(t + i2, w, bs)); 53796b95a6bSBarry Smith PetscKernel_w_gets_Ar_times_v(bs, bs, w, idiag, x + i2); 538de80f912SBarry Smith 539de80f912SBarry Smith idiag += bs2; 540de80f912SBarry Smith i2 += bs; 541de80f912SBarry Smith } 542e48d15efSToby Isaac break; 543e48d15efSToby Isaac } 544de80f912SBarry Smith /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 5459566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(1.0 * bs2 * a->nz)); 546e48d15efSToby Isaac xb = t; 5479371c9d4SSatish Balay } else xb 
= b; 548de80f912SBarry Smith if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 549e48d15efSToby Isaac idiag = a->idiag + bs2 * (a->mbs - 1); 550e48d15efSToby Isaac i2 = bs * (m - 1); 551e48d15efSToby Isaac switch (bs) { 552e48d15efSToby Isaac case 1: 553e48d15efSToby Isaac s[0] = xb[i2]; 554e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_1(xw, idiag, s); 555e48d15efSToby Isaac x[i2] = xw[0]; 556e48d15efSToby Isaac i2 -= 1; 557e48d15efSToby Isaac for (i = m - 2; i >= 0; i--) { 558e48d15efSToby Isaac v = aa + (diag[i] + 1); 559e48d15efSToby Isaac vi = aj + diag[i] + 1; 560e48d15efSToby Isaac nz = ai[i + 1] - diag[i] - 1; 561e48d15efSToby Isaac s[0] = xb[i2]; 562e48d15efSToby Isaac for (j = 0; j < nz; j++) { 563e48d15efSToby Isaac xw[0] = x[vi[j]]; 564e48d15efSToby Isaac PetscKernel_v_gets_v_minus_A_times_w_1(s, (v + j), xw); 565e48d15efSToby Isaac } 566e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_1(xw, idiag, s); 567e48d15efSToby Isaac x[i2] = xw[0]; 568e48d15efSToby Isaac idiag -= 1; 569e48d15efSToby Isaac i2 -= 1; 570e48d15efSToby Isaac } 571e48d15efSToby Isaac break; 572e48d15efSToby Isaac case 2: 5739371c9d4SSatish Balay s[0] = xb[i2]; 5749371c9d4SSatish Balay s[1] = xb[i2 + 1]; 575e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_2(xw, idiag, s); 5769371c9d4SSatish Balay x[i2] = xw[0]; 5779371c9d4SSatish Balay x[i2 + 1] = xw[1]; 578e48d15efSToby Isaac i2 -= 2; 579e48d15efSToby Isaac idiag -= 4; 580e48d15efSToby Isaac for (i = m - 2; i >= 0; i--) { 581e48d15efSToby Isaac v = aa + 4 * (diag[i] + 1); 582e48d15efSToby Isaac vi = aj + diag[i] + 1; 583e48d15efSToby Isaac nz = ai[i + 1] - diag[i] - 1; 5849371c9d4SSatish Balay s[0] = xb[i2]; 5859371c9d4SSatish Balay s[1] = xb[i2 + 1]; 586e48d15efSToby Isaac for (j = 0; j < nz; j++) { 587e48d15efSToby Isaac idx = 2 * vi[j]; 588e48d15efSToby Isaac it = 4 * j; 5899371c9d4SSatish Balay xw[0] = x[idx]; 5909371c9d4SSatish Balay xw[1] = x[1 + idx]; 591e48d15efSToby Isaac 
PetscKernel_v_gets_v_minus_A_times_w_2(s, (v + it), xw); 592e48d15efSToby Isaac } 593e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_2(xw, idiag, s); 5949371c9d4SSatish Balay x[i2] = xw[0]; 5959371c9d4SSatish Balay x[i2 + 1] = xw[1]; 596e48d15efSToby Isaac idiag -= 4; 597e48d15efSToby Isaac i2 -= 2; 598e48d15efSToby Isaac } 599e48d15efSToby Isaac break; 600e48d15efSToby Isaac case 3: 6019371c9d4SSatish Balay s[0] = xb[i2]; 6029371c9d4SSatish Balay s[1] = xb[i2 + 1]; 6039371c9d4SSatish Balay s[2] = xb[i2 + 2]; 604e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_3(xw, idiag, s); 6059371c9d4SSatish Balay x[i2] = xw[0]; 6069371c9d4SSatish Balay x[i2 + 1] = xw[1]; 6079371c9d4SSatish Balay x[i2 + 2] = xw[2]; 608e48d15efSToby Isaac i2 -= 3; 609e48d15efSToby Isaac idiag -= 9; 610e48d15efSToby Isaac for (i = m - 2; i >= 0; i--) { 611e48d15efSToby Isaac v = aa + 9 * (diag[i] + 1); 612e48d15efSToby Isaac vi = aj + diag[i] + 1; 613e48d15efSToby Isaac nz = ai[i + 1] - diag[i] - 1; 6149371c9d4SSatish Balay s[0] = xb[i2]; 6159371c9d4SSatish Balay s[1] = xb[i2 + 1]; 6169371c9d4SSatish Balay s[2] = xb[i2 + 2]; 617e48d15efSToby Isaac while (nz--) { 618e48d15efSToby Isaac idx = 3 * (*vi++); 6199371c9d4SSatish Balay xw[0] = x[idx]; 6209371c9d4SSatish Balay xw[1] = x[1 + idx]; 6219371c9d4SSatish Balay xw[2] = x[2 + idx]; 622e48d15efSToby Isaac PetscKernel_v_gets_v_minus_A_times_w_3(s, v, xw); 623e48d15efSToby Isaac v += 9; 624e48d15efSToby Isaac } 625e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_3(xw, idiag, s); 6269371c9d4SSatish Balay x[i2] = xw[0]; 6279371c9d4SSatish Balay x[i2 + 1] = xw[1]; 6289371c9d4SSatish Balay x[i2 + 2] = xw[2]; 629e48d15efSToby Isaac idiag -= 9; 630e48d15efSToby Isaac i2 -= 3; 631e48d15efSToby Isaac } 632e48d15efSToby Isaac break; 633e48d15efSToby Isaac case 4: 6349371c9d4SSatish Balay s[0] = xb[i2]; 6359371c9d4SSatish Balay s[1] = xb[i2 + 1]; 6369371c9d4SSatish Balay s[2] = xb[i2 + 2]; 6379371c9d4SSatish Balay s[3] = xb[i2 + 3]; 638e48d15efSToby Isaac 
PetscKernel_v_gets_A_times_w_4(xw, idiag, s); 6399371c9d4SSatish Balay x[i2] = xw[0]; 6409371c9d4SSatish Balay x[i2 + 1] = xw[1]; 6419371c9d4SSatish Balay x[i2 + 2] = xw[2]; 6429371c9d4SSatish Balay x[i2 + 3] = xw[3]; 643e48d15efSToby Isaac i2 -= 4; 644e48d15efSToby Isaac idiag -= 16; 645e48d15efSToby Isaac for (i = m - 2; i >= 0; i--) { 646e48d15efSToby Isaac v = aa + 16 * (diag[i] + 1); 647e48d15efSToby Isaac vi = aj + diag[i] + 1; 648e48d15efSToby Isaac nz = ai[i + 1] - diag[i] - 1; 6499371c9d4SSatish Balay s[0] = xb[i2]; 6509371c9d4SSatish Balay s[1] = xb[i2 + 1]; 6519371c9d4SSatish Balay s[2] = xb[i2 + 2]; 6529371c9d4SSatish Balay s[3] = xb[i2 + 3]; 653e48d15efSToby Isaac while (nz--) { 654e48d15efSToby Isaac idx = 4 * (*vi++); 6559371c9d4SSatish Balay xw[0] = x[idx]; 6569371c9d4SSatish Balay xw[1] = x[1 + idx]; 6579371c9d4SSatish Balay xw[2] = x[2 + idx]; 6589371c9d4SSatish Balay xw[3] = x[3 + idx]; 659e48d15efSToby Isaac PetscKernel_v_gets_v_minus_A_times_w_4(s, v, xw); 660e48d15efSToby Isaac v += 16; 661e48d15efSToby Isaac } 662e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_4(xw, idiag, s); 6639371c9d4SSatish Balay x[i2] = xw[0]; 6649371c9d4SSatish Balay x[i2 + 1] = xw[1]; 6659371c9d4SSatish Balay x[i2 + 2] = xw[2]; 6669371c9d4SSatish Balay x[i2 + 3] = xw[3]; 667e48d15efSToby Isaac idiag -= 16; 668e48d15efSToby Isaac i2 -= 4; 669e48d15efSToby Isaac } 670e48d15efSToby Isaac break; 671e48d15efSToby Isaac case 5: 6729371c9d4SSatish Balay s[0] = xb[i2]; 6739371c9d4SSatish Balay s[1] = xb[i2 + 1]; 6749371c9d4SSatish Balay s[2] = xb[i2 + 2]; 6759371c9d4SSatish Balay s[3] = xb[i2 + 3]; 6769371c9d4SSatish Balay s[4] = xb[i2 + 4]; 677e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_5(xw, idiag, s); 6789371c9d4SSatish Balay x[i2] = xw[0]; 6799371c9d4SSatish Balay x[i2 + 1] = xw[1]; 6809371c9d4SSatish Balay x[i2 + 2] = xw[2]; 6819371c9d4SSatish Balay x[i2 + 3] = xw[3]; 6829371c9d4SSatish Balay x[i2 + 4] = xw[4]; 683e48d15efSToby Isaac i2 -= 5; 684e48d15efSToby Isaac 
idiag -= 25; 685e48d15efSToby Isaac for (i = m - 2; i >= 0; i--) { 686e48d15efSToby Isaac v = aa + 25 * (diag[i] + 1); 687e48d15efSToby Isaac vi = aj + diag[i] + 1; 688e48d15efSToby Isaac nz = ai[i + 1] - diag[i] - 1; 6899371c9d4SSatish Balay s[0] = xb[i2]; 6909371c9d4SSatish Balay s[1] = xb[i2 + 1]; 6919371c9d4SSatish Balay s[2] = xb[i2 + 2]; 6929371c9d4SSatish Balay s[3] = xb[i2 + 3]; 6939371c9d4SSatish Balay s[4] = xb[i2 + 4]; 694e48d15efSToby Isaac while (nz--) { 695e48d15efSToby Isaac idx = 5 * (*vi++); 6969371c9d4SSatish Balay xw[0] = x[idx]; 6979371c9d4SSatish Balay xw[1] = x[1 + idx]; 6989371c9d4SSatish Balay xw[2] = x[2 + idx]; 6999371c9d4SSatish Balay xw[3] = x[3 + idx]; 7009371c9d4SSatish Balay xw[4] = x[4 + idx]; 701e48d15efSToby Isaac PetscKernel_v_gets_v_minus_A_times_w_5(s, v, xw); 702e48d15efSToby Isaac v += 25; 703e48d15efSToby Isaac } 704e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_5(xw, idiag, s); 7059371c9d4SSatish Balay x[i2] = xw[0]; 7069371c9d4SSatish Balay x[i2 + 1] = xw[1]; 7079371c9d4SSatish Balay x[i2 + 2] = xw[2]; 7089371c9d4SSatish Balay x[i2 + 3] = xw[3]; 7099371c9d4SSatish Balay x[i2 + 4] = xw[4]; 710e48d15efSToby Isaac idiag -= 25; 711e48d15efSToby Isaac i2 -= 5; 712e48d15efSToby Isaac } 713e48d15efSToby Isaac break; 714e48d15efSToby Isaac case 6: 7159371c9d4SSatish Balay s[0] = xb[i2]; 7169371c9d4SSatish Balay s[1] = xb[i2 + 1]; 7179371c9d4SSatish Balay s[2] = xb[i2 + 2]; 7189371c9d4SSatish Balay s[3] = xb[i2 + 3]; 7199371c9d4SSatish Balay s[4] = xb[i2 + 4]; 7209371c9d4SSatish Balay s[5] = xb[i2 + 5]; 721e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_6(xw, idiag, s); 7229371c9d4SSatish Balay x[i2] = xw[0]; 7239371c9d4SSatish Balay x[i2 + 1] = xw[1]; 7249371c9d4SSatish Balay x[i2 + 2] = xw[2]; 7259371c9d4SSatish Balay x[i2 + 3] = xw[3]; 7269371c9d4SSatish Balay x[i2 + 4] = xw[4]; 7279371c9d4SSatish Balay x[i2 + 5] = xw[5]; 728e48d15efSToby Isaac i2 -= 6; 729e48d15efSToby Isaac idiag -= 36; 730e48d15efSToby Isaac for (i = m - 2; 
i >= 0; i--) { 731e48d15efSToby Isaac v = aa + 36 * (diag[i] + 1); 732e48d15efSToby Isaac vi = aj + diag[i] + 1; 733e48d15efSToby Isaac nz = ai[i + 1] - diag[i] - 1; 7349371c9d4SSatish Balay s[0] = xb[i2]; 7359371c9d4SSatish Balay s[1] = xb[i2 + 1]; 7369371c9d4SSatish Balay s[2] = xb[i2 + 2]; 7379371c9d4SSatish Balay s[3] = xb[i2 + 3]; 7389371c9d4SSatish Balay s[4] = xb[i2 + 4]; 7399371c9d4SSatish Balay s[5] = xb[i2 + 5]; 740e48d15efSToby Isaac while (nz--) { 741e48d15efSToby Isaac idx = 6 * (*vi++); 7429371c9d4SSatish Balay xw[0] = x[idx]; 7439371c9d4SSatish Balay xw[1] = x[1 + idx]; 7449371c9d4SSatish Balay xw[2] = x[2 + idx]; 7459371c9d4SSatish Balay xw[3] = x[3 + idx]; 7469371c9d4SSatish Balay xw[4] = x[4 + idx]; 7479371c9d4SSatish Balay xw[5] = x[5 + idx]; 748e48d15efSToby Isaac PetscKernel_v_gets_v_minus_A_times_w_6(s, v, xw); 749e48d15efSToby Isaac v += 36; 750e48d15efSToby Isaac } 751e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_6(xw, idiag, s); 7529371c9d4SSatish Balay x[i2] = xw[0]; 7539371c9d4SSatish Balay x[i2 + 1] = xw[1]; 7549371c9d4SSatish Balay x[i2 + 2] = xw[2]; 7559371c9d4SSatish Balay x[i2 + 3] = xw[3]; 7569371c9d4SSatish Balay x[i2 + 4] = xw[4]; 7579371c9d4SSatish Balay x[i2 + 5] = xw[5]; 758e48d15efSToby Isaac idiag -= 36; 759e48d15efSToby Isaac i2 -= 6; 760e48d15efSToby Isaac } 761e48d15efSToby Isaac break; 762e48d15efSToby Isaac case 7: 7639371c9d4SSatish Balay s[0] = xb[i2]; 7649371c9d4SSatish Balay s[1] = xb[i2 + 1]; 7659371c9d4SSatish Balay s[2] = xb[i2 + 2]; 7669371c9d4SSatish Balay s[3] = xb[i2 + 3]; 7679371c9d4SSatish Balay s[4] = xb[i2 + 4]; 7689371c9d4SSatish Balay s[5] = xb[i2 + 5]; 7699371c9d4SSatish Balay s[6] = xb[i2 + 6]; 770e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_7(x, idiag, b); 7719371c9d4SSatish Balay x[i2] = xw[0]; 7729371c9d4SSatish Balay x[i2 + 1] = xw[1]; 7739371c9d4SSatish Balay x[i2 + 2] = xw[2]; 7749371c9d4SSatish Balay x[i2 + 3] = xw[3]; 7759371c9d4SSatish Balay x[i2 + 4] = xw[4]; 7769371c9d4SSatish Balay 
x[i2 + 5] = xw[5]; 7779371c9d4SSatish Balay x[i2 + 6] = xw[6]; 778e48d15efSToby Isaac i2 -= 7; 779e48d15efSToby Isaac idiag -= 49; 780e48d15efSToby Isaac for (i = m - 2; i >= 0; i--) { 781e48d15efSToby Isaac v = aa + 49 * (diag[i] + 1); 782e48d15efSToby Isaac vi = aj + diag[i] + 1; 783e48d15efSToby Isaac nz = ai[i + 1] - diag[i] - 1; 7849371c9d4SSatish Balay s[0] = xb[i2]; 7859371c9d4SSatish Balay s[1] = xb[i2 + 1]; 7869371c9d4SSatish Balay s[2] = xb[i2 + 2]; 7879371c9d4SSatish Balay s[3] = xb[i2 + 3]; 7889371c9d4SSatish Balay s[4] = xb[i2 + 4]; 7899371c9d4SSatish Balay s[5] = xb[i2 + 5]; 7909371c9d4SSatish Balay s[6] = xb[i2 + 6]; 791e48d15efSToby Isaac while (nz--) { 792e48d15efSToby Isaac idx = 7 * (*vi++); 7939371c9d4SSatish Balay xw[0] = x[idx]; 7949371c9d4SSatish Balay xw[1] = x[1 + idx]; 7959371c9d4SSatish Balay xw[2] = x[2 + idx]; 7969371c9d4SSatish Balay xw[3] = x[3 + idx]; 7979371c9d4SSatish Balay xw[4] = x[4 + idx]; 7989371c9d4SSatish Balay xw[5] = x[5 + idx]; 7999371c9d4SSatish Balay xw[6] = x[6 + idx]; 800e48d15efSToby Isaac PetscKernel_v_gets_v_minus_A_times_w_7(s, v, xw); 801e48d15efSToby Isaac v += 49; 802e48d15efSToby Isaac } 803e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_7(xw, idiag, s); 8049371c9d4SSatish Balay x[i2] = xw[0]; 8059371c9d4SSatish Balay x[i2 + 1] = xw[1]; 8069371c9d4SSatish Balay x[i2 + 2] = xw[2]; 8079371c9d4SSatish Balay x[i2 + 3] = xw[3]; 8089371c9d4SSatish Balay x[i2 + 4] = xw[4]; 8099371c9d4SSatish Balay x[i2 + 5] = xw[5]; 8109371c9d4SSatish Balay x[i2 + 6] = xw[6]; 811e48d15efSToby Isaac idiag -= 49; 812e48d15efSToby Isaac i2 -= 7; 813e48d15efSToby Isaac } 814e48d15efSToby Isaac break; 815e48d15efSToby Isaac default: 8169566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(w, xb + i2, bs)); 81796b95a6bSBarry Smith PetscKernel_w_gets_Ar_times_v(bs, bs, w, idiag, x + i2); 818de80f912SBarry Smith i2 -= bs; 819e48d15efSToby Isaac idiag -= bs2; 820de80f912SBarry Smith for (i = m - 2; i >= 0; i--) { 821de80f912SBarry Smith v = 
aa + bs2 * (diag[i] + 1); 822de80f912SBarry Smith vi = aj + diag[i] + 1; 823de80f912SBarry Smith nz = ai[i + 1] - diag[i] - 1; 824de80f912SBarry Smith 8259566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(w, xb + i2, bs)); 826de80f912SBarry Smith /* copy all rows of x that are needed into contiguous space */ 827de80f912SBarry Smith workt = work; 828de80f912SBarry Smith for (j = 0; j < nz; j++) { 8299566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(workt, x + bs * (*vi++), bs)); 830de80f912SBarry Smith workt += bs; 831de80f912SBarry Smith } 83296b95a6bSBarry Smith PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, v, work); 83396b95a6bSBarry Smith PetscKernel_w_gets_Ar_times_v(bs, bs, w, idiag, x + i2); 834e48d15efSToby Isaac 835de80f912SBarry Smith idiag -= bs2; 836de80f912SBarry Smith i2 -= bs; 837de80f912SBarry Smith } 838e48d15efSToby Isaac break; 839e48d15efSToby Isaac } 8409566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(1.0 * bs2 * (a->nz))); 841de80f912SBarry Smith } 842e48d15efSToby Isaac its--; 843e48d15efSToby Isaac } 844e48d15efSToby Isaac while (its--) { 845e48d15efSToby Isaac if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) { 846e48d15efSToby Isaac idiag = a->idiag; 847e48d15efSToby Isaac i2 = 0; 848e48d15efSToby Isaac switch (bs) { 849e48d15efSToby Isaac case 1: 850e48d15efSToby Isaac for (i = 0; i < m; i++) { 851e48d15efSToby Isaac v = aa + ai[i]; 852e48d15efSToby Isaac vi = aj + ai[i]; 853e48d15efSToby Isaac nz = ai[i + 1] - ai[i]; 854e48d15efSToby Isaac s[0] = b[i2]; 855e48d15efSToby Isaac for (j = 0; j < nz; j++) { 856e48d15efSToby Isaac xw[0] = x[vi[j]]; 857e48d15efSToby Isaac PetscKernel_v_gets_v_minus_A_times_w_1(s, (v + j), xw); 858e48d15efSToby Isaac } 859e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_1(xw, idiag, s); 860e48d15efSToby Isaac x[i2] += xw[0]; 861e48d15efSToby Isaac idiag += 1; 862e48d15efSToby Isaac i2 += 1; 863e48d15efSToby Isaac } 864e48d15efSToby Isaac break; 865e48d15efSToby Isaac case 2: 
866e48d15efSToby Isaac for (i = 0; i < m; i++) { 867e48d15efSToby Isaac v = aa + 4 * ai[i]; 868e48d15efSToby Isaac vi = aj + ai[i]; 869e48d15efSToby Isaac nz = ai[i + 1] - ai[i]; 8709371c9d4SSatish Balay s[0] = b[i2]; 8719371c9d4SSatish Balay s[1] = b[i2 + 1]; 872e48d15efSToby Isaac for (j = 0; j < nz; j++) { 873e48d15efSToby Isaac idx = 2 * vi[j]; 874e48d15efSToby Isaac it = 4 * j; 8759371c9d4SSatish Balay xw[0] = x[idx]; 8769371c9d4SSatish Balay xw[1] = x[1 + idx]; 877e48d15efSToby Isaac PetscKernel_v_gets_v_minus_A_times_w_2(s, (v + it), xw); 878e48d15efSToby Isaac } 879e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_2(xw, idiag, s); 8809371c9d4SSatish Balay x[i2] += xw[0]; 8819371c9d4SSatish Balay x[i2 + 1] += xw[1]; 882e48d15efSToby Isaac idiag += 4; 883e48d15efSToby Isaac i2 += 2; 884e48d15efSToby Isaac } 885e48d15efSToby Isaac break; 886e48d15efSToby Isaac case 3: 887e48d15efSToby Isaac for (i = 0; i < m; i++) { 888e48d15efSToby Isaac v = aa + 9 * ai[i]; 889e48d15efSToby Isaac vi = aj + ai[i]; 890e48d15efSToby Isaac nz = ai[i + 1] - ai[i]; 8919371c9d4SSatish Balay s[0] = b[i2]; 8929371c9d4SSatish Balay s[1] = b[i2 + 1]; 8939371c9d4SSatish Balay s[2] = b[i2 + 2]; 894e48d15efSToby Isaac while (nz--) { 895e48d15efSToby Isaac idx = 3 * (*vi++); 8969371c9d4SSatish Balay xw[0] = x[idx]; 8979371c9d4SSatish Balay xw[1] = x[1 + idx]; 8989371c9d4SSatish Balay xw[2] = x[2 + idx]; 899e48d15efSToby Isaac PetscKernel_v_gets_v_minus_A_times_w_3(s, v, xw); 900e48d15efSToby Isaac v += 9; 901e48d15efSToby Isaac } 902e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_3(xw, idiag, s); 9039371c9d4SSatish Balay x[i2] += xw[0]; 9049371c9d4SSatish Balay x[i2 + 1] += xw[1]; 9059371c9d4SSatish Balay x[i2 + 2] += xw[2]; 906e48d15efSToby Isaac idiag += 9; 907e48d15efSToby Isaac i2 += 3; 908e48d15efSToby Isaac } 909e48d15efSToby Isaac break; 910e48d15efSToby Isaac case 4: 911e48d15efSToby Isaac for (i = 0; i < m; i++) { 912e48d15efSToby Isaac v = aa + 16 * ai[i]; 913e48d15efSToby Isaac 
vi = aj + ai[i]; 914e48d15efSToby Isaac nz = ai[i + 1] - ai[i]; 9159371c9d4SSatish Balay s[0] = b[i2]; 9169371c9d4SSatish Balay s[1] = b[i2 + 1]; 9179371c9d4SSatish Balay s[2] = b[i2 + 2]; 9189371c9d4SSatish Balay s[3] = b[i2 + 3]; 919e48d15efSToby Isaac while (nz--) { 920e48d15efSToby Isaac idx = 4 * (*vi++); 9219371c9d4SSatish Balay xw[0] = x[idx]; 9229371c9d4SSatish Balay xw[1] = x[1 + idx]; 9239371c9d4SSatish Balay xw[2] = x[2 + idx]; 9249371c9d4SSatish Balay xw[3] = x[3 + idx]; 925e48d15efSToby Isaac PetscKernel_v_gets_v_minus_A_times_w_4(s, v, xw); 926e48d15efSToby Isaac v += 16; 927e48d15efSToby Isaac } 928e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_4(xw, idiag, s); 9299371c9d4SSatish Balay x[i2] += xw[0]; 9309371c9d4SSatish Balay x[i2 + 1] += xw[1]; 9319371c9d4SSatish Balay x[i2 + 2] += xw[2]; 9329371c9d4SSatish Balay x[i2 + 3] += xw[3]; 933e48d15efSToby Isaac idiag += 16; 934e48d15efSToby Isaac i2 += 4; 935e48d15efSToby Isaac } 936e48d15efSToby Isaac break; 937e48d15efSToby Isaac case 5: 938e48d15efSToby Isaac for (i = 0; i < m; i++) { 939e48d15efSToby Isaac v = aa + 25 * ai[i]; 940e48d15efSToby Isaac vi = aj + ai[i]; 941e48d15efSToby Isaac nz = ai[i + 1] - ai[i]; 9429371c9d4SSatish Balay s[0] = b[i2]; 9439371c9d4SSatish Balay s[1] = b[i2 + 1]; 9449371c9d4SSatish Balay s[2] = b[i2 + 2]; 9459371c9d4SSatish Balay s[3] = b[i2 + 3]; 9469371c9d4SSatish Balay s[4] = b[i2 + 4]; 947e48d15efSToby Isaac while (nz--) { 948e48d15efSToby Isaac idx = 5 * (*vi++); 9499371c9d4SSatish Balay xw[0] = x[idx]; 9509371c9d4SSatish Balay xw[1] = x[1 + idx]; 9519371c9d4SSatish Balay xw[2] = x[2 + idx]; 9529371c9d4SSatish Balay xw[3] = x[3 + idx]; 9539371c9d4SSatish Balay xw[4] = x[4 + idx]; 954e48d15efSToby Isaac PetscKernel_v_gets_v_minus_A_times_w_5(s, v, xw); 955e48d15efSToby Isaac v += 25; 956e48d15efSToby Isaac } 957e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_5(xw, idiag, s); 9589371c9d4SSatish Balay x[i2] += xw[0]; 9599371c9d4SSatish Balay x[i2 + 1] += xw[1]; 
9609371c9d4SSatish Balay x[i2 + 2] += xw[2]; 9619371c9d4SSatish Balay x[i2 + 3] += xw[3]; 9629371c9d4SSatish Balay x[i2 + 4] += xw[4]; 963e48d15efSToby Isaac idiag += 25; 964e48d15efSToby Isaac i2 += 5; 965e48d15efSToby Isaac } 966e48d15efSToby Isaac break; 967e48d15efSToby Isaac case 6: 968e48d15efSToby Isaac for (i = 0; i < m; i++) { 969e48d15efSToby Isaac v = aa + 36 * ai[i]; 970e48d15efSToby Isaac vi = aj + ai[i]; 971e48d15efSToby Isaac nz = ai[i + 1] - ai[i]; 9729371c9d4SSatish Balay s[0] = b[i2]; 9739371c9d4SSatish Balay s[1] = b[i2 + 1]; 9749371c9d4SSatish Balay s[2] = b[i2 + 2]; 9759371c9d4SSatish Balay s[3] = b[i2 + 3]; 9769371c9d4SSatish Balay s[4] = b[i2 + 4]; 9779371c9d4SSatish Balay s[5] = b[i2 + 5]; 978e48d15efSToby Isaac while (nz--) { 979e48d15efSToby Isaac idx = 6 * (*vi++); 9809371c9d4SSatish Balay xw[0] = x[idx]; 9819371c9d4SSatish Balay xw[1] = x[1 + idx]; 9829371c9d4SSatish Balay xw[2] = x[2 + idx]; 9839371c9d4SSatish Balay xw[3] = x[3 + idx]; 9849371c9d4SSatish Balay xw[4] = x[4 + idx]; 9859371c9d4SSatish Balay xw[5] = x[5 + idx]; 986e48d15efSToby Isaac PetscKernel_v_gets_v_minus_A_times_w_6(s, v, xw); 987e48d15efSToby Isaac v += 36; 988e48d15efSToby Isaac } 989e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_6(xw, idiag, s); 9909371c9d4SSatish Balay x[i2] += xw[0]; 9919371c9d4SSatish Balay x[i2 + 1] += xw[1]; 9929371c9d4SSatish Balay x[i2 + 2] += xw[2]; 9939371c9d4SSatish Balay x[i2 + 3] += xw[3]; 9949371c9d4SSatish Balay x[i2 + 4] += xw[4]; 9959371c9d4SSatish Balay x[i2 + 5] += xw[5]; 996e48d15efSToby Isaac idiag += 36; 997e48d15efSToby Isaac i2 += 6; 998e48d15efSToby Isaac } 999e48d15efSToby Isaac break; 1000e48d15efSToby Isaac case 7: 1001e48d15efSToby Isaac for (i = 0; i < m; i++) { 1002e48d15efSToby Isaac v = aa + 49 * ai[i]; 1003e48d15efSToby Isaac vi = aj + ai[i]; 1004e48d15efSToby Isaac nz = ai[i + 1] - ai[i]; 10059371c9d4SSatish Balay s[0] = b[i2]; 10069371c9d4SSatish Balay s[1] = b[i2 + 1]; 10079371c9d4SSatish Balay s[2] = b[i2 + 
2]; 10089371c9d4SSatish Balay s[3] = b[i2 + 3]; 10099371c9d4SSatish Balay s[4] = b[i2 + 4]; 10109371c9d4SSatish Balay s[5] = b[i2 + 5]; 10119371c9d4SSatish Balay s[6] = b[i2 + 6]; 1012e48d15efSToby Isaac while (nz--) { 1013e48d15efSToby Isaac idx = 7 * (*vi++); 10149371c9d4SSatish Balay xw[0] = x[idx]; 10159371c9d4SSatish Balay xw[1] = x[1 + idx]; 10169371c9d4SSatish Balay xw[2] = x[2 + idx]; 10179371c9d4SSatish Balay xw[3] = x[3 + idx]; 10189371c9d4SSatish Balay xw[4] = x[4 + idx]; 10199371c9d4SSatish Balay xw[5] = x[5 + idx]; 10209371c9d4SSatish Balay xw[6] = x[6 + idx]; 1021e48d15efSToby Isaac PetscKernel_v_gets_v_minus_A_times_w_7(s, v, xw); 1022e48d15efSToby Isaac v += 49; 1023e48d15efSToby Isaac } 1024e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_7(xw, idiag, s); 10259371c9d4SSatish Balay x[i2] += xw[0]; 10269371c9d4SSatish Balay x[i2 + 1] += xw[1]; 10279371c9d4SSatish Balay x[i2 + 2] += xw[2]; 10289371c9d4SSatish Balay x[i2 + 3] += xw[3]; 10299371c9d4SSatish Balay x[i2 + 4] += xw[4]; 10309371c9d4SSatish Balay x[i2 + 5] += xw[5]; 10319371c9d4SSatish Balay x[i2 + 6] += xw[6]; 1032e48d15efSToby Isaac idiag += 49; 1033e48d15efSToby Isaac i2 += 7; 1034e48d15efSToby Isaac } 1035e48d15efSToby Isaac break; 1036e48d15efSToby Isaac default: 1037e48d15efSToby Isaac for (i = 0; i < m; i++) { 1038e48d15efSToby Isaac v = aa + bs2 * ai[i]; 1039e48d15efSToby Isaac vi = aj + ai[i]; 1040e48d15efSToby Isaac nz = ai[i + 1] - ai[i]; 1041e48d15efSToby Isaac 10429566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(w, b + i2, bs)); 1043e48d15efSToby Isaac /* copy all rows of x that are needed into contiguous space */ 1044e48d15efSToby Isaac workt = work; 1045e48d15efSToby Isaac for (j = 0; j < nz; j++) { 10469566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(workt, x + bs * (*vi++), bs)); 1047e48d15efSToby Isaac workt += bs; 1048e48d15efSToby Isaac } 1049e48d15efSToby Isaac PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, v, work); 1050e48d15efSToby Isaac 
PetscKernel_w_gets_w_plus_Ar_times_v(bs, bs, w, idiag, x + i2); 1051e48d15efSToby Isaac 1052e48d15efSToby Isaac idiag += bs2; 1053e48d15efSToby Isaac i2 += bs; 1054e48d15efSToby Isaac } 1055e48d15efSToby Isaac break; 1056e48d15efSToby Isaac } 10579566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(2.0 * bs2 * a->nz)); 1058e48d15efSToby Isaac } 1059e48d15efSToby Isaac if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 1060e48d15efSToby Isaac idiag = a->idiag + bs2 * (a->mbs - 1); 1061e48d15efSToby Isaac i2 = bs * (m - 1); 1062e48d15efSToby Isaac switch (bs) { 1063e48d15efSToby Isaac case 1: 1064e48d15efSToby Isaac for (i = m - 1; i >= 0; i--) { 1065e48d15efSToby Isaac v = aa + ai[i]; 1066e48d15efSToby Isaac vi = aj + ai[i]; 1067e48d15efSToby Isaac nz = ai[i + 1] - ai[i]; 1068e48d15efSToby Isaac s[0] = b[i2]; 1069e48d15efSToby Isaac for (j = 0; j < nz; j++) { 1070e48d15efSToby Isaac xw[0] = x[vi[j]]; 1071e48d15efSToby Isaac PetscKernel_v_gets_v_minus_A_times_w_1(s, (v + j), xw); 1072e48d15efSToby Isaac } 1073e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_1(xw, idiag, s); 1074e48d15efSToby Isaac x[i2] += xw[0]; 1075e48d15efSToby Isaac idiag -= 1; 1076e48d15efSToby Isaac i2 -= 1; 1077e48d15efSToby Isaac } 1078e48d15efSToby Isaac break; 1079e48d15efSToby Isaac case 2: 1080e48d15efSToby Isaac for (i = m - 1; i >= 0; i--) { 1081e48d15efSToby Isaac v = aa + 4 * ai[i]; 1082e48d15efSToby Isaac vi = aj + ai[i]; 1083e48d15efSToby Isaac nz = ai[i + 1] - ai[i]; 10849371c9d4SSatish Balay s[0] = b[i2]; 10859371c9d4SSatish Balay s[1] = b[i2 + 1]; 1086e48d15efSToby Isaac for (j = 0; j < nz; j++) { 1087e48d15efSToby Isaac idx = 2 * vi[j]; 1088e48d15efSToby Isaac it = 4 * j; 10899371c9d4SSatish Balay xw[0] = x[idx]; 10909371c9d4SSatish Balay xw[1] = x[1 + idx]; 1091e48d15efSToby Isaac PetscKernel_v_gets_v_minus_A_times_w_2(s, (v + it), xw); 1092e48d15efSToby Isaac } 1093e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_2(xw, idiag, s); 10949371c9d4SSatish Balay 
x[i2] += xw[0]; 10959371c9d4SSatish Balay x[i2 + 1] += xw[1]; 1096e48d15efSToby Isaac idiag -= 4; 1097e48d15efSToby Isaac i2 -= 2; 1098e48d15efSToby Isaac } 1099e48d15efSToby Isaac break; 1100e48d15efSToby Isaac case 3: 1101e48d15efSToby Isaac for (i = m - 1; i >= 0; i--) { 1102e48d15efSToby Isaac v = aa + 9 * ai[i]; 1103e48d15efSToby Isaac vi = aj + ai[i]; 1104e48d15efSToby Isaac nz = ai[i + 1] - ai[i]; 11059371c9d4SSatish Balay s[0] = b[i2]; 11069371c9d4SSatish Balay s[1] = b[i2 + 1]; 11079371c9d4SSatish Balay s[2] = b[i2 + 2]; 1108e48d15efSToby Isaac while (nz--) { 1109e48d15efSToby Isaac idx = 3 * (*vi++); 11109371c9d4SSatish Balay xw[0] = x[idx]; 11119371c9d4SSatish Balay xw[1] = x[1 + idx]; 11129371c9d4SSatish Balay xw[2] = x[2 + idx]; 1113e48d15efSToby Isaac PetscKernel_v_gets_v_minus_A_times_w_3(s, v, xw); 1114e48d15efSToby Isaac v += 9; 1115e48d15efSToby Isaac } 1116e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_3(xw, idiag, s); 11179371c9d4SSatish Balay x[i2] += xw[0]; 11189371c9d4SSatish Balay x[i2 + 1] += xw[1]; 11199371c9d4SSatish Balay x[i2 + 2] += xw[2]; 1120e48d15efSToby Isaac idiag -= 9; 1121e48d15efSToby Isaac i2 -= 3; 1122e48d15efSToby Isaac } 1123e48d15efSToby Isaac break; 1124e48d15efSToby Isaac case 4: 1125e48d15efSToby Isaac for (i = m - 1; i >= 0; i--) { 1126e48d15efSToby Isaac v = aa + 16 * ai[i]; 1127e48d15efSToby Isaac vi = aj + ai[i]; 1128e48d15efSToby Isaac nz = ai[i + 1] - ai[i]; 11299371c9d4SSatish Balay s[0] = b[i2]; 11309371c9d4SSatish Balay s[1] = b[i2 + 1]; 11319371c9d4SSatish Balay s[2] = b[i2 + 2]; 11329371c9d4SSatish Balay s[3] = b[i2 + 3]; 1133e48d15efSToby Isaac while (nz--) { 1134e48d15efSToby Isaac idx = 4 * (*vi++); 11359371c9d4SSatish Balay xw[0] = x[idx]; 11369371c9d4SSatish Balay xw[1] = x[1 + idx]; 11379371c9d4SSatish Balay xw[2] = x[2 + idx]; 11389371c9d4SSatish Balay xw[3] = x[3 + idx]; 1139e48d15efSToby Isaac PetscKernel_v_gets_v_minus_A_times_w_4(s, v, xw); 1140e48d15efSToby Isaac v += 16; 1141e48d15efSToby Isaac 
} 1142e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_4(xw, idiag, s); 11439371c9d4SSatish Balay x[i2] += xw[0]; 11449371c9d4SSatish Balay x[i2 + 1] += xw[1]; 11459371c9d4SSatish Balay x[i2 + 2] += xw[2]; 11469371c9d4SSatish Balay x[i2 + 3] += xw[3]; 1147e48d15efSToby Isaac idiag -= 16; 1148e48d15efSToby Isaac i2 -= 4; 1149e48d15efSToby Isaac } 1150e48d15efSToby Isaac break; 1151e48d15efSToby Isaac case 5: 1152e48d15efSToby Isaac for (i = m - 1; i >= 0; i--) { 1153e48d15efSToby Isaac v = aa + 25 * ai[i]; 1154e48d15efSToby Isaac vi = aj + ai[i]; 1155e48d15efSToby Isaac nz = ai[i + 1] - ai[i]; 11569371c9d4SSatish Balay s[0] = b[i2]; 11579371c9d4SSatish Balay s[1] = b[i2 + 1]; 11589371c9d4SSatish Balay s[2] = b[i2 + 2]; 11599371c9d4SSatish Balay s[3] = b[i2 + 3]; 11609371c9d4SSatish Balay s[4] = b[i2 + 4]; 1161e48d15efSToby Isaac while (nz--) { 1162e48d15efSToby Isaac idx = 5 * (*vi++); 11639371c9d4SSatish Balay xw[0] = x[idx]; 11649371c9d4SSatish Balay xw[1] = x[1 + idx]; 11659371c9d4SSatish Balay xw[2] = x[2 + idx]; 11669371c9d4SSatish Balay xw[3] = x[3 + idx]; 11679371c9d4SSatish Balay xw[4] = x[4 + idx]; 1168e48d15efSToby Isaac PetscKernel_v_gets_v_minus_A_times_w_5(s, v, xw); 1169e48d15efSToby Isaac v += 25; 1170e48d15efSToby Isaac } 1171e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_5(xw, idiag, s); 11729371c9d4SSatish Balay x[i2] += xw[0]; 11739371c9d4SSatish Balay x[i2 + 1] += xw[1]; 11749371c9d4SSatish Balay x[i2 + 2] += xw[2]; 11759371c9d4SSatish Balay x[i2 + 3] += xw[3]; 11769371c9d4SSatish Balay x[i2 + 4] += xw[4]; 1177e48d15efSToby Isaac idiag -= 25; 1178e48d15efSToby Isaac i2 -= 5; 1179e48d15efSToby Isaac } 1180e48d15efSToby Isaac break; 1181e48d15efSToby Isaac case 6: 1182e48d15efSToby Isaac for (i = m - 1; i >= 0; i--) { 1183e48d15efSToby Isaac v = aa + 36 * ai[i]; 1184e48d15efSToby Isaac vi = aj + ai[i]; 1185e48d15efSToby Isaac nz = ai[i + 1] - ai[i]; 11869371c9d4SSatish Balay s[0] = b[i2]; 11879371c9d4SSatish Balay s[1] = b[i2 + 1]; 
11889371c9d4SSatish Balay s[2] = b[i2 + 2]; 11899371c9d4SSatish Balay s[3] = b[i2 + 3]; 11909371c9d4SSatish Balay s[4] = b[i2 + 4]; 11919371c9d4SSatish Balay s[5] = b[i2 + 5]; 1192e48d15efSToby Isaac while (nz--) { 1193e48d15efSToby Isaac idx = 6 * (*vi++); 11949371c9d4SSatish Balay xw[0] = x[idx]; 11959371c9d4SSatish Balay xw[1] = x[1 + idx]; 11969371c9d4SSatish Balay xw[2] = x[2 + idx]; 11979371c9d4SSatish Balay xw[3] = x[3 + idx]; 11989371c9d4SSatish Balay xw[4] = x[4 + idx]; 11999371c9d4SSatish Balay xw[5] = x[5 + idx]; 1200e48d15efSToby Isaac PetscKernel_v_gets_v_minus_A_times_w_6(s, v, xw); 1201e48d15efSToby Isaac v += 36; 1202e48d15efSToby Isaac } 1203e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_6(xw, idiag, s); 12049371c9d4SSatish Balay x[i2] += xw[0]; 12059371c9d4SSatish Balay x[i2 + 1] += xw[1]; 12069371c9d4SSatish Balay x[i2 + 2] += xw[2]; 12079371c9d4SSatish Balay x[i2 + 3] += xw[3]; 12089371c9d4SSatish Balay x[i2 + 4] += xw[4]; 12099371c9d4SSatish Balay x[i2 + 5] += xw[5]; 1210e48d15efSToby Isaac idiag -= 36; 1211e48d15efSToby Isaac i2 -= 6; 1212e48d15efSToby Isaac } 1213e48d15efSToby Isaac break; 1214e48d15efSToby Isaac case 7: 1215e48d15efSToby Isaac for (i = m - 1; i >= 0; i--) { 1216e48d15efSToby Isaac v = aa + 49 * ai[i]; 1217e48d15efSToby Isaac vi = aj + ai[i]; 1218e48d15efSToby Isaac nz = ai[i + 1] - ai[i]; 12199371c9d4SSatish Balay s[0] = b[i2]; 12209371c9d4SSatish Balay s[1] = b[i2 + 1]; 12219371c9d4SSatish Balay s[2] = b[i2 + 2]; 12229371c9d4SSatish Balay s[3] = b[i2 + 3]; 12239371c9d4SSatish Balay s[4] = b[i2 + 4]; 12249371c9d4SSatish Balay s[5] = b[i2 + 5]; 12259371c9d4SSatish Balay s[6] = b[i2 + 6]; 1226e48d15efSToby Isaac while (nz--) { 1227e48d15efSToby Isaac idx = 7 * (*vi++); 12289371c9d4SSatish Balay xw[0] = x[idx]; 12299371c9d4SSatish Balay xw[1] = x[1 + idx]; 12309371c9d4SSatish Balay xw[2] = x[2 + idx]; 12319371c9d4SSatish Balay xw[3] = x[3 + idx]; 12329371c9d4SSatish Balay xw[4] = x[4 + idx]; 12339371c9d4SSatish Balay xw[5] = 
x[5 + idx]; 12349371c9d4SSatish Balay xw[6] = x[6 + idx]; 1235e48d15efSToby Isaac PetscKernel_v_gets_v_minus_A_times_w_7(s, v, xw); 1236e48d15efSToby Isaac v += 49; 1237e48d15efSToby Isaac } 1238e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_7(xw, idiag, s); 12399371c9d4SSatish Balay x[i2] += xw[0]; 12409371c9d4SSatish Balay x[i2 + 1] += xw[1]; 12419371c9d4SSatish Balay x[i2 + 2] += xw[2]; 12429371c9d4SSatish Balay x[i2 + 3] += xw[3]; 12439371c9d4SSatish Balay x[i2 + 4] += xw[4]; 12449371c9d4SSatish Balay x[i2 + 5] += xw[5]; 12459371c9d4SSatish Balay x[i2 + 6] += xw[6]; 1246e48d15efSToby Isaac idiag -= 49; 1247e48d15efSToby Isaac i2 -= 7; 1248e48d15efSToby Isaac } 1249e48d15efSToby Isaac break; 1250e48d15efSToby Isaac default: 1251e48d15efSToby Isaac for (i = m - 1; i >= 0; i--) { 1252e48d15efSToby Isaac v = aa + bs2 * ai[i]; 1253e48d15efSToby Isaac vi = aj + ai[i]; 1254e48d15efSToby Isaac nz = ai[i + 1] - ai[i]; 1255e48d15efSToby Isaac 12569566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(w, b + i2, bs)); 1257e48d15efSToby Isaac /* copy all rows of x that are needed into contiguous space */ 1258e48d15efSToby Isaac workt = work; 1259e48d15efSToby Isaac for (j = 0; j < nz; j++) { 12609566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(workt, x + bs * (*vi++), bs)); 1261e48d15efSToby Isaac workt += bs; 1262e48d15efSToby Isaac } 1263e48d15efSToby Isaac PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, v, work); 1264e48d15efSToby Isaac PetscKernel_w_gets_w_plus_Ar_times_v(bs, bs, w, idiag, x + i2); 1265e48d15efSToby Isaac 1266e48d15efSToby Isaac idiag -= bs2; 1267e48d15efSToby Isaac i2 -= bs; 1268e48d15efSToby Isaac } 1269e48d15efSToby Isaac break; 1270e48d15efSToby Isaac } 12719566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(2.0 * bs2 * (a->nz))); 1272e48d15efSToby Isaac } 1273e48d15efSToby Isaac } 12749566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(xx, &x)); 12759566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(bb, &b)); 
12763ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1277de80f912SBarry Smith } 1278de80f912SBarry Smith 1279af674e45SBarry Smith /* 128081824310SBarry Smith Special version for direct calls from Fortran (Used in PETSc-fun3d) 1281af674e45SBarry Smith */ 1282af674e45SBarry Smith #if defined(PETSC_HAVE_FORTRAN_CAPS) 1283af674e45SBarry Smith #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4 1284af674e45SBarry Smith #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 1285af674e45SBarry Smith #define matsetvaluesblocked4_ matsetvaluesblocked4 1286af674e45SBarry Smith #endif 1287af674e45SBarry Smith 1288d71ae5a4SJacob Faibussowitsch PETSC_EXTERN void matsetvaluesblocked4_(Mat *AA, PetscInt *mm, const PetscInt im[], PetscInt *nn, const PetscInt in[], const PetscScalar v[]) 1289d71ae5a4SJacob Faibussowitsch { 1290af674e45SBarry Smith Mat A = *AA; 1291af674e45SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 1292c1ac3661SBarry Smith PetscInt *rp, k, low, high, t, ii, jj, row, nrow, i, col, l, N, m = *mm, n = *nn; 1293c1ac3661SBarry Smith PetscInt *ai = a->i, *ailen = a->ilen; 129417ec6a02SBarry Smith PetscInt *aj = a->j, stepval, lastcol = -1; 1295f15d580aSBarry Smith const PetscScalar *value = v; 12964bb09213Spetsc MatScalar *ap, *aa = a->a, *bap; 1297af674e45SBarry Smith 1298af674e45SBarry Smith PetscFunctionBegin; 1299ce94432eSBarry Smith if (A->rmap->bs != 4) SETERRABORT(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Can only be called with a block size of 4"); 1300af674e45SBarry Smith stepval = (n - 1) * 4; 1301af674e45SBarry Smith for (k = 0; k < m; k++) { /* loop over added rows */ 1302af674e45SBarry Smith row = im[k]; 1303af674e45SBarry Smith rp = aj + ai[row]; 1304af674e45SBarry Smith ap = aa + 16 * ai[row]; 1305af674e45SBarry Smith nrow = ailen[row]; 1306af674e45SBarry Smith low = 0; 130717ec6a02SBarry Smith high = nrow; 1308af674e45SBarry Smith for (l = 0; l < n; l++) { /* loop over added columns */ 1309af674e45SBarry Smith col = in[l]; 
1310db4deed7SKarl Rupp if (col <= lastcol) low = 0; 1311db4deed7SKarl Rupp else high = nrow; 131217ec6a02SBarry Smith lastcol = col; 13131e3347e8SBarry Smith value = v + k * (stepval + 4 + l) * 4; 1314af674e45SBarry Smith while (high - low > 7) { 1315af674e45SBarry Smith t = (low + high) / 2; 1316af674e45SBarry Smith if (rp[t] > col) high = t; 1317af674e45SBarry Smith else low = t; 1318af674e45SBarry Smith } 1319af674e45SBarry Smith for (i = low; i < high; i++) { 1320af674e45SBarry Smith if (rp[i] > col) break; 1321af674e45SBarry Smith if (rp[i] == col) { 1322af674e45SBarry Smith bap = ap + 16 * i; 1323af674e45SBarry Smith for (ii = 0; ii < 4; ii++, value += stepval) { 1324ad540459SPierre Jolivet for (jj = ii; jj < 16; jj += 4) bap[jj] += *value++; 1325af674e45SBarry Smith } 1326af674e45SBarry Smith goto noinsert2; 1327af674e45SBarry Smith } 1328af674e45SBarry Smith } 1329af674e45SBarry Smith N = nrow++ - 1; 133017ec6a02SBarry Smith high++; /* added new column index thus must search to one higher than before */ 1331af674e45SBarry Smith /* shift up all the later entries in this row */ 1332af674e45SBarry Smith for (ii = N; ii >= i; ii--) { 1333af674e45SBarry Smith rp[ii + 1] = rp[ii]; 13349566063dSJacob Faibussowitsch PetscCallVoid(PetscArraycpy(ap + 16 * (ii + 1), ap + 16 * (ii), 16)); 1335af674e45SBarry Smith } 133648a46eb9SPierre Jolivet if (N >= i) PetscCallVoid(PetscArrayzero(ap + 16 * i, 16)); 1337af674e45SBarry Smith rp[i] = col; 1338af674e45SBarry Smith bap = ap + 16 * i; 1339af674e45SBarry Smith for (ii = 0; ii < 4; ii++, value += stepval) { 1340ad540459SPierre Jolivet for (jj = ii; jj < 16; jj += 4) bap[jj] = *value++; 1341af674e45SBarry Smith } 1342af674e45SBarry Smith noinsert2:; 1343af674e45SBarry Smith low = i; 1344af674e45SBarry Smith } 1345af674e45SBarry Smith ailen[row] = nrow; 1346af674e45SBarry Smith } 1347be1d678aSKris Buschelman PetscFunctionReturnVoid(); 1348af674e45SBarry Smith } 1349af674e45SBarry Smith 1350af674e45SBarry Smith #if 
defined(PETSC_HAVE_FORTRAN_CAPS) 1351af674e45SBarry Smith #define matsetvalues4_ MATSETVALUES4 1352af674e45SBarry Smith #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 1353af674e45SBarry Smith #define matsetvalues4_ matsetvalues4 1354af674e45SBarry Smith #endif 1355af674e45SBarry Smith 1356d71ae5a4SJacob Faibussowitsch PETSC_EXTERN void matsetvalues4_(Mat *AA, PetscInt *mm, PetscInt *im, PetscInt *nn, PetscInt *in, PetscScalar *v) 1357d71ae5a4SJacob Faibussowitsch { 1358af674e45SBarry Smith Mat A = *AA; 1359af674e45SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 1360580bdb30SBarry Smith PetscInt *rp, k, low, high, t, row, nrow, i, col, l, N, n = *nn, m = *mm; 1361c1ac3661SBarry Smith PetscInt *ai = a->i, *ailen = a->ilen; 1362c1ac3661SBarry Smith PetscInt *aj = a->j, brow, bcol; 136317ec6a02SBarry Smith PetscInt ridx, cidx, lastcol = -1; 1364af674e45SBarry Smith MatScalar *ap, value, *aa = a->a, *bap; 1365af674e45SBarry Smith 1366af674e45SBarry Smith PetscFunctionBegin; 1367af674e45SBarry Smith for (k = 0; k < m; k++) { /* loop over added rows */ 13689371c9d4SSatish Balay row = im[k]; 13699371c9d4SSatish Balay brow = row / 4; 1370af674e45SBarry Smith rp = aj + ai[brow]; 1371af674e45SBarry Smith ap = aa + 16 * ai[brow]; 1372af674e45SBarry Smith nrow = ailen[brow]; 1373af674e45SBarry Smith low = 0; 137417ec6a02SBarry Smith high = nrow; 1375af674e45SBarry Smith for (l = 0; l < n; l++) { /* loop over added columns */ 13769371c9d4SSatish Balay col = in[l]; 13779371c9d4SSatish Balay bcol = col / 4; 13789371c9d4SSatish Balay ridx = row % 4; 13799371c9d4SSatish Balay cidx = col % 4; 1380af674e45SBarry Smith value = v[l + k * n]; 1381db4deed7SKarl Rupp if (col <= lastcol) low = 0; 1382db4deed7SKarl Rupp else high = nrow; 138317ec6a02SBarry Smith lastcol = col; 1384af674e45SBarry Smith while (high - low > 7) { 1385af674e45SBarry Smith t = (low + high) / 2; 1386af674e45SBarry Smith if (rp[t] > bcol) high = t; 1387af674e45SBarry Smith else low = t; 1388af674e45SBarry Smith 
} 1389af674e45SBarry Smith for (i = low; i < high; i++) { 1390af674e45SBarry Smith if (rp[i] > bcol) break; 1391af674e45SBarry Smith if (rp[i] == bcol) { 1392af674e45SBarry Smith bap = ap + 16 * i + 4 * cidx + ridx; 1393af674e45SBarry Smith *bap += value; 1394af674e45SBarry Smith goto noinsert1; 1395af674e45SBarry Smith } 1396af674e45SBarry Smith } 1397af674e45SBarry Smith N = nrow++ - 1; 139817ec6a02SBarry Smith high++; /* added new column thus must search to one higher than before */ 1399af674e45SBarry Smith /* shift up all the later entries in this row */ 14009566063dSJacob Faibussowitsch PetscCallVoid(PetscArraymove(rp + i + 1, rp + i, N - i + 1)); 14019566063dSJacob Faibussowitsch PetscCallVoid(PetscArraymove(ap + 16 * i + 16, ap + 16 * i, 16 * (N - i + 1))); 14029566063dSJacob Faibussowitsch PetscCallVoid(PetscArrayzero(ap + 16 * i, 16)); 1403af674e45SBarry Smith rp[i] = bcol; 1404af674e45SBarry Smith ap[16 * i + 4 * cidx + ridx] = value; 1405af674e45SBarry Smith noinsert1:; 1406af674e45SBarry Smith low = i; 1407af674e45SBarry Smith } 1408af674e45SBarry Smith ailen[brow] = nrow; 1409af674e45SBarry Smith } 1410be1d678aSKris Buschelman PetscFunctionReturnVoid(); 1411af674e45SBarry Smith } 1412af674e45SBarry Smith
/*
  MatGetRowIJ_SeqBAIJ - Hands back the row-pointer / column-index description of a SeqBAIJ matrix.

  Input Parameters:
+ A               - the matrix
. oshift          - index base requested for the returned arrays (0 or 1)
. symmetric       - if true the structure is first symmetrized with MatToSymmetricIJ_SeqAIJ()
- blockcompressed - if false and the block size is larger than 1, every block is expanded
                    into bs x bs point entries; otherwise one row per block row is returned

  Output Parameters:
+ nn   - number of rows described: a->mbs, multiplied by bs in the expanded case
. inia - row pointers; when this argument is NULL only nn is set
. inja - column indices (optional)
- done - not written by this routine

  Notes:
  Depending on the branch taken the returned arrays are either newly allocated or are
  a->i / a->j themselves (block-compressed, oshift != 1, not symmetric).

  In the expanded case the column indices carry the same index base (oshift) as the row pointers.
*/
static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool blockcompressed, PetscInt *nn, const PetscInt *inia[], const PetscInt *inja[], PetscBool *done)
{
  Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
  PetscInt     i, j, k, l, cnt, n = a->mbs, nz = a->i[n], bs = A->rmap->bs;
  PetscInt    *tia, *tja;
  PetscInt   **ia = (PetscInt **)inia, **ja = (PetscInt **)inja;

  PetscFunctionBegin;
  *nn = n;
  if (!ia) PetscFunctionReturn(PETSC_SUCCESS);

  if (symmetric) {
    /* zero-based symmetrized block structure; tia/tja are allocated by the callee */
    PetscCall(MatToSymmetricIJ_SeqAIJ(n, a->i, a->j, PETSC_TRUE, 0, 0, &tia, &tja));
    nz = tia[n];
  } else {
    tia = a->i;
    tja = a->j;
  }

  if (!blockcompressed && bs > 1) {
    PetscInt next = oshift; /* running start of the next point row */

    (*nn) *= bs;
    /* each block row becomes bs point rows, each holding bs entries per block */
    PetscCall(PetscMalloc1((n + 1) * bs, ia));
    for (i = 0; i < n; i++) {
      const PetscInt rowlen = (tia[i + 1] - tia[i]) * bs;

      for (j = 0; j < bs; j++) {
        (*ia)[i * bs + j] = next;
        next += rowlen;
      }
    }
    if (n) (*ia)[n * bs] = next;

    if (ja) {
      PetscCall(PetscMalloc1(nz * bs * bs, ja));
      cnt = 0;
      for (i = 0; i < n; i++) {
        for (j = 0; j < bs; j++) {
          for (k = tia[i]; k < tia[i + 1]; k++) {
            /* apply oshift so that the column indices use the same base as the row pointers */
            for (l = 0; l < bs; l++) (*ja)[cnt++] = bs * tja[k] + l + oshift;
          }
        }
      }
    }

    if (symmetric) { /* deallocate memory allocated in MatToSymmetricIJ_SeqAIJ() */
      PetscCall(PetscFree(tia));
      PetscCall(PetscFree(tja));
    }
  } else if (oshift == 1) {
    if (symmetric) {
      /* tia/tja are private copies, so they can be shifted in place and handed out */
      nz = tia[A->rmap->n / bs];
      for (i = 0; i < A->rmap->n / bs + 1; i++) tia[i] = tia[i] + 1;
      *ia = tia;
      if (ja) {
        for (i = 0; i < nz; i++) tja[i] = tja[i] + 1;
        *ja = tja;
      }
    } else {
      /* a->i / a->j must stay zero-based: hand out shifted copies */
      nz = a->i[A->rmap->n / bs];
      PetscCall(PetscMalloc1(A->rmap->n / bs + 1, ia));
      for (i = 0; i < A->rmap->n / bs + 1; i++) (*ia)[i] = a->i[i] + 1;
      if (ja) {
        PetscCall(PetscMalloc1(nz, ja));
        for (i = 0; i < nz; i++) (*ja)[i] = a->j[i] + 1;
      }
    }
  } else {
    *ia = tia;
    if (ja) *ja = tja;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
14873b2fbd54SBarry Smith 1488d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool blockcompressed, PetscInt *nn, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done) 1489d71ae5a4SJacob Faibussowitsch { 14903a40ed3dSBarry Smith PetscFunctionBegin; 14913ba16761SJacob Faibussowitsch if (!ia) PetscFunctionReturn(PETSC_SUCCESS); 1492715a17b5SBarry Smith if ((!blockcompressed && A->rmap->bs > 1) || (symmetric || oshift == 1)) { 14939566063dSJacob Faibussowitsch PetscCall(PetscFree(*ia)); 14949566063dSJacob Faibussowitsch if (ja) PetscCall(PetscFree(*ja)); 14953b2fbd54SBarry Smith } 14963ba16761SJacob
Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 14973b2fbd54SBarry Smith } 14983b2fbd54SBarry Smith 1499d71ae5a4SJacob Faibussowitsch PetscErrorCode MatDestroy_SeqBAIJ(Mat A) 1500d71ae5a4SJacob Faibussowitsch { 15012d61bbb3SSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 15022d61bbb3SSatish Balay 1503433994e6SBarry Smith PetscFunctionBegin; 1504b4e2f619SBarry Smith if (A->hash_active) { 1505b4e2f619SBarry Smith PetscInt bs; 1506e3c72094SPierre Jolivet A->ops[0] = a->cops; 1507b4e2f619SBarry Smith PetscCall(PetscHMapIJVDestroy(&a->ht)); 1508b4e2f619SBarry Smith PetscCall(MatGetBlockSize(A, &bs)); 1509b4e2f619SBarry Smith if (bs > 1) PetscCall(PetscHSetIJDestroy(&a->bht)); 1510b4e2f619SBarry Smith PetscCall(PetscFree(a->dnz)); 1511b4e2f619SBarry Smith PetscCall(PetscFree(a->bdnz)); 1512b4e2f619SBarry Smith A->hash_active = PETSC_FALSE; 1513b4e2f619SBarry Smith } 15143ba16761SJacob Faibussowitsch PetscCall(PetscLogObjectState((PetscObject)A, "Rows=%" PetscInt_FMT ", Cols=%" PetscInt_FMT ", NZ=%" PetscInt_FMT, A->rmap->N, A->cmap->n, a->nz)); 15159566063dSJacob Faibussowitsch PetscCall(MatSeqXAIJFreeAIJ(A, &a->a, &a->j, &a->i)); 15169566063dSJacob Faibussowitsch PetscCall(ISDestroy(&a->row)); 15179566063dSJacob Faibussowitsch PetscCall(ISDestroy(&a->col)); 1518*421480d9SBarry Smith PetscCall(PetscFree(a->diag)); 15199566063dSJacob Faibussowitsch PetscCall(PetscFree(a->idiag)); 15209566063dSJacob Faibussowitsch if (a->free_imax_ilen) PetscCall(PetscFree2(a->imax, a->ilen)); 15219566063dSJacob Faibussowitsch PetscCall(PetscFree(a->solve_work)); 15229566063dSJacob Faibussowitsch PetscCall(PetscFree(a->mult_work)); 15239566063dSJacob Faibussowitsch PetscCall(PetscFree(a->sor_workt)); 15249566063dSJacob Faibussowitsch PetscCall(PetscFree(a->sor_work)); 15259566063dSJacob Faibussowitsch PetscCall(ISDestroy(&a->icol)); 15269566063dSJacob Faibussowitsch PetscCall(PetscFree(a->saved_values)); 15279566063dSJacob Faibussowitsch PetscCall(PetscFree2(a->compressedrow.i, 
a->compressedrow.rindex)); 1528c4319e64SHong Zhang 15299566063dSJacob Faibussowitsch PetscCall(MatDestroy(&a->sbaijMat)); 15309566063dSJacob Faibussowitsch PetscCall(MatDestroy(&a->parent)); 15319566063dSJacob Faibussowitsch PetscCall(PetscFree(A->data)); 1532901853e0SKris Buschelman 15339566063dSJacob Faibussowitsch PetscCall(PetscObjectChangeTypeName((PetscObject)A, NULL)); 15349566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatSeqBAIJGetArray_C", NULL)); 15359566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatSeqBAIJRestoreArray_C", NULL)); 15369566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatStoreValues_C", NULL)); 15379566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatRetrieveValues_C", NULL)); 15389566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatSeqBAIJSetColumnIndices_C", NULL)); 15399566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqbaij_seqaij_C", NULL)); 15409566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqbaij_seqsbaij_C", NULL)); 15419566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatSeqBAIJSetPreallocation_C", NULL)); 15429566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatSeqBAIJSetPreallocationCSR_C", NULL)); 15439566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqbaij_seqbstrm_C", NULL)); 15449566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatIsTranspose_C", NULL)); 15457ea3e4caSstefano_zampini #if defined(PETSC_HAVE_HYPRE) 15469566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqbaij_hypre_C", NULL)); 15477ea3e4caSstefano_zampini #endif 15489566063dSJacob Faibussowitsch 
PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqbaij_is_C", NULL)); 15492e956fe4SStefano Zampini PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatFactorGetSolverType_C", NULL)); 15503ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 15512d61bbb3SSatish Balay } 15522d61bbb3SSatish Balay 155366976f2fSJacob Faibussowitsch static PetscErrorCode MatSetOption_SeqBAIJ(Mat A, MatOption op, PetscBool flg) 1554d71ae5a4SJacob Faibussowitsch { 15552d61bbb3SSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 15562d61bbb3SSatish Balay 15572d61bbb3SSatish Balay PetscFunctionBegin; 1558aa275fccSKris Buschelman switch (op) { 1559d71ae5a4SJacob Faibussowitsch case MAT_ROW_ORIENTED: 1560d71ae5a4SJacob Faibussowitsch a->roworiented = flg; 1561d71ae5a4SJacob Faibussowitsch break; 1562d71ae5a4SJacob Faibussowitsch case MAT_KEEP_NONZERO_PATTERN: 1563d71ae5a4SJacob Faibussowitsch a->keepnonzeropattern = flg; 1564d71ae5a4SJacob Faibussowitsch break; 1565d71ae5a4SJacob Faibussowitsch case MAT_NEW_NONZERO_LOCATIONS: 1566d71ae5a4SJacob Faibussowitsch a->nonew = (flg ? 0 : 1); 1567d71ae5a4SJacob Faibussowitsch break; 1568d71ae5a4SJacob Faibussowitsch case MAT_NEW_NONZERO_LOCATION_ERR: 1569d71ae5a4SJacob Faibussowitsch a->nonew = (flg ? -1 : 0); 1570d71ae5a4SJacob Faibussowitsch break; 1571d71ae5a4SJacob Faibussowitsch case MAT_NEW_NONZERO_ALLOCATION_ERR: 1572d71ae5a4SJacob Faibussowitsch a->nonew = (flg ? -2 : 0); 1573d71ae5a4SJacob Faibussowitsch break; 1574d71ae5a4SJacob Faibussowitsch case MAT_UNUSED_NONZERO_LOCATION_ERR: 1575d71ae5a4SJacob Faibussowitsch a->nounused = (flg ? 
-1 : 0); 1576d71ae5a4SJacob Faibussowitsch break; 1577d71ae5a4SJacob Faibussowitsch default: 1578888c827cSStefano Zampini break; 15792d61bbb3SSatish Balay } 15803ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 15812d61bbb3SSatish Balay } 15822d61bbb3SSatish Balay 158352768537SHong Zhang /* used for both SeqBAIJ and SeqSBAIJ matrices */ 1584d71ae5a4SJacob Faibussowitsch PetscErrorCode MatGetRow_SeqBAIJ_private(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v, PetscInt *ai, PetscInt *aj, PetscScalar *aa) 1585d71ae5a4SJacob Faibussowitsch { 158652768537SHong Zhang PetscInt itmp, i, j, k, M, bn, bp, *idx_i, bs, bs2; 158752768537SHong Zhang MatScalar *aa_i; 158887828ca2SBarry Smith PetscScalar *v_i; 15892d61bbb3SSatish Balay 15902d61bbb3SSatish Balay PetscFunctionBegin; 1591d0f46423SBarry Smith bs = A->rmap->bs; 159252768537SHong Zhang bs2 = bs * bs; 15935f80ce2aSJacob Faibussowitsch PetscCheck(row >= 0 && row < A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " out of range", row); 15942d61bbb3SSatish Balay 15952d61bbb3SSatish Balay bn = row / bs; /* Block number */ 15962d61bbb3SSatish Balay bp = row % bs; /* Block Position */ 15972d61bbb3SSatish Balay M = ai[bn + 1] - ai[bn]; 15982d61bbb3SSatish Balay *nz = bs * M; 15992d61bbb3SSatish Balay 16002d61bbb3SSatish Balay if (v) { 1601f4259b30SLisandro Dalcin *v = NULL; 16022d61bbb3SSatish Balay if (*nz) { 16039566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(*nz, v)); 16042d61bbb3SSatish Balay for (i = 0; i < M; i++) { /* for each block in the block row */ 16052d61bbb3SSatish Balay v_i = *v + i * bs; 16062d61bbb3SSatish Balay aa_i = aa + bs2 * (ai[bn] + i); 160726fbe8dcSKarl Rupp for (j = bp, k = 0; j < bs2; j += bs, k++) v_i[k] = aa_i[j]; 16082d61bbb3SSatish Balay } 16092d61bbb3SSatish Balay } 16102d61bbb3SSatish Balay } 16112d61bbb3SSatish Balay 16122d61bbb3SSatish Balay if (idx) { 1613f4259b30SLisandro Dalcin *idx = NULL; 16142d61bbb3SSatish Balay if (*nz) 
{ 16159566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(*nz, idx)); 16162d61bbb3SSatish Balay for (i = 0; i < M; i++) { /* for each block in the block row */ 16172d61bbb3SSatish Balay idx_i = *idx + i * bs; 16182d61bbb3SSatish Balay itmp = bs * aj[ai[bn] + i]; 161926fbe8dcSKarl Rupp for (j = 0; j < bs; j++) idx_i[j] = itmp++; 16202d61bbb3SSatish Balay } 16212d61bbb3SSatish Balay } 16222d61bbb3SSatish Balay } 16233ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 16242d61bbb3SSatish Balay } 16252d61bbb3SSatish Balay 1626d71ae5a4SJacob Faibussowitsch PetscErrorCode MatGetRow_SeqBAIJ(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v) 1627d71ae5a4SJacob Faibussowitsch { 162852768537SHong Zhang Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 162952768537SHong Zhang 163052768537SHong Zhang PetscFunctionBegin; 16319566063dSJacob Faibussowitsch PetscCall(MatGetRow_SeqBAIJ_private(A, row, nz, idx, v, a->i, a->j, a->a)); 16323ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 163352768537SHong Zhang } 163452768537SHong Zhang 1635d71ae5a4SJacob Faibussowitsch PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v) 1636d71ae5a4SJacob Faibussowitsch { 16372d61bbb3SSatish Balay PetscFunctionBegin; 16389566063dSJacob Faibussowitsch if (idx) PetscCall(PetscFree(*idx)); 16399566063dSJacob Faibussowitsch if (v) PetscCall(PetscFree(*v)); 16403ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 16412d61bbb3SSatish Balay } 16422d61bbb3SSatish Balay 164366976f2fSJacob Faibussowitsch static PetscErrorCode MatTranspose_SeqBAIJ(Mat A, MatReuse reuse, Mat *B) 1644d71ae5a4SJacob Faibussowitsch { 164520e84f26SHong Zhang Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data, *at; 16462d61bbb3SSatish Balay Mat C; 164720e84f26SHong Zhang PetscInt i, j, k, *aj = a->j, *ai = a->i, bs = A->rmap->bs, mbs = a->mbs, nbs = a->nbs, *atfill; 164820e84f26SHong Zhang PetscInt bs2 = a->bs2, *ati, *atj, anzj, kr; 
164920e84f26SHong Zhang MatScalar *ata, *aa = a->a; 16502d61bbb3SSatish Balay 16512d61bbb3SSatish Balay PetscFunctionBegin; 16527fb60732SBarry Smith if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *B)); 16539566063dSJacob Faibussowitsch PetscCall(PetscCalloc1(1 + nbs, &atfill)); 1654cf37664fSBarry Smith if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) { 165520e84f26SHong Zhang for (i = 0; i < ai[mbs]; i++) atfill[aj[i]] += 1; /* count num of non-zeros in row aj[i] */ 16562d61bbb3SSatish Balay 16579566063dSJacob Faibussowitsch PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &C)); 16589566063dSJacob Faibussowitsch PetscCall(MatSetSizes(C, A->cmap->n, A->rmap->N, A->cmap->n, A->rmap->N)); 16599566063dSJacob Faibussowitsch PetscCall(MatSetType(C, ((PetscObject)A)->type_name)); 16609566063dSJacob Faibussowitsch PetscCall(MatSeqBAIJSetPreallocation(C, bs, 0, atfill)); 166120e84f26SHong Zhang 166220e84f26SHong Zhang at = (Mat_SeqBAIJ *)C->data; 166320e84f26SHong Zhang ati = at->i; 166420e84f26SHong Zhang for (i = 0; i < nbs; i++) at->ilen[i] = at->imax[i] = ati[i + 1] - ati[i]; 1665fc4dec0aSBarry Smith } else { 1666fc4dec0aSBarry Smith C = *B; 166720e84f26SHong Zhang at = (Mat_SeqBAIJ *)C->data; 166820e84f26SHong Zhang ati = at->i; 1669fc4dec0aSBarry Smith } 1670fc4dec0aSBarry Smith 167120e84f26SHong Zhang atj = at->j; 167220e84f26SHong Zhang ata = at->a; 167320e84f26SHong Zhang 167420e84f26SHong Zhang /* Copy ati into atfill so we have locations of the next free space in atj */ 16759566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(atfill, ati, nbs)); 167620e84f26SHong Zhang 167720e84f26SHong Zhang /* Walk through A row-wise and mark nonzero entries of A^T. 
*/ 16782d61bbb3SSatish Balay for (i = 0; i < mbs; i++) { 167920e84f26SHong Zhang anzj = ai[i + 1] - ai[i]; 168020e84f26SHong Zhang for (j = 0; j < anzj; j++) { 168120e84f26SHong Zhang atj[atfill[*aj]] = i; 168220e84f26SHong Zhang for (kr = 0; kr < bs; kr++) { 1683ad540459SPierre Jolivet for (k = 0; k < bs; k++) ata[bs2 * atfill[*aj] + k * bs + kr] = *aa++; 16842d61bbb3SSatish Balay } 168520e84f26SHong Zhang atfill[*aj++] += 1; 168620e84f26SHong Zhang } 168720e84f26SHong Zhang } 16889566063dSJacob Faibussowitsch PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY)); 16899566063dSJacob Faibussowitsch PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY)); 16902d61bbb3SSatish Balay 169120e84f26SHong Zhang /* Clean up temporary space and complete requests. */ 16929566063dSJacob Faibussowitsch PetscCall(PetscFree(atfill)); 169320e84f26SHong Zhang 1694cf37664fSBarry Smith if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) { 169558b7e2c1SStefano Zampini PetscCall(MatSetBlockSizes(C, A->cmap->bs, A->rmap->bs)); 16962d61bbb3SSatish Balay *B = C; 16972d61bbb3SSatish Balay } else { 16989566063dSJacob Faibussowitsch PetscCall(MatHeaderMerge(A, &C)); 16992d61bbb3SSatish Balay } 17003ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 17012d61bbb3SSatish Balay } 17022d61bbb3SSatish Balay 1703ff6a9541SJacob Faibussowitsch static PetscErrorCode MatIsTranspose_SeqBAIJ(Mat A, Mat B, PetscReal tol, PetscBool *f) 1704d71ae5a4SJacob Faibussowitsch { 1705453d3561SHong Zhang Mat Btrans; 1706453d3561SHong Zhang 1707453d3561SHong Zhang PetscFunctionBegin; 1708453d3561SHong Zhang *f = PETSC_FALSE; 1709acd337a6SBarry Smith PetscCall(MatTranspose(A, MAT_INITIAL_MATRIX, &Btrans)); 17109566063dSJacob Faibussowitsch PetscCall(MatEqual_SeqBAIJ(B, Btrans, f)); 17119566063dSJacob Faibussowitsch PetscCall(MatDestroy(&Btrans)); 17123ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1713453d3561SHong Zhang } 1714453d3561SHong Zhang 1715618cc2edSLisandro Dalcin /* Used for 
both SeqBAIJ and SeqSBAIJ matrices */ 1716d71ae5a4SJacob Faibussowitsch PetscErrorCode MatView_SeqBAIJ_Binary(Mat mat, PetscViewer viewer) 1717d71ae5a4SJacob Faibussowitsch { 1718b51a4376SLisandro Dalcin Mat_SeqBAIJ *A = (Mat_SeqBAIJ *)mat->data; 1719b51a4376SLisandro Dalcin PetscInt header[4], M, N, m, bs, nz, cnt, i, j, k, l; 1720b51a4376SLisandro Dalcin PetscInt *rowlens, *colidxs; 1721b51a4376SLisandro Dalcin PetscScalar *matvals; 17222593348eSBarry Smith 17233a40ed3dSBarry Smith PetscFunctionBegin; 17249566063dSJacob Faibussowitsch PetscCall(PetscViewerSetUp(viewer)); 17253b2fbd54SBarry Smith 1726b51a4376SLisandro Dalcin M = mat->rmap->N; 1727b51a4376SLisandro Dalcin N = mat->cmap->N; 1728b51a4376SLisandro Dalcin m = mat->rmap->n; 1729b51a4376SLisandro Dalcin bs = mat->rmap->bs; 1730b51a4376SLisandro Dalcin nz = bs * bs * A->nz; 17312593348eSBarry Smith 1732b51a4376SLisandro Dalcin /* write matrix header */ 1733b51a4376SLisandro Dalcin header[0] = MAT_FILE_CLASSID; 17349371c9d4SSatish Balay header[1] = M; 17359371c9d4SSatish Balay header[2] = N; 17369371c9d4SSatish Balay header[3] = nz; 17379566063dSJacob Faibussowitsch PetscCall(PetscViewerBinaryWrite(viewer, header, 4, PETSC_INT)); 17382593348eSBarry Smith 1739b51a4376SLisandro Dalcin /* store row lengths */ 17409566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(m, &rowlens)); 1741b51a4376SLisandro Dalcin for (cnt = 0, i = 0; i < A->mbs; i++) 17429371c9d4SSatish Balay for (j = 0; j < bs; j++) rowlens[cnt++] = bs * (A->i[i + 1] - A->i[i]); 17439566063dSJacob Faibussowitsch PetscCall(PetscViewerBinaryWrite(viewer, rowlens, m, PETSC_INT)); 17449566063dSJacob Faibussowitsch PetscCall(PetscFree(rowlens)); 1745b51a4376SLisandro Dalcin 1746b51a4376SLisandro Dalcin /* store column indices */ 17479566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(nz, &colidxs)); 1748b51a4376SLisandro Dalcin for (cnt = 0, i = 0; i < A->mbs; i++) 1749b51a4376SLisandro Dalcin for (k = 0; k < bs; k++) 1750b51a4376SLisandro Dalcin for 
(j = A->i[i]; j < A->i[i + 1]; j++) 17519371c9d4SSatish Balay for (l = 0; l < bs; l++) colidxs[cnt++] = bs * A->j[j] + l; 17525f80ce2aSJacob Faibussowitsch PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_LIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt_FMT, cnt, nz); 17539566063dSJacob Faibussowitsch PetscCall(PetscViewerBinaryWrite(viewer, colidxs, nz, PETSC_INT)); 17549566063dSJacob Faibussowitsch PetscCall(PetscFree(colidxs)); 17552593348eSBarry Smith 17562593348eSBarry Smith /* store nonzero values */ 17579566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(nz, &matvals)); 1758b51a4376SLisandro Dalcin for (cnt = 0, i = 0; i < A->mbs; i++) 1759b51a4376SLisandro Dalcin for (k = 0; k < bs; k++) 1760b51a4376SLisandro Dalcin for (j = A->i[i]; j < A->i[i + 1]; j++) 17619371c9d4SSatish Balay for (l = 0; l < bs; l++) matvals[cnt++] = A->a[bs * (bs * j + l) + k]; 17625f80ce2aSJacob Faibussowitsch PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_LIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt_FMT, cnt, nz); 17639566063dSJacob Faibussowitsch PetscCall(PetscViewerBinaryWrite(viewer, matvals, nz, PETSC_SCALAR)); 17649566063dSJacob Faibussowitsch PetscCall(PetscFree(matvals)); 1765ce6f0cecSBarry Smith 1766b51a4376SLisandro Dalcin /* write block size option to the viewer's .info file */ 17679566063dSJacob Faibussowitsch PetscCall(MatView_Binary_BlockSizes(mat, viewer)); 17683ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 17692593348eSBarry Smith } 17702593348eSBarry Smith 1771d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatView_SeqBAIJ_ASCII_structonly(Mat A, PetscViewer viewer) 1772d71ae5a4SJacob Faibussowitsch { 17737dc0baabSHong Zhang Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 17747dc0baabSHong Zhang PetscInt i, bs = A->rmap->bs, k; 17757dc0baabSHong Zhang 17767dc0baabSHong Zhang PetscFunctionBegin; 17779566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_FALSE)); 17787dc0baabSHong 
Zhang for (i = 0; i < a->mbs; i++) { 17799566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, "row %" PetscInt_FMT "-%" PetscInt_FMT ":", i * bs, i * bs + bs - 1)); 178048a46eb9SPierre Jolivet for (k = a->i[i]; k < a->i[i + 1]; k++) PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT "-%" PetscInt_FMT ") ", bs * a->j[k], bs * a->j[k] + bs - 1)); 17819566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, "\n")); 17827dc0baabSHong Zhang } 17839566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_TRUE)); 17843ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 17857dc0baabSHong Zhang } 17867dc0baabSHong Zhang 1787d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A, PetscViewer viewer) 1788d71ae5a4SJacob Faibussowitsch { 1789b6490206SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 1790d0f46423SBarry Smith PetscInt i, j, bs = A->rmap->bs, k, l, bs2 = a->bs2; 1791f3ef73ceSBarry Smith PetscViewerFormat format; 17922593348eSBarry Smith 17933a40ed3dSBarry Smith PetscFunctionBegin; 17947dc0baabSHong Zhang if (A->structure_only) { 17959566063dSJacob Faibussowitsch PetscCall(MatView_SeqBAIJ_ASCII_structonly(A, viewer)); 17963ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 17977dc0baabSHong Zhang } 17987dc0baabSHong Zhang 17999566063dSJacob Faibussowitsch PetscCall(PetscViewerGetFormat(viewer, &format)); 1800456192e2SBarry Smith if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 18019566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " block size is %" PetscInt_FMT "\n", bs)); 1802fb9695e5SSatish Balay } else if (format == PETSC_VIEWER_ASCII_MATLAB) { 1803ade3a672SBarry Smith const char *matname; 1804bcd9e38bSBarry Smith Mat aij; 18059566063dSJacob Faibussowitsch PetscCall(MatConvert(A, MATSEQAIJ, MAT_INITIAL_MATRIX, &aij)); 18069566063dSJacob Faibussowitsch 
PetscCall(PetscObjectGetName((PetscObject)A, &matname)); 18079566063dSJacob Faibussowitsch PetscCall(PetscObjectSetName((PetscObject)aij, matname)); 18089566063dSJacob Faibussowitsch PetscCall(MatView(aij, viewer)); 18099566063dSJacob Faibussowitsch PetscCall(MatDestroy(&aij)); 181004929863SHong Zhang } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 18113ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1812fb9695e5SSatish Balay } else if (format == PETSC_VIEWER_ASCII_COMMON) { 18139566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_FALSE)); 181444cd7ae7SLois Curfman McInnes for (i = 0; i < a->mbs; i++) { 181544cd7ae7SLois Curfman McInnes for (j = 0; j < bs; j++) { 18169566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, "row %" PetscInt_FMT ":", i * bs + j)); 181744cd7ae7SLois Curfman McInnes for (k = a->i[i]; k < a->i[i + 1]; k++) { 181844cd7ae7SLois Curfman McInnes for (l = 0; l < bs; l++) { 1819aa482453SBarry Smith #if defined(PETSC_USE_COMPLEX) 18200e6d2581SBarry Smith if (PetscImaginaryPart(a->a[bs2 * k + l * bs + j]) > 0.0 && PetscRealPart(a->a[bs2 * k + l * bs + j]) != 0.0) { 18219371c9d4SSatish Balay PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g + %gi) ", bs * a->j[k] + l, (double)PetscRealPart(a->a[bs2 * k + l * bs + j]), (double)PetscImaginaryPart(a->a[bs2 * k + l * bs + j]))); 18220e6d2581SBarry Smith } else if (PetscImaginaryPart(a->a[bs2 * k + l * bs + j]) < 0.0 && PetscRealPart(a->a[bs2 * k + l * bs + j]) != 0.0) { 18239371c9d4SSatish Balay PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g - %gi) ", bs * a->j[k] + l, (double)PetscRealPart(a->a[bs2 * k + l * bs + j]), -(double)PetscImaginaryPart(a->a[bs2 * k + l * bs + j]))); 18240e6d2581SBarry Smith } else if (PetscRealPart(a->a[bs2 * k + l * bs + j]) != 0.0) { 18259566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g) ", bs * a->j[k] + l, 
(double)PetscRealPart(a->a[bs2 * k + l * bs + j]))); 18260ef38995SBarry Smith } 182744cd7ae7SLois Curfman McInnes #else 182848a46eb9SPierre Jolivet if (a->a[bs2 * k + l * bs + j] != 0.0) PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g) ", bs * a->j[k] + l, (double)a->a[bs2 * k + l * bs + j])); 182944cd7ae7SLois Curfman McInnes #endif 183044cd7ae7SLois Curfman McInnes } 183144cd7ae7SLois Curfman McInnes } 18329566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, "\n")); 183344cd7ae7SLois Curfman McInnes } 183444cd7ae7SLois Curfman McInnes } 18359566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_TRUE)); 18360ef38995SBarry Smith } else { 18379566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_FALSE)); 1838b6490206SBarry Smith for (i = 0; i < a->mbs; i++) { 1839b6490206SBarry Smith for (j = 0; j < bs; j++) { 18409566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, "row %" PetscInt_FMT ":", i * bs + j)); 1841b6490206SBarry Smith for (k = a->i[i]; k < a->i[i + 1]; k++) { 1842b6490206SBarry Smith for (l = 0; l < bs; l++) { 1843aa482453SBarry Smith #if defined(PETSC_USE_COMPLEX) 18440e6d2581SBarry Smith if (PetscImaginaryPart(a->a[bs2 * k + l * bs + j]) > 0.0) { 18459371c9d4SSatish Balay PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g + %g i) ", bs * a->j[k] + l, (double)PetscRealPart(a->a[bs2 * k + l * bs + j]), (double)PetscImaginaryPart(a->a[bs2 * k + l * bs + j]))); 18460e6d2581SBarry Smith } else if (PetscImaginaryPart(a->a[bs2 * k + l * bs + j]) < 0.0) { 18479371c9d4SSatish Balay PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g - %g i) ", bs * a->j[k] + l, (double)PetscRealPart(a->a[bs2 * k + l * bs + j]), -(double)PetscImaginaryPart(a->a[bs2 * k + l * bs + j]))); 18480ef38995SBarry Smith } else { 18499566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g) ", bs * a->j[k] + l, 
(double)PetscRealPart(a->a[bs2 * k + l * bs + j]))); 185088685aaeSLois Curfman McInnes } 185188685aaeSLois Curfman McInnes #else 18529566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g) ", bs * a->j[k] + l, (double)a->a[bs2 * k + l * bs + j])); 185388685aaeSLois Curfman McInnes #endif 18542593348eSBarry Smith } 18552593348eSBarry Smith } 18569566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, "\n")); 18572593348eSBarry Smith } 18582593348eSBarry Smith } 18599566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_TRUE)); 1860b6490206SBarry Smith } 18619566063dSJacob Faibussowitsch PetscCall(PetscViewerFlush(viewer)); 18623ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 18632593348eSBarry Smith } 18642593348eSBarry Smith 18659804daf3SBarry Smith #include <petscdraw.h> 1866d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw, void *Aa) 1867d71ae5a4SJacob Faibussowitsch { 186877ed5343SBarry Smith Mat A = (Mat)Aa; 18693270192aSSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 18706497c311SBarry Smith PetscInt row, i, j, k, l, mbs = a->mbs, bs = A->rmap->bs, bs2 = a->bs2; 18710e6d2581SBarry Smith PetscReal xl, yl, xr, yr, x_l, x_r, y_l, y_r; 18723f1db9ecSBarry Smith MatScalar *aa; 1873b0a32e0cSBarry Smith PetscViewer viewer; 1874b3e7f47fSJed Brown PetscViewerFormat format; 18756497c311SBarry Smith int color; 18763270192aSSatish Balay 18773a40ed3dSBarry Smith PetscFunctionBegin; 18789566063dSJacob Faibussowitsch PetscCall(PetscObjectQuery((PetscObject)A, "Zoomviewer", (PetscObject *)&viewer)); 18799566063dSJacob Faibussowitsch PetscCall(PetscViewerGetFormat(viewer, &format)); 18809566063dSJacob Faibussowitsch PetscCall(PetscDrawGetCoordinates(draw, &xl, &yl, &xr, &yr)); 188177ed5343SBarry Smith 18823270192aSSatish Balay /* loop over matrix elements drawing boxes */ 1883b3e7f47fSJed Brown 1884b3e7f47fSJed Brown if (format != 
PETSC_VIEWER_DRAW_CONTOUR) { 1885d0609cedSBarry Smith PetscDrawCollectiveBegin(draw); 1886383922c3SLisandro Dalcin /* Blue for negative, Cyan for zero and Red for positive */ 1887b0a32e0cSBarry Smith color = PETSC_DRAW_BLUE; 18883270192aSSatish Balay for (i = 0, row = 0; i < mbs; i++, row += bs) { 18893270192aSSatish Balay for (j = a->i[i]; j < a->i[i + 1]; j++) { 18909371c9d4SSatish Balay y_l = A->rmap->N - row - 1.0; 18919371c9d4SSatish Balay y_r = y_l + 1.0; 18929371c9d4SSatish Balay x_l = a->j[j] * bs; 18939371c9d4SSatish Balay x_r = x_l + 1.0; 18943270192aSSatish Balay aa = a->a + j * bs2; 18953270192aSSatish Balay for (k = 0; k < bs; k++) { 18963270192aSSatish Balay for (l = 0; l < bs; l++) { 18970e6d2581SBarry Smith if (PetscRealPart(*aa++) >= 0.) continue; 18989566063dSJacob Faibussowitsch PetscCall(PetscDrawRectangle(draw, x_l + k, y_l - l, x_r + k, y_r - l, color, color, color, color)); 18993270192aSSatish Balay } 19003270192aSSatish Balay } 19013270192aSSatish Balay } 19023270192aSSatish Balay } 1903b0a32e0cSBarry Smith color = PETSC_DRAW_CYAN; 19043270192aSSatish Balay for (i = 0, row = 0; i < mbs; i++, row += bs) { 19053270192aSSatish Balay for (j = a->i[i]; j < a->i[i + 1]; j++) { 19069371c9d4SSatish Balay y_l = A->rmap->N - row - 1.0; 19079371c9d4SSatish Balay y_r = y_l + 1.0; 19089371c9d4SSatish Balay x_l = a->j[j] * bs; 19099371c9d4SSatish Balay x_r = x_l + 1.0; 19103270192aSSatish Balay aa = a->a + j * bs2; 19113270192aSSatish Balay for (k = 0; k < bs; k++) { 19123270192aSSatish Balay for (l = 0; l < bs; l++) { 19130e6d2581SBarry Smith if (PetscRealPart(*aa++) != 0.) 
continue; 19149566063dSJacob Faibussowitsch PetscCall(PetscDrawRectangle(draw, x_l + k, y_l - l, x_r + k, y_r - l, color, color, color, color)); 19153270192aSSatish Balay } 19163270192aSSatish Balay } 19173270192aSSatish Balay } 19183270192aSSatish Balay } 1919b0a32e0cSBarry Smith color = PETSC_DRAW_RED; 19203270192aSSatish Balay for (i = 0, row = 0; i < mbs; i++, row += bs) { 19213270192aSSatish Balay for (j = a->i[i]; j < a->i[i + 1]; j++) { 19229371c9d4SSatish Balay y_l = A->rmap->N - row - 1.0; 19239371c9d4SSatish Balay y_r = y_l + 1.0; 19249371c9d4SSatish Balay x_l = a->j[j] * bs; 19259371c9d4SSatish Balay x_r = x_l + 1.0; 19263270192aSSatish Balay aa = a->a + j * bs2; 19273270192aSSatish Balay for (k = 0; k < bs; k++) { 19283270192aSSatish Balay for (l = 0; l < bs; l++) { 19290e6d2581SBarry Smith if (PetscRealPart(*aa++) <= 0.) continue; 19309566063dSJacob Faibussowitsch PetscCall(PetscDrawRectangle(draw, x_l + k, y_l - l, x_r + k, y_r - l, color, color, color, color)); 19313270192aSSatish Balay } 19323270192aSSatish Balay } 19333270192aSSatish Balay } 19343270192aSSatish Balay } 1935d0609cedSBarry Smith PetscDrawCollectiveEnd(draw); 1936b3e7f47fSJed Brown } else { 1937b3e7f47fSJed Brown /* use contour shading to indicate magnitude of values */ 1938b3e7f47fSJed Brown /* first determine max of all nonzero values */ 1939b05fc000SLisandro Dalcin PetscReal minv = 0.0, maxv = 0.0; 1940b3e7f47fSJed Brown PetscDraw popup; 1941b3e7f47fSJed Brown 1942b3e7f47fSJed Brown for (i = 0; i < a->nz * a->bs2; i++) { 1943b3e7f47fSJed Brown if (PetscAbsScalar(a->a[i]) > maxv) maxv = PetscAbsScalar(a->a[i]); 1944b3e7f47fSJed Brown } 1945383922c3SLisandro Dalcin if (minv >= maxv) maxv = minv + PETSC_SMALL; 19469566063dSJacob Faibussowitsch PetscCall(PetscDrawGetPopup(draw, &popup)); 19479566063dSJacob Faibussowitsch PetscCall(PetscDrawScalePopup(popup, 0.0, maxv)); 1948383922c3SLisandro Dalcin 1949d0609cedSBarry Smith PetscDrawCollectiveBegin(draw); 1950b3e7f47fSJed Brown for (i = 
0, row = 0; i < mbs; i++, row += bs) { 1951b3e7f47fSJed Brown for (j = a->i[i]; j < a->i[i + 1]; j++) { 19529371c9d4SSatish Balay y_l = A->rmap->N - row - 1.0; 19539371c9d4SSatish Balay y_r = y_l + 1.0; 19549371c9d4SSatish Balay x_l = a->j[j] * bs; 19559371c9d4SSatish Balay x_r = x_l + 1.0; 1956b3e7f47fSJed Brown aa = a->a + j * bs2; 1957b3e7f47fSJed Brown for (k = 0; k < bs; k++) { 1958b3e7f47fSJed Brown for (l = 0; l < bs; l++) { 1959383922c3SLisandro Dalcin MatScalar v = *aa++; 1960383922c3SLisandro Dalcin color = PetscDrawRealToColor(PetscAbsScalar(v), minv, maxv); 19619566063dSJacob Faibussowitsch PetscCall(PetscDrawRectangle(draw, x_l + k, y_l - l, x_r + k, y_r - l, color, color, color, color)); 1962b3e7f47fSJed Brown } 1963b3e7f47fSJed Brown } 1964b3e7f47fSJed Brown } 1965b3e7f47fSJed Brown } 1966d0609cedSBarry Smith PetscDrawCollectiveEnd(draw); 1967b3e7f47fSJed Brown } 19683ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 196977ed5343SBarry Smith } 19703270192aSSatish Balay 1971d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A, PetscViewer viewer) 1972d71ae5a4SJacob Faibussowitsch { 19730e6d2581SBarry Smith PetscReal xl, yl, xr, yr, w, h; 1974b0a32e0cSBarry Smith PetscDraw draw; 1975ace3abfcSBarry Smith PetscBool isnull; 19763270192aSSatish Balay 197777ed5343SBarry Smith PetscFunctionBegin; 19789566063dSJacob Faibussowitsch PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw)); 19799566063dSJacob Faibussowitsch PetscCall(PetscDrawIsNull(draw, &isnull)); 19803ba16761SJacob Faibussowitsch if (isnull) PetscFunctionReturn(PETSC_SUCCESS); 198177ed5343SBarry Smith 19829371c9d4SSatish Balay xr = A->cmap->n; 19839371c9d4SSatish Balay yr = A->rmap->N; 19849371c9d4SSatish Balay h = yr / 10.0; 19859371c9d4SSatish Balay w = xr / 10.0; 19869371c9d4SSatish Balay xr += w; 19879371c9d4SSatish Balay yr += h; 19889371c9d4SSatish Balay xl = -w; 19899371c9d4SSatish Balay yl = -h; 19909566063dSJacob Faibussowitsch 
PetscCall(PetscDrawSetCoordinates(draw, xl, yl, xr, yr)); 19919566063dSJacob Faibussowitsch PetscCall(PetscObjectCompose((PetscObject)A, "Zoomviewer", (PetscObject)viewer)); 19929566063dSJacob Faibussowitsch PetscCall(PetscDrawZoom(draw, MatView_SeqBAIJ_Draw_Zoom, A)); 19939566063dSJacob Faibussowitsch PetscCall(PetscObjectCompose((PetscObject)A, "Zoomviewer", NULL)); 19949566063dSJacob Faibussowitsch PetscCall(PetscDrawSave(draw)); 19953ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 19963270192aSSatish Balay } 19973270192aSSatish Balay 1998d71ae5a4SJacob Faibussowitsch PetscErrorCode MatView_SeqBAIJ(Mat A, PetscViewer viewer) 1999d71ae5a4SJacob Faibussowitsch { 20009f196a02SMartin Diehl PetscBool isascii, isbinary, isdraw; 20012593348eSBarry Smith 20023a40ed3dSBarry Smith PetscFunctionBegin; 20039f196a02SMartin Diehl PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &isascii)); 20049566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary)); 20059566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw)); 20069f196a02SMartin Diehl if (isascii) { 20079566063dSJacob Faibussowitsch PetscCall(MatView_SeqBAIJ_ASCII(A, viewer)); 20080f5bd95cSBarry Smith } else if (isbinary) { 20099566063dSJacob Faibussowitsch PetscCall(MatView_SeqBAIJ_Binary(A, viewer)); 20100f5bd95cSBarry Smith } else if (isdraw) { 20119566063dSJacob Faibussowitsch PetscCall(MatView_SeqBAIJ_Draw(A, viewer)); 20125cd90555SBarry Smith } else { 2013a5e6ed63SBarry Smith Mat B; 20149566063dSJacob Faibussowitsch PetscCall(MatConvert(A, MATSEQAIJ, MAT_INITIAL_MATRIX, &B)); 20159566063dSJacob Faibussowitsch PetscCall(MatView(B, viewer)); 20169566063dSJacob Faibussowitsch PetscCall(MatDestroy(&B)); 20172593348eSBarry Smith } 20183ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 20192593348eSBarry Smith } 2020b6490206SBarry Smith 2021d71ae5a4SJacob 
Faibussowitsch PetscErrorCode MatGetValues_SeqBAIJ(Mat A, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], PetscScalar v[]) 2022d71ae5a4SJacob Faibussowitsch { 2023cd0e1443SSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 2024c1ac3661SBarry Smith PetscInt *rp, k, low, high, t, row, nrow, i, col, l, *aj = a->j; 2025c1ac3661SBarry Smith PetscInt *ai = a->i, *ailen = a->ilen; 2026d0f46423SBarry Smith PetscInt brow, bcol, ridx, cidx, bs = A->rmap->bs, bs2 = a->bs2; 202797e567efSBarry Smith MatScalar *ap, *aa = a->a; 2028cd0e1443SSatish Balay 20293a40ed3dSBarry Smith PetscFunctionBegin; 20302d61bbb3SSatish Balay for (k = 0; k < m; k++) { /* loop over rows */ 20319371c9d4SSatish Balay row = im[k]; 20329371c9d4SSatish Balay brow = row / bs; 20339371c9d4SSatish Balay if (row < 0) { 20349371c9d4SSatish Balay v += n; 20359371c9d4SSatish Balay continue; 20369371c9d4SSatish Balay } /* negative row */ 203754c59aa7SJacob Faibussowitsch PetscCheck(row < A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " too large", row); 20388e3a54c0SPierre Jolivet rp = PetscSafePointerPlusOffset(aj, ai[brow]); 20398e3a54c0SPierre Jolivet ap = PetscSafePointerPlusOffset(aa, bs2 * ai[brow]); 20402c3acbe9SBarry Smith nrow = ailen[brow]; 20412d61bbb3SSatish Balay for (l = 0; l < n; l++) { /* loop over columns */ 20429371c9d4SSatish Balay if (in[l] < 0) { 20439371c9d4SSatish Balay v++; 20449371c9d4SSatish Balay continue; 20459371c9d4SSatish Balay } /* negative column */ 204654c59aa7SJacob Faibussowitsch PetscCheck(in[l] < A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column %" PetscInt_FMT " too large", in[l]); 20472d61bbb3SSatish Balay col = in[l]; 20482d61bbb3SSatish Balay bcol = col / bs; 20492d61bbb3SSatish Balay cidx = col % bs; 20502d61bbb3SSatish Balay ridx = row % bs; 20512d61bbb3SSatish Balay high = nrow; 20522d61bbb3SSatish Balay low = 0; /* assume unsorted */ 20532d61bbb3SSatish Balay while (high - low > 5) { 2054cd0e1443SSatish 
Balay t = (low + high) / 2; 2055cd0e1443SSatish Balay if (rp[t] > bcol) high = t; 2056cd0e1443SSatish Balay else low = t; 2057cd0e1443SSatish Balay } 2058cd0e1443SSatish Balay for (i = low; i < high; i++) { 2059cd0e1443SSatish Balay if (rp[i] > bcol) break; 2060cd0e1443SSatish Balay if (rp[i] == bcol) { 20612d61bbb3SSatish Balay *v++ = ap[bs2 * i + bs * cidx + ridx]; 20622d61bbb3SSatish Balay goto finished; 2063cd0e1443SSatish Balay } 2064cd0e1443SSatish Balay } 206597e567efSBarry Smith *v++ = 0.0; 20662d61bbb3SSatish Balay finished:; 2067cd0e1443SSatish Balay } 2068cd0e1443SSatish Balay } 20693ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 2070cd0e1443SSatish Balay } 2071cd0e1443SSatish Balay 2072d71ae5a4SJacob Faibussowitsch PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat A, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode is) 2073d71ae5a4SJacob Faibussowitsch { 207492c4ed94SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 2075e2ee6c50SBarry Smith PetscInt *rp, k, low, high, t, ii, jj, row, nrow, i, col, l, rmax, N, lastcol = -1; 2076c1ac3661SBarry Smith PetscInt *imax = a->imax, *ai = a->i, *ailen = a->ilen; 2077d0f46423SBarry Smith PetscInt *aj = a->j, nonew = a->nonew, bs2 = a->bs2, bs = A->rmap->bs, stepval; 2078ace3abfcSBarry Smith PetscBool roworiented = a->roworiented; 2079dd6ea824SBarry Smith const PetscScalar *value = v; 20809d243f67SHong Zhang MatScalar *ap = NULL, *aa = a->a, *bap; 208192c4ed94SBarry Smith 20823a40ed3dSBarry Smith PetscFunctionBegin; 20830e324ae4SSatish Balay if (roworiented) { 20840e324ae4SSatish Balay stepval = (n - 1) * bs; 20850e324ae4SSatish Balay } else { 20860e324ae4SSatish Balay stepval = (m - 1) * bs; 20870e324ae4SSatish Balay } 208892c4ed94SBarry Smith for (k = 0; k < m; k++) { /* loop over added rows */ 208992c4ed94SBarry Smith row = im[k]; 20905ef9f2a5SBarry Smith if (row < 0) continue; 20916bdcaf15SBarry Smith PetscCheck(row < a->mbs, PETSC_COMM_SELF, 
PETSC_ERR_ARG_OUTOFRANGE, "Block row index too large %" PetscInt_FMT " max %" PetscInt_FMT, row, a->mbs - 1); 209292c4ed94SBarry Smith rp = aj + ai[row]; 20937dc0baabSHong Zhang if (!A->structure_only) ap = aa + bs2 * ai[row]; 209492c4ed94SBarry Smith rmax = imax[row]; 209592c4ed94SBarry Smith nrow = ailen[row]; 209692c4ed94SBarry Smith low = 0; 2097c71e6ed7SBarry Smith high = nrow; 209892c4ed94SBarry Smith for (l = 0; l < n; l++) { /* loop over added columns */ 20995ef9f2a5SBarry Smith if (in[l] < 0) continue; 21006bdcaf15SBarry Smith PetscCheck(in[l] < a->nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Block column index too large %" PetscInt_FMT " max %" PetscInt_FMT, in[l], a->nbs - 1); 210192c4ed94SBarry Smith col = in[l]; 21027dc0baabSHong Zhang if (!A->structure_only) { 210392c4ed94SBarry Smith if (roworiented) { 210453ef36baSBarry Smith value = v + (k * (stepval + bs) + l) * bs; 21050e324ae4SSatish Balay } else { 210653ef36baSBarry Smith value = v + (l * (stepval + bs) + k) * bs; 210792c4ed94SBarry Smith } 21087dc0baabSHong Zhang } 210926fbe8dcSKarl Rupp if (col <= lastcol) low = 0; 211026fbe8dcSKarl Rupp else high = nrow; 2111e2ee6c50SBarry Smith lastcol = col; 211292c4ed94SBarry Smith while (high - low > 7) { 211392c4ed94SBarry Smith t = (low + high) / 2; 211492c4ed94SBarry Smith if (rp[t] > col) high = t; 211592c4ed94SBarry Smith else low = t; 211692c4ed94SBarry Smith } 211792c4ed94SBarry Smith for (i = low; i < high; i++) { 211892c4ed94SBarry Smith if (rp[i] > col) break; 211992c4ed94SBarry Smith if (rp[i] == col) { 21207dc0baabSHong Zhang if (A->structure_only) goto noinsert2; 21218a84c255SSatish Balay bap = ap + bs2 * i; 21220e324ae4SSatish Balay if (roworiented) { 21238a84c255SSatish Balay if (is == ADD_VALUES) { 2124dd9472c6SBarry Smith for (ii = 0; ii < bs; ii++, value += stepval) { 2125ad540459SPierre Jolivet for (jj = ii; jj < bs2; jj += bs) bap[jj] += *value++; 2126dd9472c6SBarry Smith } 21270e324ae4SSatish Balay } else { 2128dd9472c6SBarry 
Smith for (ii = 0; ii < bs; ii++, value += stepval) { 2129ad540459SPierre Jolivet for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++; 2130dd9472c6SBarry Smith } 2131dd9472c6SBarry Smith } 21320e324ae4SSatish Balay } else { 21330e324ae4SSatish Balay if (is == ADD_VALUES) { 213453ef36baSBarry Smith for (ii = 0; ii < bs; ii++, value += bs + stepval) { 2135ad540459SPierre Jolivet for (jj = 0; jj < bs; jj++) bap[jj] += value[jj]; 213653ef36baSBarry Smith bap += bs; 2137dd9472c6SBarry Smith } 21380e324ae4SSatish Balay } else { 213953ef36baSBarry Smith for (ii = 0; ii < bs; ii++, value += bs + stepval) { 2140ad540459SPierre Jolivet for (jj = 0; jj < bs; jj++) bap[jj] = value[jj]; 214153ef36baSBarry Smith bap += bs; 21428a84c255SSatish Balay } 2143dd9472c6SBarry Smith } 2144dd9472c6SBarry Smith } 2145f1241b54SBarry Smith goto noinsert2; 214692c4ed94SBarry Smith } 214792c4ed94SBarry Smith } 214889280ab3SLois Curfman McInnes if (nonew == 1) goto noinsert2; 21495f80ce2aSJacob Faibussowitsch PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new blocked index new nonzero block (%" PetscInt_FMT ", %" PetscInt_FMT ") in the matrix", row, col); 21507dc0baabSHong Zhang if (A->structure_only) { 21517dc0baabSHong Zhang MatSeqXAIJReallocateAIJ_structure_only(A, a->mbs, bs2, nrow, row, col, rmax, ai, aj, rp, imax, nonew, MatScalar); 21527dc0baabSHong Zhang } else { 2153fef13f97SBarry Smith MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, row, col, rmax, aa, ai, aj, rp, ap, imax, nonew, MatScalar); 21547dc0baabSHong Zhang } 21559371c9d4SSatish Balay N = nrow++ - 1; 21569371c9d4SSatish Balay high++; 215792c4ed94SBarry Smith /* shift up all the later entries in this row */ 21589566063dSJacob Faibussowitsch PetscCall(PetscArraymove(rp + i + 1, rp + i, N - i + 1)); 215992c4ed94SBarry Smith rp[i] = col; 21607dc0baabSHong Zhang if (!A->structure_only) { 21619566063dSJacob Faibussowitsch PetscCall(PetscArraymove(ap + bs2 * (i + 1), ap + bs2 * i, bs2 * (N - i + 
1))); 21628a84c255SSatish Balay bap = ap + bs2 * i; 21630e324ae4SSatish Balay if (roworiented) { 2164dd9472c6SBarry Smith for (ii = 0; ii < bs; ii++, value += stepval) { 2165ad540459SPierre Jolivet for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++; 2166dd9472c6SBarry Smith } 21670e324ae4SSatish Balay } else { 2168dd9472c6SBarry Smith for (ii = 0; ii < bs; ii++, value += stepval) { 2169ad540459SPierre Jolivet for (jj = 0; jj < bs; jj++) *bap++ = *value++; 2170dd9472c6SBarry Smith } 2171dd9472c6SBarry Smith } 21727dc0baabSHong Zhang } 2173f1241b54SBarry Smith noinsert2:; 217492c4ed94SBarry Smith low = i; 217592c4ed94SBarry Smith } 217692c4ed94SBarry Smith ailen[row] = nrow; 217792c4ed94SBarry Smith } 21783ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 217992c4ed94SBarry Smith } 218026e093fcSHong Zhang 2181d71ae5a4SJacob Faibussowitsch PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A, MatAssemblyType mode) 2182d71ae5a4SJacob Faibussowitsch { 2183584200bdSSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 2184580bdb30SBarry Smith PetscInt fshift = 0, i, *ai = a->i, *aj = a->j, *imax = a->imax; 2185d0f46423SBarry Smith PetscInt m = A->rmap->N, *ip, N, *ailen = a->ilen; 2186c1ac3661SBarry Smith PetscInt mbs = a->mbs, bs2 = a->bs2, rmax = 0; 21873f1db9ecSBarry Smith MatScalar *aa = a->a, *ap; 21883447b6efSHong Zhang PetscReal ratio = 0.6; 2189584200bdSSatish Balay 21903a40ed3dSBarry Smith PetscFunctionBegin; 2191d32568d8SPierre Jolivet if (mode == MAT_FLUSH_ASSEMBLY || (A->was_assembled && A->ass_nonzerostate == A->nonzerostate)) PetscFunctionReturn(PETSC_SUCCESS); 2192584200bdSSatish Balay 219343ee02c3SBarry Smith if (m) rmax = ailen[0]; 2194584200bdSSatish Balay for (i = 1; i < mbs; i++) { 2195584200bdSSatish Balay /* move each row back by the amount of empty slots (fshift) before it*/ 2196584200bdSSatish Balay fshift += imax[i - 1] - ailen[i - 1]; 2197d402145bSBarry Smith rmax = PetscMax(rmax, ailen[i]); 2198584200bdSSatish Balay if (fshift) { 
2199580bdb30SBarry Smith ip = aj + ai[i]; 2200580bdb30SBarry Smith ap = aa + bs2 * ai[i]; 2201584200bdSSatish Balay N = ailen[i]; 22029566063dSJacob Faibussowitsch PetscCall(PetscArraymove(ip - fshift, ip, N)); 220348a46eb9SPierre Jolivet if (!A->structure_only) PetscCall(PetscArraymove(ap - bs2 * fshift, ap, bs2 * N)); 2204672ba085SHong Zhang } 2205584200bdSSatish Balay ai[i] = ai[i - 1] + ailen[i - 1]; 2206584200bdSSatish Balay } 2207584200bdSSatish Balay if (mbs) { 2208584200bdSSatish Balay fshift += imax[mbs - 1] - ailen[mbs - 1]; 2209584200bdSSatish Balay ai[mbs] = ai[mbs - 1] + ailen[mbs - 1]; 2210584200bdSSatish Balay } 22117c565772SBarry Smith 2212584200bdSSatish Balay /* reset ilen and imax for each row */ 22137c565772SBarry Smith a->nonzerorowcnt = 0; 2214672ba085SHong Zhang if (A->structure_only) { 22159566063dSJacob Faibussowitsch PetscCall(PetscFree2(a->imax, a->ilen)); 2216672ba085SHong Zhang } else { /* !A->structure_only */ 2217584200bdSSatish Balay for (i = 0; i < mbs; i++) { 2218584200bdSSatish Balay ailen[i] = imax[i] = ai[i + 1] - ai[i]; 22197c565772SBarry Smith a->nonzerorowcnt += ((ai[i + 1] - ai[i]) > 0); 2220584200bdSSatish Balay } 2221672ba085SHong Zhang } 2222a7c10996SSatish Balay a->nz = ai[mbs]; 2223584200bdSSatish Balay 2224584200bdSSatish Balay /* diagonals may have moved, so kill the diagonal pointers */ 2225b01c7715SBarry Smith a->idiagvalid = PETSC_FALSE; 22265f80ce2aSJacob Faibussowitsch if (fshift) PetscCheck(a->nounused != -1, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unused space detected in matrix: %" PetscInt_FMT " X %" PetscInt_FMT " block size %" PetscInt_FMT ", %" PetscInt_FMT " unneeded", m, A->cmap->n, A->rmap->bs, fshift * bs2); 22279566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "Matrix size: %" PetscInt_FMT " X %" PetscInt_FMT ", block size %" PetscInt_FMT "; storage space: %" PetscInt_FMT " unneeded, %" PetscInt_FMT " used\n", m, A->cmap->n, A->rmap->bs, fshift * bs2, a->nz * bs2)); 22289566063dSJacob Faibussowitsch 
PetscCall(PetscInfo(A, "Number of mallocs during MatSetValues is %" PetscInt_FMT "\n", a->reallocs)); 22299566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "Most nonzeros blocks in any row is %" PetscInt_FMT "\n", rmax)); 223026fbe8dcSKarl Rupp 22318e58a170SBarry Smith A->info.mallocs += a->reallocs; 2232e2f3b5e9SSatish Balay a->reallocs = 0; 22330e6d2581SBarry Smith A->info.nz_unneeded = (PetscReal)fshift * bs2; 2234647a6520SHong Zhang a->rmax = rmax; 2235cf4441caSHong Zhang 223648a46eb9SPierre Jolivet if (!A->structure_only) PetscCall(MatCheckCompressedRow(A, a->nonzerorowcnt, &a->compressedrow, a->i, mbs, ratio)); 22373ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 2238584200bdSSatish Balay } 2239584200bdSSatish Balay 2240bea157c4SSatish Balay /* 2241bea157c4SSatish Balay This function returns an array of flags which indicate the locations of contiguous 2242bea157c4SSatish Balay blocks that should be zeroed. for eg: if bs = 3 and is = [0,1,2,3,5,6,7,8,9] 2243a5b23f4aSJose E. Roman then the resulting sizes = [3,1,1,3,1] corresponding to sets [(0,1,2),(3),(5),(6,7,8),(9)] 2244bea157c4SSatish Balay Assume: sizes should be long enough to hold all the values. 2245bea157c4SSatish Balay */ 2246d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[], PetscInt n, PetscInt bs, PetscInt sizes[], PetscInt *bs_max) 2247d71ae5a4SJacob Faibussowitsch { 2248ff6a9541SJacob Faibussowitsch PetscInt j = 0; 22493a40ed3dSBarry Smith 2250433994e6SBarry Smith PetscFunctionBegin; 2251ff6a9541SJacob Faibussowitsch for (PetscInt i = 0; i < n; j++) { 2252ff6a9541SJacob Faibussowitsch PetscInt row = idx[i]; 2253a5b23f4aSJose E. 
Roman if (row % bs != 0) { /* Not the beginning of a block */ 2254bea157c4SSatish Balay sizes[j] = 1; 2255bea157c4SSatish Balay i++; 2256e4fda26cSSatish Balay } else if (i + bs > n) { /* complete block doesn't exist (at idx end) */ 2257bea157c4SSatish Balay sizes[j] = 1; /* Also makes sure at least 'bs' values exist for next else */ 2258bea157c4SSatish Balay i++; 22596aad120cSJose E. Roman } else { /* Beginning of the block, so check if the complete block exists */ 2260ff6a9541SJacob Faibussowitsch PetscBool flg = PETSC_TRUE; 2261ff6a9541SJacob Faibussowitsch for (PetscInt k = 1; k < bs; k++) { 2262bea157c4SSatish Balay if (row + k != idx[i + k]) { /* break in the block */ 2263bea157c4SSatish Balay flg = PETSC_FALSE; 2264bea157c4SSatish Balay break; 2265d9b7c43dSSatish Balay } 2266bea157c4SSatish Balay } 2267abc0a331SBarry Smith if (flg) { /* No break in the bs */ 2268bea157c4SSatish Balay sizes[j] = bs; 2269bea157c4SSatish Balay i += bs; 2270bea157c4SSatish Balay } else { 2271bea157c4SSatish Balay sizes[j] = 1; 2272bea157c4SSatish Balay i++; 2273bea157c4SSatish Balay } 2274bea157c4SSatish Balay } 2275bea157c4SSatish Balay } 2276bea157c4SSatish Balay *bs_max = j; 22773ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 2278d9b7c43dSSatish Balay } 2279d9b7c43dSSatish Balay 2280d71ae5a4SJacob Faibussowitsch PetscErrorCode MatZeroRows_SeqBAIJ(Mat A, PetscInt is_n, const PetscInt is_idx[], PetscScalar diag, Vec x, Vec b) 2281d71ae5a4SJacob Faibussowitsch { 2282d9b7c43dSSatish Balay Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)A->data; 2283f4df32b1SMatthew Knepley PetscInt i, j, k, count, *rows; 2284d0f46423SBarry Smith PetscInt bs = A->rmap->bs, bs2 = baij->bs2, *sizes, row, bs_max; 228587828ca2SBarry Smith PetscScalar zero = 0.0; 22863f1db9ecSBarry Smith MatScalar *aa; 228797b48c8fSBarry Smith const PetscScalar *xx; 228897b48c8fSBarry Smith PetscScalar *bb; 2289d9b7c43dSSatish Balay 22903a40ed3dSBarry Smith PetscFunctionBegin; 2291dd8e379bSPierre Jolivet /* fix 
right-hand side if needed */ 229297b48c8fSBarry Smith if (x && b) { 22939566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(x, &xx)); 22949566063dSJacob Faibussowitsch PetscCall(VecGetArray(b, &bb)); 2295ad540459SPierre Jolivet for (i = 0; i < is_n; i++) bb[is_idx[i]] = diag * xx[is_idx[i]]; 22969566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(x, &xx)); 22979566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(b, &bb)); 229897b48c8fSBarry Smith } 229997b48c8fSBarry Smith 2300d9b7c43dSSatish Balay /* Make a copy of the IS and sort it */ 2301bea157c4SSatish Balay /* allocate memory for rows,sizes */ 23029566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(is_n, &rows, 2 * is_n, &sizes)); 2303bea157c4SSatish Balay 2304563b5814SBarry Smith /* copy IS values to rows, and sort them */ 230526fbe8dcSKarl Rupp for (i = 0; i < is_n; i++) rows[i] = is_idx[i]; 23069566063dSJacob Faibussowitsch PetscCall(PetscSortInt(is_n, rows)); 230797b48c8fSBarry Smith 2308a9817697SBarry Smith if (baij->keepnonzeropattern) { 230926fbe8dcSKarl Rupp for (i = 0; i < is_n; i++) sizes[i] = 1; 2310dffd3267SBarry Smith bs_max = is_n; 2311dffd3267SBarry Smith } else { 23129566063dSJacob Faibussowitsch PetscCall(MatZeroRows_SeqBAIJ_Check_Blocks(rows, is_n, bs, sizes, &bs_max)); 2313e56f5c9eSBarry Smith A->nonzerostate++; 2314dffd3267SBarry Smith } 2315bea157c4SSatish Balay 2316bea157c4SSatish Balay for (i = 0, j = 0; i < bs_max; j += sizes[i], i++) { 2317bea157c4SSatish Balay row = rows[j]; 23185f80ce2aSJacob Faibussowitsch PetscCheck(row >= 0 && row <= A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "row %" PetscInt_FMT " out of range", row); 2319bea157c4SSatish Balay count = (baij->i[row / bs + 1] - baij->i[row / bs]) * bs; 2320835f2295SStefano Zampini aa = baij->a + baij->i[row / bs] * bs2 + (row % bs); 2321a9817697SBarry Smith if (sizes[i] == bs && !baij->keepnonzeropattern) { 2322d4a378daSJed Brown if (diag != (PetscScalar)0.0) { 2323bea157c4SSatish Balay if 
(baij->ilen[row / bs] > 0) { 2324bea157c4SSatish Balay baij->ilen[row / bs] = 1; 2325bea157c4SSatish Balay baij->j[baij->i[row / bs]] = row / bs; 232626fbe8dcSKarl Rupp 23279566063dSJacob Faibussowitsch PetscCall(PetscArrayzero(aa, count * bs)); 2328a07cd24cSSatish Balay } 2329563b5814SBarry Smith /* Now insert all the diagonal values for this bs */ 23309927e4dfSBarry Smith for (k = 0; k < bs; k++) PetscUseTypeMethod(A, setvalues, 1, rows + j + k, 1, rows + j + k, &diag, INSERT_VALUES); 2331f4df32b1SMatthew Knepley } else { /* (diag == 0.0) */ 2332bea157c4SSatish Balay baij->ilen[row / bs] = 0; 2333f4df32b1SMatthew Knepley } /* end (diag == 0.0) */ 2334bea157c4SSatish Balay } else { /* (sizes[i] != bs) */ 23356bdcaf15SBarry Smith PetscAssert(sizes[i] == 1, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Internal Error. Value should be 1"); 2336bea157c4SSatish Balay for (k = 0; k < count; k++) { 2337d9b7c43dSSatish Balay aa[0] = zero; 2338d9b7c43dSSatish Balay aa += bs; 2339d9b7c43dSSatish Balay } 23409927e4dfSBarry Smith if (diag != (PetscScalar)0.0) PetscUseTypeMethod(A, setvalues, 1, rows + j, 1, rows + j, &diag, INSERT_VALUES); 2341d9b7c43dSSatish Balay } 2342bea157c4SSatish Balay } 2343bea157c4SSatish Balay 23449566063dSJacob Faibussowitsch PetscCall(PetscFree2(rows, sizes)); 23459566063dSJacob Faibussowitsch PetscCall(MatAssemblyEnd_SeqBAIJ(A, MAT_FINAL_ASSEMBLY)); 23463ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 2347d9b7c43dSSatish Balay } 23481c351548SSatish Balay 2349ff6a9541SJacob Faibussowitsch static PetscErrorCode MatZeroRowsColumns_SeqBAIJ(Mat A, PetscInt is_n, const PetscInt is_idx[], PetscScalar diag, Vec x, Vec b) 2350d71ae5a4SJacob Faibussowitsch { 235197b48c8fSBarry Smith Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)A->data; 235297b48c8fSBarry Smith PetscInt i, j, k, count; 235397b48c8fSBarry Smith PetscInt bs = A->rmap->bs, bs2 = baij->bs2, row, col; 235497b48c8fSBarry Smith PetscScalar zero = 0.0; 235597b48c8fSBarry Smith MatScalar *aa; 
235697b48c8fSBarry Smith const PetscScalar *xx; 235797b48c8fSBarry Smith PetscScalar *bb; 235856777dd2SBarry Smith PetscBool *zeroed, vecs = PETSC_FALSE; 235997b48c8fSBarry Smith 236097b48c8fSBarry Smith PetscFunctionBegin; 2361dd8e379bSPierre Jolivet /* fix right-hand side if needed */ 236297b48c8fSBarry Smith if (x && b) { 23639566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(x, &xx)); 23649566063dSJacob Faibussowitsch PetscCall(VecGetArray(b, &bb)); 236556777dd2SBarry Smith vecs = PETSC_TRUE; 236697b48c8fSBarry Smith } 236797b48c8fSBarry Smith 236897b48c8fSBarry Smith /* zero the columns */ 23699566063dSJacob Faibussowitsch PetscCall(PetscCalloc1(A->rmap->n, &zeroed)); 237097b48c8fSBarry Smith for (i = 0; i < is_n; i++) { 23715f80ce2aSJacob Faibussowitsch PetscCheck(is_idx[i] >= 0 && is_idx[i] < A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "row %" PetscInt_FMT " out of range", is_idx[i]); 237297b48c8fSBarry Smith zeroed[is_idx[i]] = PETSC_TRUE; 237397b48c8fSBarry Smith } 237497b48c8fSBarry Smith for (i = 0; i < A->rmap->N; i++) { 237597b48c8fSBarry Smith if (!zeroed[i]) { 237697b48c8fSBarry Smith row = i / bs; 237797b48c8fSBarry Smith for (j = baij->i[row]; j < baij->i[row + 1]; j++) { 237897b48c8fSBarry Smith for (k = 0; k < bs; k++) { 237997b48c8fSBarry Smith col = bs * baij->j[j] + k; 238097b48c8fSBarry Smith if (zeroed[col]) { 2381835f2295SStefano Zampini aa = baij->a + j * bs2 + (i % bs) + bs * k; 238256777dd2SBarry Smith if (vecs) bb[i] -= aa[0] * xx[col]; 238397b48c8fSBarry Smith aa[0] = 0.0; 238497b48c8fSBarry Smith } 238597b48c8fSBarry Smith } 238697b48c8fSBarry Smith } 238756777dd2SBarry Smith } else if (vecs) bb[i] = diag * xx[i]; 238897b48c8fSBarry Smith } 23899566063dSJacob Faibussowitsch PetscCall(PetscFree(zeroed)); 239056777dd2SBarry Smith if (vecs) { 23919566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(x, &xx)); 23929566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(b, &bb)); 239356777dd2SBarry Smith } 
239497b48c8fSBarry Smith 239597b48c8fSBarry Smith /* zero the rows */ 239697b48c8fSBarry Smith for (i = 0; i < is_n; i++) { 239797b48c8fSBarry Smith row = is_idx[i]; 239897b48c8fSBarry Smith count = (baij->i[row / bs + 1] - baij->i[row / bs]) * bs; 2399835f2295SStefano Zampini aa = baij->a + baij->i[row / bs] * bs2 + (row % bs); 240097b48c8fSBarry Smith for (k = 0; k < count; k++) { 240197b48c8fSBarry Smith aa[0] = zero; 240297b48c8fSBarry Smith aa += bs; 240397b48c8fSBarry Smith } 2404dbbe0bcdSBarry Smith if (diag != (PetscScalar)0.0) PetscUseTypeMethod(A, setvalues, 1, &row, 1, &row, &diag, INSERT_VALUES); 240597b48c8fSBarry Smith } 24069566063dSJacob Faibussowitsch PetscCall(MatAssemblyEnd_SeqBAIJ(A, MAT_FINAL_ASSEMBLY)); 24073ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 240897b48c8fSBarry Smith } 240997b48c8fSBarry Smith 2410d71ae5a4SJacob Faibussowitsch PetscErrorCode MatSetValues_SeqBAIJ(Mat A, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode is) 2411d71ae5a4SJacob Faibussowitsch { 24122d61bbb3SSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 2413e2ee6c50SBarry Smith PetscInt *rp, k, low, high, t, ii, row, nrow, i, col, l, rmax, N, lastcol = -1; 2414c1ac3661SBarry Smith PetscInt *imax = a->imax, *ai = a->i, *ailen = a->ilen; 2415d0f46423SBarry Smith PetscInt *aj = a->j, nonew = a->nonew, bs = A->rmap->bs, brow, bcol; 2416c1ac3661SBarry Smith PetscInt ridx, cidx, bs2 = a->bs2; 2417ace3abfcSBarry Smith PetscBool roworiented = a->roworiented; 2418d8cdefa3SHong Zhang MatScalar *ap = NULL, value = 0.0, *aa = a->a, *bap; 24192d61bbb3SSatish Balay 24202d61bbb3SSatish Balay PetscFunctionBegin; 24212d61bbb3SSatish Balay for (k = 0; k < m; k++) { /* loop over added rows */ 2422085a36d4SBarry Smith row = im[k]; 2423085a36d4SBarry Smith brow = row / bs; 24245ef9f2a5SBarry Smith if (row < 0) continue; 24256bdcaf15SBarry Smith PetscCheck(row < A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, 
"Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, row, A->rmap->N - 1); 24268e3a54c0SPierre Jolivet rp = PetscSafePointerPlusOffset(aj, ai[brow]); 24278e3a54c0SPierre Jolivet if (!A->structure_only) ap = PetscSafePointerPlusOffset(aa, bs2 * ai[brow]); 24282d61bbb3SSatish Balay rmax = imax[brow]; 24292d61bbb3SSatish Balay nrow = ailen[brow]; 24302d61bbb3SSatish Balay low = 0; 2431c71e6ed7SBarry Smith high = nrow; 24322d61bbb3SSatish Balay for (l = 0; l < n; l++) { /* loop over added columns */ 24335ef9f2a5SBarry Smith if (in[l] < 0) continue; 24346bdcaf15SBarry Smith PetscCheck(in[l] < A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[l], A->cmap->n - 1); 24359371c9d4SSatish Balay col = in[l]; 24369371c9d4SSatish Balay bcol = col / bs; 24379371c9d4SSatish Balay ridx = row % bs; 24389371c9d4SSatish Balay cidx = col % bs; 2439672ba085SHong Zhang if (!A->structure_only) { 24402d61bbb3SSatish Balay if (roworiented) { 24415ef9f2a5SBarry Smith value = v[l + k * n]; 24422d61bbb3SSatish Balay } else { 24432d61bbb3SSatish Balay value = v[k + l * m]; 24442d61bbb3SSatish Balay } 2445672ba085SHong Zhang } 24469371c9d4SSatish Balay if (col <= lastcol) low = 0; 24479371c9d4SSatish Balay else high = nrow; 2448e2ee6c50SBarry Smith lastcol = col; 24492d61bbb3SSatish Balay while (high - low > 7) { 24502d61bbb3SSatish Balay t = (low + high) / 2; 24512d61bbb3SSatish Balay if (rp[t] > bcol) high = t; 24522d61bbb3SSatish Balay else low = t; 24532d61bbb3SSatish Balay } 24542d61bbb3SSatish Balay for (i = low; i < high; i++) { 24552d61bbb3SSatish Balay if (rp[i] > bcol) break; 24562d61bbb3SSatish Balay if (rp[i] == bcol) { 24578e3a54c0SPierre Jolivet bap = PetscSafePointerPlusOffset(ap, bs2 * i + bs * cidx + ridx); 2458672ba085SHong Zhang if (!A->structure_only) { 24592d61bbb3SSatish Balay if (is == ADD_VALUES) *bap += value; 24602d61bbb3SSatish Balay else *bap = value; 2461672ba085SHong Zhang } 
24622d61bbb3SSatish Balay goto noinsert1; 24632d61bbb3SSatish Balay } 24642d61bbb3SSatish Balay } 24652d61bbb3SSatish Balay if (nonew == 1) goto noinsert1; 24665f80ce2aSJacob Faibussowitsch PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero (%" PetscInt_FMT ", %" PetscInt_FMT ") in the matrix", row, col); 2467672ba085SHong Zhang if (A->structure_only) { 2468672ba085SHong Zhang MatSeqXAIJReallocateAIJ_structure_only(A, a->mbs, bs2, nrow, brow, bcol, rmax, ai, aj, rp, imax, nonew, MatScalar); 2469672ba085SHong Zhang } else { 2470fef13f97SBarry Smith MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, brow, bcol, rmax, aa, ai, aj, rp, ap, imax, nonew, MatScalar); 2471672ba085SHong Zhang } 24729371c9d4SSatish Balay N = nrow++ - 1; 24739371c9d4SSatish Balay high++; 24742d61bbb3SSatish Balay /* shift up all the later entries in this row */ 24759566063dSJacob Faibussowitsch PetscCall(PetscArraymove(rp + i + 1, rp + i, N - i + 1)); 24762d61bbb3SSatish Balay rp[i] = bcol; 2477580bdb30SBarry Smith if (!A->structure_only) { 24789566063dSJacob Faibussowitsch PetscCall(PetscArraymove(ap + bs2 * (i + 1), ap + bs2 * i, bs2 * (N - i + 1))); 24799566063dSJacob Faibussowitsch PetscCall(PetscArrayzero(ap + bs2 * i, bs2)); 2480580bdb30SBarry Smith ap[bs2 * i + bs * cidx + ridx] = value; 2481580bdb30SBarry Smith } 2482085a36d4SBarry Smith a->nz++; 24832d61bbb3SSatish Balay noinsert1:; 24842d61bbb3SSatish Balay low = i; 24852d61bbb3SSatish Balay } 24862d61bbb3SSatish Balay ailen[brow] = nrow; 24872d61bbb3SSatish Balay } 24883ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 24892d61bbb3SSatish Balay } 24902d61bbb3SSatish Balay 2491ff6a9541SJacob Faibussowitsch static PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA, IS row, IS col, const MatFactorInfo *info) 2492d71ae5a4SJacob Faibussowitsch { 24932d61bbb3SSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)inA->data; 24942d61bbb3SSatish Balay Mat outA; 2495ace3abfcSBarry Smith PetscBool 
row_identity, col_identity; 24962d61bbb3SSatish Balay 24972d61bbb3SSatish Balay PetscFunctionBegin; 24985f80ce2aSJacob Faibussowitsch PetscCheck(info->levels == 0, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only levels = 0 supported for in-place ILU"); 24999566063dSJacob Faibussowitsch PetscCall(ISIdentity(row, &row_identity)); 25009566063dSJacob Faibussowitsch PetscCall(ISIdentity(col, &col_identity)); 25015f80ce2aSJacob Faibussowitsch PetscCheck(row_identity && col_identity, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Row and column permutations must be identity for in-place ILU"); 25022d61bbb3SSatish Balay 25032d61bbb3SSatish Balay outA = inA; 2504d5f3da31SBarry Smith inA->factortype = MAT_FACTOR_LU; 25059566063dSJacob Faibussowitsch PetscCall(PetscFree(inA->solvertype)); 25069566063dSJacob Faibussowitsch PetscCall(PetscStrallocpy(MATSOLVERPETSC, &inA->solvertype)); 25072d61bbb3SSatish Balay 25089566063dSJacob Faibussowitsch PetscCall(PetscObjectReference((PetscObject)row)); 25099566063dSJacob Faibussowitsch PetscCall(ISDestroy(&a->row)); 2510c3122656SLisandro Dalcin a->row = row; 25119566063dSJacob Faibussowitsch PetscCall(PetscObjectReference((PetscObject)col)); 25129566063dSJacob Faibussowitsch PetscCall(ISDestroy(&a->col)); 2513c3122656SLisandro Dalcin a->col = col; 2514c38d4ed2SBarry Smith 2515c38d4ed2SBarry Smith /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */ 25169566063dSJacob Faibussowitsch PetscCall(ISDestroy(&a->icol)); 25179566063dSJacob Faibussowitsch PetscCall(ISInvertPermutation(col, PETSC_DECIDE, &a->icol)); 2518c38d4ed2SBarry Smith 25199566063dSJacob Faibussowitsch PetscCall(MatSeqBAIJSetNumericFactorization_inplace(inA, (PetscBool)(row_identity && col_identity))); 2520aa624791SPierre Jolivet if (!a->solve_work) PetscCall(PetscMalloc1(inA->rmap->N + inA->rmap->bs, &a->solve_work)); 25219566063dSJacob Faibussowitsch PetscCall(MatLUFactorNumeric(outA, inA, info)); 25223ba16761SJacob Faibussowitsch 
PetscFunctionReturn(PETSC_SUCCESS); 25232d61bbb3SSatish Balay } 2524d9b7c43dSSatish Balay 2525ff6a9541SJacob Faibussowitsch static PetscErrorCode MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat, const PetscInt *indices) 2526d71ae5a4SJacob Faibussowitsch { 252727a8da17SBarry Smith Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data; 252827a8da17SBarry Smith 252927a8da17SBarry Smith PetscFunctionBegin; 2530ff6a9541SJacob Faibussowitsch baij->nz = baij->maxnz; 2531ff6a9541SJacob Faibussowitsch PetscCall(PetscArraycpy(baij->j, indices, baij->nz)); 2532ff6a9541SJacob Faibussowitsch PetscCall(PetscArraycpy(baij->ilen, baij->imax, baij->mbs)); 25333ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 253427a8da17SBarry Smith } 253527a8da17SBarry Smith 253627a8da17SBarry Smith /*@ 2537d8a51d2aSBarry Smith MatSeqBAIJSetColumnIndices - Set the column indices for all the block rows in the matrix. 253827a8da17SBarry Smith 253927a8da17SBarry Smith Input Parameters: 254011a5261eSBarry Smith + mat - the `MATSEQBAIJ` matrix 2541d8a51d2aSBarry Smith - indices - the block column indices 254227a8da17SBarry Smith 254315091d37SBarry Smith Level: advanced 254415091d37SBarry Smith 254527a8da17SBarry Smith Notes: 254627a8da17SBarry Smith This can be called if you have precomputed the nonzero structure of the 254727a8da17SBarry Smith matrix and want to provide it to the matrix object to improve the performance 254811a5261eSBarry Smith of the `MatSetValues()` operation. 254927a8da17SBarry Smith 255027a8da17SBarry Smith You MUST have set the correct numbers of nonzeros per row in the call to 255111a5261eSBarry Smith `MatCreateSeqBAIJ()`, and the columns indices MUST be sorted. 
255227a8da17SBarry Smith 255311a5261eSBarry Smith MUST be called before any calls to `MatSetValues()` 255427a8da17SBarry Smith 25551cc06b55SBarry Smith .seealso: [](ch_matrices), `Mat`, `MATSEQBAIJ`, `MatSetValues()` 255627a8da17SBarry Smith @*/ 2557d71ae5a4SJacob Faibussowitsch PetscErrorCode MatSeqBAIJSetColumnIndices(Mat mat, PetscInt *indices) 2558d71ae5a4SJacob Faibussowitsch { 255927a8da17SBarry Smith PetscFunctionBegin; 25600700a824SBarry Smith PetscValidHeaderSpecific(mat, MAT_CLASSID, 1); 25614f572ea9SToby Isaac PetscAssertPointer(indices, 2); 2562810441c8SPierre Jolivet PetscUseMethod(mat, "MatSeqBAIJSetColumnIndices_C", (Mat, const PetscInt *), (mat, (const PetscInt *)indices)); 25633ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 256427a8da17SBarry Smith } 256527a8da17SBarry Smith 256666976f2fSJacob Faibussowitsch static PetscErrorCode MatGetRowMaxAbs_SeqBAIJ(Mat A, Vec v, PetscInt idx[]) 2567d71ae5a4SJacob Faibussowitsch { 2568273d9f13SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 2569c1ac3661SBarry Smith PetscInt i, j, n, row, bs, *ai, *aj, mbs; 2570273d9f13SBarry Smith PetscReal atmp; 257187828ca2SBarry Smith PetscScalar *x, zero = 0.0; 2572273d9f13SBarry Smith MatScalar *aa; 2573c1ac3661SBarry Smith PetscInt ncols, brow, krow, kcol; 2574273d9f13SBarry Smith 2575273d9f13SBarry Smith PetscFunctionBegin; 25765f80ce2aSJacob Faibussowitsch PetscCheck(!A->factortype, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Not for factored matrix"); 2577d0f46423SBarry Smith bs = A->rmap->bs; 2578273d9f13SBarry Smith aa = a->a; 2579273d9f13SBarry Smith ai = a->i; 2580273d9f13SBarry Smith aj = a->j; 2581273d9f13SBarry Smith mbs = a->mbs; 2582273d9f13SBarry Smith 25839566063dSJacob Faibussowitsch PetscCall(VecSet(v, zero)); 25849566063dSJacob Faibussowitsch PetscCall(VecGetArray(v, &x)); 25859566063dSJacob Faibussowitsch PetscCall(VecGetLocalSize(v, &n)); 25865f80ce2aSJacob Faibussowitsch PetscCheck(n == A->rmap->N, PETSC_COMM_SELF, 
PETSC_ERR_ARG_SIZ, "Nonconforming matrix and vector"); 2587273d9f13SBarry Smith for (i = 0; i < mbs; i++) { 25889371c9d4SSatish Balay ncols = ai[1] - ai[0]; 25899371c9d4SSatish Balay ai++; 2590273d9f13SBarry Smith brow = bs * i; 2591273d9f13SBarry Smith for (j = 0; j < ncols; j++) { 2592273d9f13SBarry Smith for (kcol = 0; kcol < bs; kcol++) { 2593273d9f13SBarry Smith for (krow = 0; krow < bs; krow++) { 25949371c9d4SSatish Balay atmp = PetscAbsScalar(*aa); 25959371c9d4SSatish Balay aa++; 2596273d9f13SBarry Smith row = brow + krow; /* row index */ 25979371c9d4SSatish Balay if (PetscAbsScalar(x[row]) < atmp) { 25989371c9d4SSatish Balay x[row] = atmp; 25999371c9d4SSatish Balay if (idx) idx[row] = bs * (*aj) + kcol; 26009371c9d4SSatish Balay } 2601273d9f13SBarry Smith } 2602273d9f13SBarry Smith } 2603273d9f13SBarry Smith aj++; 2604273d9f13SBarry Smith } 2605273d9f13SBarry Smith } 26069566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(v, &x)); 26073ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 2608273d9f13SBarry Smith } 2609273d9f13SBarry Smith 2610eede4a3fSMark Adams static PetscErrorCode MatGetRowSumAbs_SeqBAIJ(Mat A, Vec v) 2611eede4a3fSMark Adams { 2612eede4a3fSMark Adams Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 2613eede4a3fSMark Adams PetscInt i, j, n, row, bs, *ai, mbs; 2614eede4a3fSMark Adams PetscReal atmp; 2615eede4a3fSMark Adams PetscScalar *x, zero = 0.0; 2616eede4a3fSMark Adams MatScalar *aa; 2617eede4a3fSMark Adams PetscInt ncols, brow, krow, kcol; 2618eede4a3fSMark Adams 2619eede4a3fSMark Adams PetscFunctionBegin; 2620eede4a3fSMark Adams PetscCheck(!A->factortype, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Not for factored matrix"); 2621eede4a3fSMark Adams bs = A->rmap->bs; 2622eede4a3fSMark Adams aa = a->a; 2623eede4a3fSMark Adams ai = a->i; 2624eede4a3fSMark Adams mbs = a->mbs; 2625eede4a3fSMark Adams 2626eede4a3fSMark Adams PetscCall(VecSet(v, zero)); 2627eede4a3fSMark Adams PetscCall(VecGetArrayWrite(v, &x)); 2628eede4a3fSMark 
Adams PetscCall(VecGetLocalSize(v, &n)); 2629eede4a3fSMark Adams PetscCheck(n == A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Nonconforming matrix and vector"); 2630eede4a3fSMark Adams for (i = 0; i < mbs; i++) { 2631eede4a3fSMark Adams ncols = ai[1] - ai[0]; 2632eede4a3fSMark Adams ai++; 2633eede4a3fSMark Adams brow = bs * i; 2634eede4a3fSMark Adams for (j = 0; j < ncols; j++) { 2635eede4a3fSMark Adams for (kcol = 0; kcol < bs; kcol++) { 2636eede4a3fSMark Adams for (krow = 0; krow < bs; krow++) { 2637eede4a3fSMark Adams atmp = PetscAbsScalar(*aa); 2638eede4a3fSMark Adams aa++; 2639eede4a3fSMark Adams row = brow + krow; /* row index */ 2640eede4a3fSMark Adams x[row] += atmp; 2641eede4a3fSMark Adams } 2642eede4a3fSMark Adams } 2643eede4a3fSMark Adams } 2644eede4a3fSMark Adams } 2645eede4a3fSMark Adams PetscCall(VecRestoreArrayWrite(v, &x)); 2646eede4a3fSMark Adams PetscFunctionReturn(PETSC_SUCCESS); 2647eede4a3fSMark Adams } 2648eede4a3fSMark Adams 264966976f2fSJacob Faibussowitsch static PetscErrorCode MatCopy_SeqBAIJ(Mat A, Mat B, MatStructure str) 2650d71ae5a4SJacob Faibussowitsch { 26513c896bc6SHong Zhang PetscFunctionBegin; 26523c896bc6SHong Zhang /* If the two matrices have the same copy implementation, use fast copy. 
*/ 26533c896bc6SHong Zhang if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) { 26543c896bc6SHong Zhang Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 26553c896bc6SHong Zhang Mat_SeqBAIJ *b = (Mat_SeqBAIJ *)B->data; 2656d88c0aacSHong Zhang PetscInt ambs = a->mbs, bmbs = b->mbs, abs = A->rmap->bs, bbs = B->rmap->bs, bs2 = abs * abs; 26573c896bc6SHong Zhang 26585f80ce2aSJacob Faibussowitsch PetscCheck(a->i[ambs] == b->i[bmbs], PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Number of nonzero blocks in matrices A %" PetscInt_FMT " and B %" PetscInt_FMT " are different", a->i[ambs], b->i[bmbs]); 26595f80ce2aSJacob Faibussowitsch PetscCheck(abs == bbs, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Block size A %" PetscInt_FMT " and B %" PetscInt_FMT " are different", abs, bbs); 26609566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(b->a, a->a, bs2 * a->i[ambs])); 26619566063dSJacob Faibussowitsch PetscCall(PetscObjectStateIncrease((PetscObject)B)); 26623c896bc6SHong Zhang } else { 26639566063dSJacob Faibussowitsch PetscCall(MatCopy_Basic(A, B, str)); 26643c896bc6SHong Zhang } 26653ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 26663c896bc6SHong Zhang } 26673c896bc6SHong Zhang 2668d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatSeqBAIJGetArray_SeqBAIJ(Mat A, PetscScalar *array[]) 2669d71ae5a4SJacob Faibussowitsch { 2670f2a5309cSSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 26716e111a19SKarl Rupp 2672f2a5309cSSatish Balay PetscFunctionBegin; 2673f2a5309cSSatish Balay *array = a->a; 26743ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 2675f2a5309cSSatish Balay } 2676f2a5309cSSatish Balay 2677d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatSeqBAIJRestoreArray_SeqBAIJ(Mat A, PetscScalar *array[]) 2678d71ae5a4SJacob Faibussowitsch { 2679f2a5309cSSatish Balay PetscFunctionBegin; 2680cda14afcSprj- *array = NULL; 26813ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 2682f2a5309cSSatish Balay } 
2683f2a5309cSSatish Balay 2684d71ae5a4SJacob Faibussowitsch PetscErrorCode MatAXPYGetPreallocation_SeqBAIJ(Mat Y, Mat X, PetscInt *nnz) 2685d71ae5a4SJacob Faibussowitsch { 2686b264fe52SHong Zhang PetscInt bs = Y->rmap->bs, mbs = Y->rmap->N / bs; 268752768537SHong Zhang Mat_SeqBAIJ *x = (Mat_SeqBAIJ *)X->data; 268852768537SHong Zhang Mat_SeqBAIJ *y = (Mat_SeqBAIJ *)Y->data; 268952768537SHong Zhang 269052768537SHong Zhang PetscFunctionBegin; 269152768537SHong Zhang /* Set the number of nonzeros in the new matrix */ 26929566063dSJacob Faibussowitsch PetscCall(MatAXPYGetPreallocation_SeqX_private(mbs, x->i, x->j, y->i, y->j, nnz)); 26933ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 269452768537SHong Zhang } 269552768537SHong Zhang 2696d71ae5a4SJacob Faibussowitsch PetscErrorCode MatAXPY_SeqBAIJ(Mat Y, PetscScalar a, Mat X, MatStructure str) 2697d71ae5a4SJacob Faibussowitsch { 269842ee4b1aSHong Zhang Mat_SeqBAIJ *x = (Mat_SeqBAIJ *)X->data, *y = (Mat_SeqBAIJ *)Y->data; 269931ce2d13SHong Zhang PetscInt bs = Y->rmap->bs, bs2 = bs * bs; 2700e838b9e7SJed Brown PetscBLASInt one = 1; 270142ee4b1aSHong Zhang 270242ee4b1aSHong Zhang PetscFunctionBegin; 2703134adf20SPierre Jolivet if (str == UNKNOWN_NONZERO_PATTERN || (PetscDefined(USE_DEBUG) && str == SAME_NONZERO_PATTERN)) { 2704134adf20SPierre Jolivet PetscBool e = x->nz == y->nz && x->mbs == y->mbs && bs == X->rmap->bs ? 
PETSC_TRUE : PETSC_FALSE; 2705134adf20SPierre Jolivet if (e) { 27069566063dSJacob Faibussowitsch PetscCall(PetscArraycmp(x->i, y->i, x->mbs + 1, &e)); 2707134adf20SPierre Jolivet if (e) { 27089566063dSJacob Faibussowitsch PetscCall(PetscArraycmp(x->j, y->j, x->i[x->mbs], &e)); 2709134adf20SPierre Jolivet if (e) str = SAME_NONZERO_PATTERN; 2710134adf20SPierre Jolivet } 2711134adf20SPierre Jolivet } 271254c59aa7SJacob Faibussowitsch if (!e) PetscCheck(str != SAME_NONZERO_PATTERN, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "MatStructure is not SAME_NONZERO_PATTERN"); 2713134adf20SPierre Jolivet } 271442ee4b1aSHong Zhang if (str == SAME_NONZERO_PATTERN) { 2715f4df32b1SMatthew Knepley PetscScalar alpha = a; 2716c5df96a5SBarry Smith PetscBLASInt bnz; 27179566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast(x->nz * bs2, &bnz)); 2718792fecdfSBarry Smith PetscCallBLAS("BLASaxpy", BLASaxpy_(&bnz, &alpha, x->a, &one, y->a, &one)); 27199566063dSJacob Faibussowitsch PetscCall(PetscObjectStateIncrease((PetscObject)Y)); 2720ab784542SHong Zhang } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 27219566063dSJacob Faibussowitsch PetscCall(MatAXPY_Basic(Y, a, X, str)); 272242ee4b1aSHong Zhang } else { 272352768537SHong Zhang Mat B; 272452768537SHong Zhang PetscInt *nnz; 272554c59aa7SJacob Faibussowitsch PetscCheck(bs == X->rmap->bs, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrices must have same block size"); 27269566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(Y->rmap->N, &nnz)); 27279566063dSJacob Faibussowitsch PetscCall(MatCreate(PetscObjectComm((PetscObject)Y), &B)); 27289566063dSJacob Faibussowitsch PetscCall(PetscObjectSetName((PetscObject)B, ((PetscObject)Y)->name)); 27299566063dSJacob Faibussowitsch PetscCall(MatSetSizes(B, Y->rmap->n, Y->cmap->n, Y->rmap->N, Y->cmap->N)); 27309566063dSJacob Faibussowitsch PetscCall(MatSetBlockSizesFromMats(B, Y, Y)); 27319566063dSJacob Faibussowitsch PetscCall(MatSetType(B, 
(MatType)((PetscObject)Y)->type_name)); 27329566063dSJacob Faibussowitsch PetscCall(MatAXPYGetPreallocation_SeqBAIJ(Y, X, nnz)); 27339566063dSJacob Faibussowitsch PetscCall(MatSeqBAIJSetPreallocation(B, bs, 0, nnz)); 27349566063dSJacob Faibussowitsch PetscCall(MatAXPY_BasicWithPreallocation(B, Y, a, X, str)); 27359566063dSJacob Faibussowitsch PetscCall(MatHeaderMerge(Y, &B)); 27369566063dSJacob Faibussowitsch PetscCall(PetscFree(nnz)); 273742ee4b1aSHong Zhang } 27383ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 273942ee4b1aSHong Zhang } 274042ee4b1aSHong Zhang 2741d71ae5a4SJacob Faibussowitsch PETSC_INTERN PetscErrorCode MatConjugate_SeqBAIJ(Mat A) 2742d71ae5a4SJacob Faibussowitsch { 2743ff6a9541SJacob Faibussowitsch #if PetscDefined(USE_COMPLEX) 27442726fb6dSPierre Jolivet Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 27452726fb6dSPierre Jolivet PetscInt i, nz = a->bs2 * a->i[a->mbs]; 27462726fb6dSPierre Jolivet MatScalar *aa = a->a; 27472726fb6dSPierre Jolivet 27482726fb6dSPierre Jolivet PetscFunctionBegin; 27492726fb6dSPierre Jolivet for (i = 0; i < nz; i++) aa[i] = PetscConj(aa[i]); 27503ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 2751ff6a9541SJacob Faibussowitsch #else 2752ff6a9541SJacob Faibussowitsch (void)A; 2753ff6a9541SJacob Faibussowitsch return PETSC_SUCCESS; 2754ff6a9541SJacob Faibussowitsch #endif 27552726fb6dSPierre Jolivet } 27562726fb6dSPierre Jolivet 2757ff6a9541SJacob Faibussowitsch static PetscErrorCode MatRealPart_SeqBAIJ(Mat A) 2758d71ae5a4SJacob Faibussowitsch { 2759ff6a9541SJacob Faibussowitsch #if PetscDefined(USE_COMPLEX) 276099cafbc1SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 276199cafbc1SBarry Smith PetscInt i, nz = a->bs2 * a->i[a->mbs]; 2762dd6ea824SBarry Smith MatScalar *aa = a->a; 276399cafbc1SBarry Smith 276499cafbc1SBarry Smith PetscFunctionBegin; 276599cafbc1SBarry Smith for (i = 0; i < nz; i++) aa[i] = PetscRealPart(aa[i]); 27663ba16761SJacob Faibussowitsch 
PetscFunctionReturn(PETSC_SUCCESS); 2767ff6a9541SJacob Faibussowitsch #else 2768ff6a9541SJacob Faibussowitsch (void)A; 2769ff6a9541SJacob Faibussowitsch return PETSC_SUCCESS; 2770ff6a9541SJacob Faibussowitsch #endif 277199cafbc1SBarry Smith } 277299cafbc1SBarry Smith 2773ff6a9541SJacob Faibussowitsch static PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A) 2774d71ae5a4SJacob Faibussowitsch { 2775ff6a9541SJacob Faibussowitsch #if PetscDefined(USE_COMPLEX) 277699cafbc1SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 277799cafbc1SBarry Smith PetscInt i, nz = a->bs2 * a->i[a->mbs]; 2778dd6ea824SBarry Smith MatScalar *aa = a->a; 277999cafbc1SBarry Smith 278099cafbc1SBarry Smith PetscFunctionBegin; 278199cafbc1SBarry Smith for (i = 0; i < nz; i++) aa[i] = PetscImaginaryPart(aa[i]); 27823ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 2783ff6a9541SJacob Faibussowitsch #else 2784ff6a9541SJacob Faibussowitsch (void)A; 2785ff6a9541SJacob Faibussowitsch return PETSC_SUCCESS; 2786ff6a9541SJacob Faibussowitsch #endif 278799cafbc1SBarry Smith } 278899cafbc1SBarry Smith 27893acb8795SBarry Smith /* 27902479783cSJose E. 
Roman Code almost identical to MatGetColumnIJ_SeqAIJ() should share common code 27913acb8795SBarry Smith */ 2792ff6a9541SJacob Faibussowitsch static PetscErrorCode MatGetColumnIJ_SeqBAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *nn, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done) 2793d71ae5a4SJacob Faibussowitsch { 27943acb8795SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 27953acb8795SBarry Smith PetscInt bs = A->rmap->bs, i, *collengths, *cia, *cja, n = A->cmap->n / bs, m = A->rmap->n / bs; 27963acb8795SBarry Smith PetscInt nz = a->i[m], row, *jj, mr, col; 27973acb8795SBarry Smith 27983acb8795SBarry Smith PetscFunctionBegin; 27993acb8795SBarry Smith *nn = n; 28003ba16761SJacob Faibussowitsch if (!ia) PetscFunctionReturn(PETSC_SUCCESS); 28015f80ce2aSJacob Faibussowitsch PetscCheck(!symmetric, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not for BAIJ matrices"); 28029566063dSJacob Faibussowitsch PetscCall(PetscCalloc1(n, &collengths)); 28039566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(n + 1, &cia)); 28049566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(nz, &cja)); 28053acb8795SBarry Smith jj = a->j; 2806ad540459SPierre Jolivet for (i = 0; i < nz; i++) collengths[jj[i]]++; 28073acb8795SBarry Smith cia[0] = oshift; 2808ad540459SPierre Jolivet for (i = 0; i < n; i++) cia[i + 1] = cia[i] + collengths[i]; 28099566063dSJacob Faibussowitsch PetscCall(PetscArrayzero(collengths, n)); 28103acb8795SBarry Smith jj = a->j; 28113acb8795SBarry Smith for (row = 0; row < m; row++) { 28123acb8795SBarry Smith mr = a->i[row + 1] - a->i[row]; 28133acb8795SBarry Smith for (i = 0; i < mr; i++) { 28143acb8795SBarry Smith col = *jj++; 281526fbe8dcSKarl Rupp 28163acb8795SBarry Smith cja[cia[col] + collengths[col]++ - oshift] = row + oshift; 28173acb8795SBarry Smith } 28183acb8795SBarry Smith } 28199566063dSJacob Faibussowitsch PetscCall(PetscFree(collengths)); 28209371c9d4SSatish Balay *ia = cia; 28219371c9d4SSatish Balay *ja = cja; 
28223ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 28233acb8795SBarry Smith } 28243acb8795SBarry Smith 2825ff6a9541SJacob Faibussowitsch static PetscErrorCode MatRestoreColumnIJ_SeqBAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *n, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done) 2826d71ae5a4SJacob Faibussowitsch { 28273acb8795SBarry Smith PetscFunctionBegin; 28283ba16761SJacob Faibussowitsch if (!ia) PetscFunctionReturn(PETSC_SUCCESS); 28299566063dSJacob Faibussowitsch PetscCall(PetscFree(*ia)); 28309566063dSJacob Faibussowitsch PetscCall(PetscFree(*ja)); 28313ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 28323acb8795SBarry Smith } 28333acb8795SBarry Smith 2834525d23c0SHong Zhang /* 2835525d23c0SHong Zhang MatGetColumnIJ_SeqBAIJ_Color() and MatRestoreColumnIJ_SeqBAIJ_Color() are customized from 2836525d23c0SHong Zhang MatGetColumnIJ_SeqBAIJ() and MatRestoreColumnIJ_SeqBAIJ() by adding an output 2837040ebd07SHong Zhang spidx[], index of a->a, to be used in MatTransposeColoringCreate() and MatFDColoringCreate() 2838525d23c0SHong Zhang */ 2839d71ae5a4SJacob Faibussowitsch PetscErrorCode MatGetColumnIJ_SeqBAIJ_Color(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *nn, const PetscInt *ia[], const PetscInt *ja[], PetscInt *spidx[], PetscBool *done) 2840d71ae5a4SJacob Faibussowitsch { 2841525d23c0SHong Zhang Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 2842c0349474SHong Zhang PetscInt i, *collengths, *cia, *cja, n = a->nbs, m = a->mbs; 2843525d23c0SHong Zhang PetscInt nz = a->i[m], row, *jj, mr, col; 2844525d23c0SHong Zhang PetscInt *cspidx; 2845f6d58c54SBarry Smith 2846f6d58c54SBarry Smith PetscFunctionBegin; 2847525d23c0SHong Zhang *nn = n; 28483ba16761SJacob Faibussowitsch if (!ia) PetscFunctionReturn(PETSC_SUCCESS); 2849f6d58c54SBarry Smith 28509566063dSJacob Faibussowitsch PetscCall(PetscCalloc1(n, &collengths)); 28519566063dSJacob Faibussowitsch 
PetscCall(PetscMalloc1(n + 1, &cia)); 28529566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(nz, &cja)); 28539566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(nz, &cspidx)); 2854525d23c0SHong Zhang jj = a->j; 2855ad540459SPierre Jolivet for (i = 0; i < nz; i++) collengths[jj[i]]++; 2856525d23c0SHong Zhang cia[0] = oshift; 2857ad540459SPierre Jolivet for (i = 0; i < n; i++) cia[i + 1] = cia[i] + collengths[i]; 28589566063dSJacob Faibussowitsch PetscCall(PetscArrayzero(collengths, n)); 2859525d23c0SHong Zhang jj = a->j; 2860525d23c0SHong Zhang for (row = 0; row < m; row++) { 2861525d23c0SHong Zhang mr = a->i[row + 1] - a->i[row]; 2862525d23c0SHong Zhang for (i = 0; i < mr; i++) { 2863525d23c0SHong Zhang col = *jj++; 2864525d23c0SHong Zhang cspidx[cia[col] + collengths[col] - oshift] = a->i[row] + i; /* index of a->j */ 2865525d23c0SHong Zhang cja[cia[col] + collengths[col]++ - oshift] = row + oshift; 2866525d23c0SHong Zhang } 2867525d23c0SHong Zhang } 28689566063dSJacob Faibussowitsch PetscCall(PetscFree(collengths)); 2869071fcb05SBarry Smith *ia = cia; 2870071fcb05SBarry Smith *ja = cja; 2871525d23c0SHong Zhang *spidx = cspidx; 28723ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 2873f6d58c54SBarry Smith } 2874f6d58c54SBarry Smith 2875d71ae5a4SJacob Faibussowitsch PetscErrorCode MatRestoreColumnIJ_SeqBAIJ_Color(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *n, const PetscInt *ia[], const PetscInt *ja[], PetscInt *spidx[], PetscBool *done) 2876d71ae5a4SJacob Faibussowitsch { 2877525d23c0SHong Zhang PetscFunctionBegin; 28789566063dSJacob Faibussowitsch PetscCall(MatRestoreColumnIJ_SeqBAIJ(A, oshift, symmetric, inodecompressed, n, ia, ja, done)); 28799566063dSJacob Faibussowitsch PetscCall(PetscFree(*spidx)); 28803ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 2881f6d58c54SBarry Smith } 288299cafbc1SBarry Smith 288366976f2fSJacob Faibussowitsch static PetscErrorCode MatShift_SeqBAIJ(Mat Y, 
PetscScalar a) 2884d71ae5a4SJacob Faibussowitsch { 28857d68702bSBarry Smith Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)Y->data; 28867d68702bSBarry Smith 28877d68702bSBarry Smith PetscFunctionBegin; 288848a46eb9SPierre Jolivet if (!Y->preallocated || !aij->nz) PetscCall(MatSeqBAIJSetPreallocation(Y, Y->rmap->bs, 1, NULL)); 28899566063dSJacob Faibussowitsch PetscCall(MatShift_Basic(Y, a)); 28903ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 28917d68702bSBarry Smith } 28927d68702bSBarry Smith 289317ea310bSPierre Jolivet PetscErrorCode MatEliminateZeros_SeqBAIJ(Mat A, PetscBool keep) 289417ea310bSPierre Jolivet { 289517ea310bSPierre Jolivet Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 289617ea310bSPierre Jolivet PetscInt fshift = 0, fshift_prev = 0, i, *ai = a->i, *aj = a->j, *imax = a->imax, j, k; 289717ea310bSPierre Jolivet PetscInt m = A->rmap->N, *ailen = a->ilen; 289817ea310bSPierre Jolivet PetscInt mbs = a->mbs, bs2 = a->bs2, rmax = 0; 289917ea310bSPierre Jolivet MatScalar *aa = a->a, *ap; 290017ea310bSPierre Jolivet PetscBool zero; 290117ea310bSPierre Jolivet 290217ea310bSPierre Jolivet PetscFunctionBegin; 290317ea310bSPierre Jolivet PetscCheck(A->assembled, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Cannot eliminate zeros for unassembled matrix"); 290417ea310bSPierre Jolivet if (m) rmax = ailen[0]; 290517ea310bSPierre Jolivet for (i = 1; i <= mbs; i++) { 290617ea310bSPierre Jolivet for (k = ai[i - 1]; k < ai[i]; k++) { 290717ea310bSPierre Jolivet zero = PETSC_TRUE; 290817ea310bSPierre Jolivet ap = aa + bs2 * k; 290917ea310bSPierre Jolivet for (j = 0; j < bs2 && zero; j++) { 291017ea310bSPierre Jolivet if (ap[j] != 0.0) zero = PETSC_FALSE; 291117ea310bSPierre Jolivet } 291217ea310bSPierre Jolivet if (zero && (aj[k] != i - 1 || !keep)) fshift++; 291317ea310bSPierre Jolivet else { 291417ea310bSPierre Jolivet if (zero && aj[k] == i - 1) PetscCall(PetscInfo(A, "Keep the diagonal block at row %" PetscInt_FMT "\n", i - 1)); 291517ea310bSPierre Jolivet aj[k - 
fshift] = aj[k]; 291617ea310bSPierre Jolivet PetscCall(PetscArraymove(ap - bs2 * fshift, ap, bs2)); 291717ea310bSPierre Jolivet } 291817ea310bSPierre Jolivet } 291917ea310bSPierre Jolivet ai[i - 1] -= fshift_prev; 292017ea310bSPierre Jolivet fshift_prev = fshift; 292117ea310bSPierre Jolivet ailen[i - 1] = imax[i - 1] = ai[i] - fshift - ai[i - 1]; 292217ea310bSPierre Jolivet a->nonzerorowcnt += ((ai[i] - fshift - ai[i - 1]) > 0); 292317ea310bSPierre Jolivet rmax = PetscMax(rmax, ailen[i - 1]); 292417ea310bSPierre Jolivet } 292517ea310bSPierre Jolivet if (fshift) { 292617ea310bSPierre Jolivet if (mbs) { 292717ea310bSPierre Jolivet ai[mbs] -= fshift; 292817ea310bSPierre Jolivet a->nz = ai[mbs]; 292917ea310bSPierre Jolivet } 293017ea310bSPierre Jolivet PetscCall(PetscInfo(A, "Matrix size: %" PetscInt_FMT " X %" PetscInt_FMT "; zeros eliminated: %" PetscInt_FMT "; nonzeros left: %" PetscInt_FMT "\n", m, A->cmap->n, fshift, a->nz)); 293117ea310bSPierre Jolivet A->nonzerostate++; 293217ea310bSPierre Jolivet A->info.nz_unneeded += (PetscReal)fshift; 293317ea310bSPierre Jolivet a->rmax = rmax; 293417ea310bSPierre Jolivet PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY)); 293517ea310bSPierre Jolivet PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY)); 293617ea310bSPierre Jolivet } 293717ea310bSPierre Jolivet PetscFunctionReturn(PETSC_SUCCESS); 293817ea310bSPierre Jolivet } 293917ea310bSPierre Jolivet 2940dec0b466SHong Zhang static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ, 2941cc2dc46cSBarry Smith MatGetRow_SeqBAIJ, 2942cc2dc46cSBarry Smith MatRestoreRow_SeqBAIJ, 2943cc2dc46cSBarry Smith MatMult_SeqBAIJ_N, 294497304618SKris Buschelman /* 4*/ MatMultAdd_SeqBAIJ_N, 29457c922b88SBarry Smith MatMultTranspose_SeqBAIJ, 29467c922b88SBarry Smith MatMultTransposeAdd_SeqBAIJ, 2947f4259b30SLisandro Dalcin NULL, 2948f4259b30SLisandro Dalcin NULL, 2949f4259b30SLisandro Dalcin NULL, 2950f4259b30SLisandro Dalcin /* 10*/ NULL, 2951cc2dc46cSBarry Smith MatLUFactor_SeqBAIJ, 
2952f4259b30SLisandro Dalcin NULL, 2953f4259b30SLisandro Dalcin NULL, 2954f2501298SSatish Balay MatTranspose_SeqBAIJ, 295597304618SKris Buschelman /* 15*/ MatGetInfo_SeqBAIJ, 2956cc2dc46cSBarry Smith MatEqual_SeqBAIJ, 2957cc2dc46cSBarry Smith MatGetDiagonal_SeqBAIJ, 2958cc2dc46cSBarry Smith MatDiagonalScale_SeqBAIJ, 2959cc2dc46cSBarry Smith MatNorm_SeqBAIJ, 2960f4259b30SLisandro Dalcin /* 20*/ NULL, 2961cc2dc46cSBarry Smith MatAssemblyEnd_SeqBAIJ, 2962cc2dc46cSBarry Smith MatSetOption_SeqBAIJ, 2963cc2dc46cSBarry Smith MatZeroEntries_SeqBAIJ, 2964d519adbfSMatthew Knepley /* 24*/ MatZeroRows_SeqBAIJ, 2965f4259b30SLisandro Dalcin NULL, 2966f4259b30SLisandro Dalcin NULL, 2967f4259b30SLisandro Dalcin NULL, 2968f4259b30SLisandro Dalcin NULL, 296926cec326SBarry Smith /* 29*/ MatSetUp_Seq_Hash, 2970f4259b30SLisandro Dalcin NULL, 2971f4259b30SLisandro Dalcin NULL, 2972f4259b30SLisandro Dalcin NULL, 2973f4259b30SLisandro Dalcin NULL, 2974d519adbfSMatthew Knepley /* 34*/ MatDuplicate_SeqBAIJ, 2975f4259b30SLisandro Dalcin NULL, 2976f4259b30SLisandro Dalcin NULL, 2977cc2dc46cSBarry Smith MatILUFactor_SeqBAIJ, 2978f4259b30SLisandro Dalcin NULL, 2979d519adbfSMatthew Knepley /* 39*/ MatAXPY_SeqBAIJ, 29807dae84e0SHong Zhang MatCreateSubMatrices_SeqBAIJ, 2981cc2dc46cSBarry Smith MatIncreaseOverlap_SeqBAIJ, 2982cc2dc46cSBarry Smith MatGetValues_SeqBAIJ, 29833c896bc6SHong Zhang MatCopy_SeqBAIJ, 2984f4259b30SLisandro Dalcin /* 44*/ NULL, 2985cc2dc46cSBarry Smith MatScale_SeqBAIJ, 29867d68702bSBarry Smith MatShift_SeqBAIJ, 2987f4259b30SLisandro Dalcin NULL, 298897b48c8fSBarry Smith MatZeroRowsColumns_SeqBAIJ, 2989f4259b30SLisandro Dalcin /* 49*/ NULL, 29903b2fbd54SBarry Smith MatGetRowIJ_SeqBAIJ, 299192c4ed94SBarry Smith MatRestoreRowIJ_SeqBAIJ, 29923acb8795SBarry Smith MatGetColumnIJ_SeqBAIJ, 29933acb8795SBarry Smith MatRestoreColumnIJ_SeqBAIJ, 299493dfae19SHong Zhang /* 54*/ MatFDColoringCreate_SeqXAIJ, 2995f4259b30SLisandro Dalcin NULL, 2996f4259b30SLisandro Dalcin NULL, 
2997090001bdSToby Isaac NULL, 2998d3825aa8SBarry Smith MatSetValuesBlocked_SeqBAIJ, 29997dae84e0SHong Zhang /* 59*/ MatCreateSubMatrix_SeqBAIJ, 3000b9b97703SBarry Smith MatDestroy_SeqBAIJ, 3001b9b97703SBarry Smith MatView_SeqBAIJ, 3002f4259b30SLisandro Dalcin NULL, 3003f4259b30SLisandro Dalcin NULL, 3004f4259b30SLisandro Dalcin /* 64*/ NULL, 3005f4259b30SLisandro Dalcin NULL, 3006f4259b30SLisandro Dalcin NULL, 3007f4259b30SLisandro Dalcin NULL, 30088bb0f5c6SPierre Jolivet MatGetRowMaxAbs_SeqBAIJ, 30098bb0f5c6SPierre Jolivet /* 69*/ NULL, 3010c87e5d42SMatthew Knepley MatConvert_Basic, 3011f4259b30SLisandro Dalcin NULL, 3012f6d58c54SBarry Smith MatFDColoringApply_BAIJ, 3013f4259b30SLisandro Dalcin NULL, 30148bb0f5c6SPierre Jolivet /* 74*/ NULL, 3015f4259b30SLisandro Dalcin NULL, 3016f4259b30SLisandro Dalcin NULL, 3017f4259b30SLisandro Dalcin NULL, 30185bba2384SShri Abhyankar MatLoad_SeqBAIJ, 30198bb0f5c6SPierre Jolivet /* 79*/ NULL, 30208bb0f5c6SPierre Jolivet NULL, 30218bb0f5c6SPierre Jolivet NULL, 30228bb0f5c6SPierre Jolivet NULL, 30238bb0f5c6SPierre Jolivet NULL, 3024f4259b30SLisandro Dalcin /* 84*/ NULL, 3025f4259b30SLisandro Dalcin NULL, 3026f4259b30SLisandro Dalcin NULL, 3027f4259b30SLisandro Dalcin NULL, 3028f4259b30SLisandro Dalcin NULL, 3029f4259b30SLisandro Dalcin /* 89*/ NULL, 3030f4259b30SLisandro Dalcin NULL, 3031f4259b30SLisandro Dalcin NULL, 3032f4259b30SLisandro Dalcin NULL, 30338bb0f5c6SPierre Jolivet MatConjugate_SeqBAIJ, 3034f4259b30SLisandro Dalcin /* 94*/ NULL, 3035f4259b30SLisandro Dalcin NULL, 30368bb0f5c6SPierre Jolivet MatRealPart_SeqBAIJ, 30378bb0f5c6SPierre Jolivet MatImaginaryPart_SeqBAIJ, 3038f4259b30SLisandro Dalcin NULL, 3039f4259b30SLisandro Dalcin /* 99*/ NULL, 3040f4259b30SLisandro Dalcin NULL, 3041f4259b30SLisandro Dalcin NULL, 3042f4259b30SLisandro Dalcin NULL, 30438bb0f5c6SPierre Jolivet NULL, 3044*421480d9SBarry Smith /*104*/ NULL, 30458bb0f5c6SPierre Jolivet NULL, 30468bb0f5c6SPierre Jolivet NULL, 3047f4259b30SLisandro Dalcin 
NULL, 3048f4259b30SLisandro Dalcin NULL, 3049f4259b30SLisandro Dalcin /*109*/ NULL, 3050f4259b30SLisandro Dalcin NULL, 3051547795f9SHong Zhang MatMultHermitianTranspose_SeqBAIJ, 3052d6037b41SHong Zhang MatMultHermitianTransposeAdd_SeqBAIJ, 3053f4259b30SLisandro Dalcin NULL, 3054*421480d9SBarry Smith /*114*/ NULL, 3055857cbf51SRichard Tran Mills MatGetColumnReductions_SeqBAIJ, 30563964eb88SJed Brown MatInvertBlockDiagonal_SeqBAIJ, 3057f4259b30SLisandro Dalcin NULL, 3058*421480d9SBarry Smith NULL, 30598bb0f5c6SPierre Jolivet /*119*/ NULL, 3060f4259b30SLisandro Dalcin NULL, 3061f4259b30SLisandro Dalcin NULL, 3062f4259b30SLisandro Dalcin NULL, 3063f4259b30SLisandro Dalcin NULL, 30648bb0f5c6SPierre Jolivet /*124*/ NULL, 30658bb0f5c6SPierre Jolivet NULL, 30668bb0f5c6SPierre Jolivet MatSetBlockSizes_Default, 30678bb0f5c6SPierre Jolivet NULL, 3068*421480d9SBarry Smith MatFDColoringSetUp_SeqXAIJ, 3069*421480d9SBarry Smith /*129*/ NULL, 30708bb0f5c6SPierre Jolivet MatCreateMPIMatConcatenateSeqMat_SeqBAIJ, 30718bb0f5c6SPierre Jolivet MatDestroySubMatrices_SeqBAIJ, 30728bb0f5c6SPierre Jolivet NULL, 3073f4259b30SLisandro Dalcin NULL, 3074*421480d9SBarry Smith /*134*/ NULL, 3075f4259b30SLisandro Dalcin NULL, 3076eede4a3fSMark Adams MatEliminateZeros_SeqBAIJ, 30774cc2b5b5SPierre Jolivet MatGetRowSumAbs_SeqBAIJ, 307842ce410bSJunchao Zhang NULL, 3079*421480d9SBarry Smith /*139*/ NULL, 308042ce410bSJunchao Zhang NULL, 308103db1824SAlex Lindsay MatCopyHashToXAIJ_Seq_Hash, 3082c2be7ffeSStefano Zampini NULL, 308303db1824SAlex Lindsay NULL}; 30842593348eSBarry Smith 3085ff6a9541SJacob Faibussowitsch static PetscErrorCode MatStoreValues_SeqBAIJ(Mat mat) 3086d71ae5a4SJacob Faibussowitsch { 30873e90b805SBarry Smith Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data; 30888ece6314SShri Abhyankar PetscInt nz = aij->i[aij->mbs] * aij->bs2; 30893e90b805SBarry Smith 30903e90b805SBarry Smith PetscFunctionBegin; 30915f80ce2aSJacob Faibussowitsch PetscCheck(aij->nonew == 1, PETSC_COMM_SELF, 
PETSC_ERR_ORDER, "Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first"); 30923e90b805SBarry Smith 30933e90b805SBarry Smith /* allocate space for values if not already there */ 3094ff6a9541SJacob Faibussowitsch if (!aij->saved_values) PetscCall(PetscMalloc1(nz + 1, &aij->saved_values)); 30953e90b805SBarry Smith 30963e90b805SBarry Smith /* copy values over */ 30979566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(aij->saved_values, aij->a, nz)); 30983ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 30993e90b805SBarry Smith } 31003e90b805SBarry Smith 3101ff6a9541SJacob Faibussowitsch static PetscErrorCode MatRetrieveValues_SeqBAIJ(Mat mat) 3102d71ae5a4SJacob Faibussowitsch { 31033e90b805SBarry Smith Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data; 31048ece6314SShri Abhyankar PetscInt nz = aij->i[aij->mbs] * aij->bs2; 31053e90b805SBarry Smith 31063e90b805SBarry Smith PetscFunctionBegin; 31075f80ce2aSJacob Faibussowitsch PetscCheck(aij->nonew == 1, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first"); 31085f80ce2aSJacob Faibussowitsch PetscCheck(aij->saved_values, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Must call MatStoreValues(A);first"); 31093e90b805SBarry Smith 31103e90b805SBarry Smith /* copy values over */ 31119566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(aij->a, aij->saved_values, nz)); 31123ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 31133e90b805SBarry Smith } 31143e90b805SBarry Smith 3115cc2e6a90SBarry Smith PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType, MatReuse, Mat *); 3116cc2e6a90SBarry Smith PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqSBAIJ(Mat, MatType, MatReuse, Mat *); 3117273d9f13SBarry Smith 3118f9663b93SPierre Jolivet PetscErrorCode MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B, PetscInt bs, PetscInt nz, const PetscInt nnz[]) 3119d71ae5a4SJacob Faibussowitsch { 3120ad79cf63SBarry Smith Mat_SeqBAIJ *b = (Mat_SeqBAIJ 
*)B->data; 3121535b19f3SBarry Smith PetscInt i, mbs, nbs, bs2; 31228afaa268SBarry Smith PetscBool flg = PETSC_FALSE, skipallocation = PETSC_FALSE, realalloc = PETSC_FALSE; 3123a23d5eceSKris Buschelman 3124a23d5eceSKris Buschelman PetscFunctionBegin; 3125ad79cf63SBarry Smith if (B->hash_active) { 3126ad79cf63SBarry Smith PetscInt bs; 3127aea10558SJacob Faibussowitsch B->ops[0] = b->cops; 3128ad79cf63SBarry Smith PetscCall(PetscHMapIJVDestroy(&b->ht)); 3129ad79cf63SBarry Smith PetscCall(MatGetBlockSize(B, &bs)); 3130ad79cf63SBarry Smith if (bs > 1) PetscCall(PetscHSetIJDestroy(&b->bht)); 3131ad79cf63SBarry Smith PetscCall(PetscFree(b->dnz)); 3132ad79cf63SBarry Smith PetscCall(PetscFree(b->bdnz)); 3133ad79cf63SBarry Smith B->hash_active = PETSC_FALSE; 3134ad79cf63SBarry Smith } 31352576faa2SJed Brown if (nz >= 0 || nnz) realalloc = PETSC_TRUE; 3136ab93d7beSBarry Smith if (nz == MAT_SKIP_ALLOCATION) { 3137ab93d7beSBarry Smith skipallocation = PETSC_TRUE; 3138ab93d7beSBarry Smith nz = 0; 3139ab93d7beSBarry Smith } 31408c07d4e3SBarry Smith 314158b7e2c1SStefano Zampini PetscCall(MatSetBlockSize(B, bs)); 31429566063dSJacob Faibussowitsch PetscCall(PetscLayoutSetUp(B->rmap)); 31439566063dSJacob Faibussowitsch PetscCall(PetscLayoutSetUp(B->cmap)); 31449566063dSJacob Faibussowitsch PetscCall(PetscLayoutGetBlockSize(B->rmap, &bs)); 3145899cda47SBarry Smith 3146899cda47SBarry Smith B->preallocated = PETSC_TRUE; 3147899cda47SBarry Smith 3148d0f46423SBarry Smith mbs = B->rmap->n / bs; 3149d0f46423SBarry Smith nbs = B->cmap->n / bs; 3150a23d5eceSKris Buschelman bs2 = bs * bs; 3151a23d5eceSKris Buschelman 31525f80ce2aSJacob Faibussowitsch PetscCheck(mbs * bs == B->rmap->n && nbs * bs == B->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Number rows %" PetscInt_FMT ", cols %" PetscInt_FMT " must be divisible by blocksize %" PetscInt_FMT, B->rmap->N, B->cmap->n, bs); 3153a23d5eceSKris Buschelman 3154a23d5eceSKris Buschelman if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 
31555f80ce2aSJacob Faibussowitsch PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "nz cannot be less than 0: value %" PetscInt_FMT, nz); 3156a23d5eceSKris Buschelman if (nnz) { 3157a23d5eceSKris Buschelman for (i = 0; i < mbs; i++) { 31585f80ce2aSJacob Faibussowitsch PetscCheck(nnz[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "nnz cannot be less than 0: local row %" PetscInt_FMT " value %" PetscInt_FMT, i, nnz[i]); 31595f80ce2aSJacob Faibussowitsch PetscCheck(nnz[i] <= nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "nnz cannot be greater than block row length: local row %" PetscInt_FMT " value %" PetscInt_FMT " rowlength %" PetscInt_FMT, i, nnz[i], nbs); 3160a23d5eceSKris Buschelman } 3161a23d5eceSKris Buschelman } 3162a23d5eceSKris Buschelman 3163d0609cedSBarry Smith PetscOptionsBegin(PetscObjectComm((PetscObject)B), NULL, "Optimize options for SEQBAIJ matrix 2 ", "Mat"); 31649566063dSJacob Faibussowitsch PetscCall(PetscOptionsBool("-mat_no_unroll", "Do not optimize for block size (slow)", NULL, flg, &flg, NULL)); 3165d0609cedSBarry Smith PetscOptionsEnd(); 31668c07d4e3SBarry Smith 3167a23d5eceSKris Buschelman if (!flg) { 3168a23d5eceSKris Buschelman switch (bs) { 3169a23d5eceSKris Buschelman case 1: 3170a23d5eceSKris Buschelman B->ops->mult = MatMult_SeqBAIJ_1; 3171a23d5eceSKris Buschelman B->ops->multadd = MatMultAdd_SeqBAIJ_1; 3172a23d5eceSKris Buschelman break; 3173a23d5eceSKris Buschelman case 2: 3174a23d5eceSKris Buschelman B->ops->mult = MatMult_SeqBAIJ_2; 3175a23d5eceSKris Buschelman B->ops->multadd = MatMultAdd_SeqBAIJ_2; 3176a23d5eceSKris Buschelman break; 3177a23d5eceSKris Buschelman case 3: 3178a23d5eceSKris Buschelman B->ops->mult = MatMult_SeqBAIJ_3; 3179a23d5eceSKris Buschelman B->ops->multadd = MatMultAdd_SeqBAIJ_3; 3180a23d5eceSKris Buschelman break; 3181a23d5eceSKris Buschelman case 4: 3182a23d5eceSKris Buschelman B->ops->mult = MatMult_SeqBAIJ_4; 3183a23d5eceSKris Buschelman B->ops->multadd = MatMultAdd_SeqBAIJ_4; 
3184a23d5eceSKris Buschelman break; 3185a23d5eceSKris Buschelman case 5: 3186a23d5eceSKris Buschelman B->ops->mult = MatMult_SeqBAIJ_5; 3187a23d5eceSKris Buschelman B->ops->multadd = MatMultAdd_SeqBAIJ_5; 3188a23d5eceSKris Buschelman break; 3189a23d5eceSKris Buschelman case 6: 3190a23d5eceSKris Buschelman B->ops->mult = MatMult_SeqBAIJ_6; 3191a23d5eceSKris Buschelman B->ops->multadd = MatMultAdd_SeqBAIJ_6; 3192a23d5eceSKris Buschelman break; 3193a23d5eceSKris Buschelman case 7: 3194a23d5eceSKris Buschelman B->ops->mult = MatMult_SeqBAIJ_7; 3195a23d5eceSKris Buschelman B->ops->multadd = MatMultAdd_SeqBAIJ_7; 3196a23d5eceSKris Buschelman break; 31979371c9d4SSatish Balay case 9: { 31986679dcc1SBarry Smith PetscInt version = 1; 31999566063dSJacob Faibussowitsch PetscCall(PetscOptionsGetInt(NULL, ((PetscObject)B)->prefix, "-mat_baij_mult_version", &version, NULL)); 32006679dcc1SBarry Smith switch (version) { 32015f70456aSHong Zhang #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX2__) && defined(__FMA__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES) 32026679dcc1SBarry Smith case 1: 320396e086a2SDaniel Kokron B->ops->mult = MatMult_SeqBAIJ_9_AVX2; 320496e086a2SDaniel Kokron B->ops->multadd = MatMultAdd_SeqBAIJ_9_AVX2; 3205835f2295SStefano Zampini PetscCall(PetscInfo(B, "Using AVX2 for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", bs)); 32066679dcc1SBarry Smith break; 32076679dcc1SBarry Smith #endif 32086679dcc1SBarry Smith default: 320996e086a2SDaniel Kokron B->ops->mult = MatMult_SeqBAIJ_N; 321096e086a2SDaniel Kokron B->ops->multadd = MatMultAdd_SeqBAIJ_N; 3211835f2295SStefano Zampini PetscCall(PetscInfo(B, "Using BLAS for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", bs)); 321296e086a2SDaniel Kokron break; 32136679dcc1SBarry Smith } 32146679dcc1SBarry Smith break; 32156679dcc1SBarry Smith } 3216ebada01fSBarry Smith case 11: 3217ebada01fSBarry Smith B->ops->mult = MatMult_SeqBAIJ_11; 
3218ebada01fSBarry Smith B->ops->multadd = MatMultAdd_SeqBAIJ_11; 3219ebada01fSBarry Smith break; 32209371c9d4SSatish Balay case 12: { 32216679dcc1SBarry Smith PetscInt version = 1; 32229566063dSJacob Faibussowitsch PetscCall(PetscOptionsGetInt(NULL, ((PetscObject)B)->prefix, "-mat_baij_mult_version", &version, NULL)); 32236679dcc1SBarry Smith switch (version) { 32246679dcc1SBarry Smith case 1: 32256679dcc1SBarry Smith B->ops->mult = MatMult_SeqBAIJ_12_ver1; 32266679dcc1SBarry Smith B->ops->multadd = MatMultAdd_SeqBAIJ_12_ver1; 3227835f2295SStefano Zampini PetscCall(PetscInfo(B, "Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", version, bs)); 32288ab949d8SShri Abhyankar break; 32296679dcc1SBarry Smith case 2: 32306679dcc1SBarry Smith B->ops->mult = MatMult_SeqBAIJ_12_ver2; 32316679dcc1SBarry Smith B->ops->multadd = MatMultAdd_SeqBAIJ_12_ver2; 3232835f2295SStefano Zampini PetscCall(PetscInfo(B, "Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", version, bs)); 32336679dcc1SBarry Smith break; 32346679dcc1SBarry Smith #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX2__) && defined(__FMA__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES) 32356679dcc1SBarry Smith case 3: 32366679dcc1SBarry Smith B->ops->mult = MatMult_SeqBAIJ_12_AVX2; 32376679dcc1SBarry Smith B->ops->multadd = MatMultAdd_SeqBAIJ_12_ver1; 3238835f2295SStefano Zampini PetscCall(PetscInfo(B, "Using AVX2 for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", bs)); 32396679dcc1SBarry Smith break; 32406679dcc1SBarry Smith #endif 3241a23d5eceSKris Buschelman default: 3242a23d5eceSKris Buschelman B->ops->mult = MatMult_SeqBAIJ_N; 3243a23d5eceSKris Buschelman B->ops->multadd = MatMultAdd_SeqBAIJ_N; 3244835f2295SStefano Zampini PetscCall(PetscInfo(B, "Using BLAS for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", bs)); 32456679dcc1SBarry Smith break; 32466679dcc1SBarry Smith 
} 32476679dcc1SBarry Smith break; 32486679dcc1SBarry Smith } 32499371c9d4SSatish Balay case 15: { 32506679dcc1SBarry Smith PetscInt version = 1; 32519566063dSJacob Faibussowitsch PetscCall(PetscOptionsGetInt(NULL, ((PetscObject)B)->prefix, "-mat_baij_mult_version", &version, NULL)); 32526679dcc1SBarry Smith switch (version) { 32536679dcc1SBarry Smith case 1: 32546679dcc1SBarry Smith B->ops->mult = MatMult_SeqBAIJ_15_ver1; 3255835f2295SStefano Zampini PetscCall(PetscInfo(B, "Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", version, bs)); 32566679dcc1SBarry Smith break; 32576679dcc1SBarry Smith case 2: 32586679dcc1SBarry Smith B->ops->mult = MatMult_SeqBAIJ_15_ver2; 3259835f2295SStefano Zampini PetscCall(PetscInfo(B, "Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", version, bs)); 32606679dcc1SBarry Smith break; 32616679dcc1SBarry Smith case 3: 32626679dcc1SBarry Smith B->ops->mult = MatMult_SeqBAIJ_15_ver3; 3263835f2295SStefano Zampini PetscCall(PetscInfo(B, "Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", version, bs)); 32646679dcc1SBarry Smith break; 32656679dcc1SBarry Smith case 4: 32666679dcc1SBarry Smith B->ops->mult = MatMult_SeqBAIJ_15_ver4; 3267835f2295SStefano Zampini PetscCall(PetscInfo(B, "Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", version, bs)); 32686679dcc1SBarry Smith break; 32696679dcc1SBarry Smith default: 32706679dcc1SBarry Smith B->ops->mult = MatMult_SeqBAIJ_N; 3271835f2295SStefano Zampini PetscCall(PetscInfo(B, "Using BLAS for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", bs)); 32726679dcc1SBarry Smith break; 32736679dcc1SBarry Smith } 32746679dcc1SBarry Smith B->ops->multadd = MatMultAdd_SeqBAIJ_N; 32756679dcc1SBarry Smith break; 32766679dcc1SBarry Smith } 32776679dcc1SBarry Smith default: 32786679dcc1SBarry Smith B->ops->mult = MatMult_SeqBAIJ_N; 32796679dcc1SBarry Smith 
B->ops->multadd = MatMultAdd_SeqBAIJ_N; 3280835f2295SStefano Zampini PetscCall(PetscInfo(B, "Using BLAS for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", bs)); 3281a23d5eceSKris Buschelman break; 3282a23d5eceSKris Buschelman } 3283a23d5eceSKris Buschelman } 3284e48d15efSToby Isaac B->ops->sor = MatSOR_SeqBAIJ; 3285a23d5eceSKris Buschelman b->mbs = mbs; 3286a23d5eceSKris Buschelman b->nbs = nbs; 3287ab93d7beSBarry Smith if (!skipallocation) { 32882ee49352SLisandro Dalcin if (!b->imax) { 32899566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(mbs, &b->imax, mbs, &b->ilen)); 329026fbe8dcSKarl Rupp 32914fd072dbSBarry Smith b->free_imax_ilen = PETSC_TRUE; 32922ee49352SLisandro Dalcin } 3293ab93d7beSBarry Smith /* b->ilen will count nonzeros in each block row so far. */ 329426fbe8dcSKarl Rupp for (i = 0; i < mbs; i++) b->ilen[i] = 0; 3295a23d5eceSKris Buschelman if (!nnz) { 3296a23d5eceSKris Buschelman if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 3297c62bd62aSJed Brown else if (nz < 0) nz = 1; 32985d2a9ed1SStefano Zampini nz = PetscMin(nz, nbs); 3299a23d5eceSKris Buschelman for (i = 0; i < mbs; i++) b->imax[i] = nz; 33009566063dSJacob Faibussowitsch PetscCall(PetscIntMultError(nz, mbs, &nz)); 3301a23d5eceSKris Buschelman } else { 3302c73702f5SBarry Smith PetscInt64 nz64 = 0; 33039371c9d4SSatish Balay for (i = 0; i < mbs; i++) { 33049371c9d4SSatish Balay b->imax[i] = nnz[i]; 33059371c9d4SSatish Balay nz64 += nnz[i]; 33069371c9d4SSatish Balay } 33079566063dSJacob Faibussowitsch PetscCall(PetscIntCast(nz64, &nz)); 3308a23d5eceSKris Buschelman } 3309a23d5eceSKris Buschelman 3310a23d5eceSKris Buschelman /* allocate the matrix space */ 33119566063dSJacob Faibussowitsch PetscCall(MatSeqXAIJFreeAIJ(B, &b->a, &b->j, &b->i)); 33129f0612e4SBarry Smith PetscCall(PetscShmgetAllocateArray(nz, sizeof(PetscInt), (void **)&b->j)); 33139f0612e4SBarry Smith PetscCall(PetscShmgetAllocateArray(B->rmap->N + 1, sizeof(PetscInt), (void **)&b->i)); 3314672ba085SHong Zhang if 
(B->structure_only) { 33159f0612e4SBarry Smith b->free_a = PETSC_FALSE; 3316672ba085SHong Zhang } else { 33176679dcc1SBarry Smith PetscInt nzbs2 = 0; 33189566063dSJacob Faibussowitsch PetscCall(PetscIntMultError(nz, bs2, &nzbs2)); 33199f0612e4SBarry Smith PetscCall(PetscShmgetAllocateArray(nzbs2, sizeof(PetscScalar), (void **)&b->a)); 33209f0612e4SBarry Smith b->free_a = PETSC_TRUE; 33219566063dSJacob Faibussowitsch PetscCall(PetscArrayzero(b->a, nz * bs2)); 3322672ba085SHong Zhang } 3323672ba085SHong Zhang b->free_ij = PETSC_TRUE; 33249f0612e4SBarry Smith PetscCall(PetscArrayzero(b->j, nz)); 3325672ba085SHong Zhang 3326a23d5eceSKris Buschelman b->i[0] = 0; 3327ad540459SPierre Jolivet for (i = 1; i < mbs + 1; i++) b->i[i] = b->i[i - 1] + b->imax[i - 1]; 3328e811da20SHong Zhang } else { 3329e6b907acSBarry Smith b->free_a = PETSC_FALSE; 3330e6b907acSBarry Smith b->free_ij = PETSC_FALSE; 3331ab93d7beSBarry Smith } 3332a23d5eceSKris Buschelman 3333a23d5eceSKris Buschelman b->bs2 = bs2; 3334a23d5eceSKris Buschelman b->mbs = mbs; 3335a23d5eceSKris Buschelman b->nz = 0; 3336b32cb4a7SJed Brown b->maxnz = nz; 3337b32cb4a7SJed Brown B->info.nz_unneeded = (PetscReal)b->maxnz * bs2; 3338cb7b82ddSBarry Smith B->was_assembled = PETSC_FALSE; 3339cb7b82ddSBarry Smith B->assembled = PETSC_FALSE; 33409566063dSJacob Faibussowitsch if (realalloc) PetscCall(MatSetOption(B, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_TRUE)); 33413ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 3342a23d5eceSKris Buschelman } 3343a23d5eceSKris Buschelman 334466976f2fSJacob Faibussowitsch static PetscErrorCode MatSeqBAIJSetPreallocationCSR_SeqBAIJ(Mat B, PetscInt bs, const PetscInt ii[], const PetscInt jj[], const PetscScalar V[]) 3345d71ae5a4SJacob Faibussowitsch { 3346725b52f3SLisandro Dalcin PetscInt i, m, nz, nz_max = 0, *nnz; 3347f4259b30SLisandro Dalcin PetscScalar *values = NULL; 3348d47bf9aaSJed Brown PetscBool roworiented = ((Mat_SeqBAIJ *)B->data)->roworiented; 3349725b52f3SLisandro 
Dalcin 3350725b52f3SLisandro Dalcin PetscFunctionBegin; 33515f80ce2aSJacob Faibussowitsch PetscCheck(bs >= 1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Invalid block size specified, must be positive but it is %" PetscInt_FMT, bs); 33529566063dSJacob Faibussowitsch PetscCall(PetscLayoutSetBlockSize(B->rmap, bs)); 33539566063dSJacob Faibussowitsch PetscCall(PetscLayoutSetBlockSize(B->cmap, bs)); 33549566063dSJacob Faibussowitsch PetscCall(PetscLayoutSetUp(B->rmap)); 33559566063dSJacob Faibussowitsch PetscCall(PetscLayoutSetUp(B->cmap)); 33569566063dSJacob Faibussowitsch PetscCall(PetscLayoutGetBlockSize(B->rmap, &bs)); 3357d0f46423SBarry Smith m = B->rmap->n / bs; 3358725b52f3SLisandro Dalcin 33595f80ce2aSJacob Faibussowitsch PetscCheck(ii[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %" PetscInt_FMT, ii[0]); 33609566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(m + 1, &nnz)); 3361725b52f3SLisandro Dalcin for (i = 0; i < m; i++) { 3362cf12db73SBarry Smith nz = ii[i + 1] - ii[i]; 33635f80ce2aSJacob Faibussowitsch PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Local row %" PetscInt_FMT " has a negative number of columns %" PetscInt_FMT, i, nz); 3364725b52f3SLisandro Dalcin nz_max = PetscMax(nz_max, nz); 3365725b52f3SLisandro Dalcin nnz[i] = nz; 3366725b52f3SLisandro Dalcin } 33679566063dSJacob Faibussowitsch PetscCall(MatSeqBAIJSetPreallocation(B, bs, 0, nnz)); 33689566063dSJacob Faibussowitsch PetscCall(PetscFree(nnz)); 3369725b52f3SLisandro Dalcin 3370725b52f3SLisandro Dalcin values = (PetscScalar *)V; 337148a46eb9SPierre Jolivet if (!values) PetscCall(PetscCalloc1(bs * bs * (nz_max + 1), &values)); 3372725b52f3SLisandro Dalcin for (i = 0; i < m; i++) { 3373cf12db73SBarry Smith PetscInt ncols = ii[i + 1] - ii[i]; 3374cf12db73SBarry Smith const PetscInt *icols = jj + ii[i]; 3375bb80cfbbSStefano Zampini if (bs == 1 || !roworiented) { 3376cf12db73SBarry Smith const PetscScalar *svals = values + (V ? 
(bs * bs * ii[i]) : 0); 33779566063dSJacob Faibussowitsch PetscCall(MatSetValuesBlocked_SeqBAIJ(B, 1, &i, ncols, icols, svals, INSERT_VALUES)); 33783adadaf3SJed Brown } else { 33793adadaf3SJed Brown PetscInt j; 33803adadaf3SJed Brown for (j = 0; j < ncols; j++) { 33813adadaf3SJed Brown const PetscScalar *svals = values + (V ? (bs * bs * (ii[i] + j)) : 0); 33829566063dSJacob Faibussowitsch PetscCall(MatSetValuesBlocked_SeqBAIJ(B, 1, &i, 1, &icols[j], svals, INSERT_VALUES)); 33833adadaf3SJed Brown } 33843adadaf3SJed Brown } 3385725b52f3SLisandro Dalcin } 33869566063dSJacob Faibussowitsch if (!V) PetscCall(PetscFree(values)); 33879566063dSJacob Faibussowitsch PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 33889566063dSJacob Faibussowitsch PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 33899566063dSJacob Faibussowitsch PetscCall(MatSetOption(B, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE)); 33903ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 3391725b52f3SLisandro Dalcin } 3392725b52f3SLisandro Dalcin 3393cda14afcSprj- /*@C 339411a5261eSBarry Smith MatSeqBAIJGetArray - gives read/write access to the array where the data for a `MATSEQBAIJ` matrix is stored 3395cda14afcSprj- 3396cda14afcSprj- Not Collective 3397cda14afcSprj- 3398cda14afcSprj- Input Parameter: 3399fe59aa6dSJacob Faibussowitsch . A - a `MATSEQBAIJ` matrix 3400cda14afcSprj- 3401cda14afcSprj- Output Parameter: 3402cda14afcSprj- . 
array - pointer to the data 3403cda14afcSprj- 3404cda14afcSprj- Level: intermediate 3405cda14afcSprj- 34061cc06b55SBarry Smith .seealso: [](ch_matrices), `Mat`, `MATSEQBAIJ`, `MatSeqBAIJRestoreArray()`, `MatSeqAIJGetArray()`, `MatSeqAIJRestoreArray()` 3407cda14afcSprj- @*/ 34085d83a8b1SBarry Smith PetscErrorCode MatSeqBAIJGetArray(Mat A, PetscScalar *array[]) 3409d71ae5a4SJacob Faibussowitsch { 3410cda14afcSprj- PetscFunctionBegin; 3411cac4c232SBarry Smith PetscUseMethod(A, "MatSeqBAIJGetArray_C", (Mat, PetscScalar **), (A, array)); 34123ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 3413cda14afcSprj- } 3414cda14afcSprj- 3415cda14afcSprj- /*@C 341611a5261eSBarry Smith MatSeqBAIJRestoreArray - returns access to the array where the data for a `MATSEQBAIJ` matrix is stored obtained by `MatSeqBAIJGetArray()` 3417cda14afcSprj- 3418cda14afcSprj- Not Collective 3419cda14afcSprj- 3420cda14afcSprj- Input Parameters: 3421fe59aa6dSJacob Faibussowitsch + A - a `MATSEQBAIJ` matrix 3422cda14afcSprj- - array - pointer to the data 3423cda14afcSprj- 3424cda14afcSprj- Level: intermediate 3425cda14afcSprj- 34261cc06b55SBarry Smith .seealso: [](ch_matrices), `Mat`, `MatSeqBAIJGetArray()`, `MatSeqAIJGetArray()`, `MatSeqAIJRestoreArray()` 3427cda14afcSprj- @*/ 34285d83a8b1SBarry Smith PetscErrorCode MatSeqBAIJRestoreArray(Mat A, PetscScalar *array[]) 3429d71ae5a4SJacob Faibussowitsch { 3430cda14afcSprj- PetscFunctionBegin; 3431cac4c232SBarry Smith PetscUseMethod(A, "MatSeqBAIJRestoreArray_C", (Mat, PetscScalar **), (A, array)); 34323ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 3433cda14afcSprj- } 3434cda14afcSprj- 34350bad9183SKris Buschelman /*MC 3436fafad747SKris Buschelman MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on 34370bad9183SKris Buschelman block sparse compressed row format. 
34380bad9183SKris Buschelman 34390bad9183SKris Buschelman Options Database Keys: 344020f4b53cSBarry Smith + -mat_type seqbaij - sets the matrix type to `MATSEQBAIJ` during a call to `MatSetFromOptions()` 34416679dcc1SBarry Smith - -mat_baij_mult_version version - indicate the version of the matrix-vector product to use (0 often indicates using BLAS) 34420bad9183SKris Buschelman 34430bad9183SKris Buschelman Level: beginner 34440cd7f59aSBarry Smith 34450cd7f59aSBarry Smith Notes: 344611a5261eSBarry Smith `MatSetOptions`(,`MAT_STRUCTURE_ONLY`,`PETSC_TRUE`) may be called for this matrix type. In this no 344711a5261eSBarry Smith space is allocated for the nonzero entries and any entries passed with `MatSetValues()` are ignored 34480bad9183SKris Buschelman 34492ef1f0ffSBarry Smith Run with `-info` to see what version of the matrix-vector product is being used 34506679dcc1SBarry Smith 34511cc06b55SBarry Smith .seealso: [](ch_matrices), `Mat`, `MatCreateSeqBAIJ()` 34520bad9183SKris Buschelman M*/ 34530bad9183SKris Buschelman 3454cc2e6a90SBarry Smith PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBSTRM(Mat, MatType, MatReuse, Mat *); 3455b24902e0SBarry Smith 3456d71ae5a4SJacob Faibussowitsch PETSC_EXTERN PetscErrorCode MatCreate_SeqBAIJ(Mat B) 3457d71ae5a4SJacob Faibussowitsch { 3458c1ac3661SBarry Smith PetscMPIInt size; 3459b6490206SBarry Smith Mat_SeqBAIJ *b; 34603b2fbd54SBarry Smith 34613a40ed3dSBarry Smith PetscFunctionBegin; 34629566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size)); 34635f80ce2aSJacob Faibussowitsch PetscCheck(size == 1, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Comm must be of size 1"); 3464b6490206SBarry Smith 34654dfa11a4SJacob Faibussowitsch PetscCall(PetscNew(&b)); 3466b0a32e0cSBarry Smith B->data = (void *)b; 3467aea10558SJacob Faibussowitsch B->ops[0] = MatOps_Values; 346826fbe8dcSKarl Rupp 3469f4259b30SLisandro Dalcin b->row = NULL; 3470f4259b30SLisandro Dalcin b->col = NULL; 3471f4259b30SLisandro 
Dalcin b->icol = NULL; 34722593348eSBarry Smith b->reallocs = 0; 3473f4259b30SLisandro Dalcin b->saved_values = NULL; 34742593348eSBarry Smith 3475c4992f7dSBarry Smith b->roworiented = PETSC_TRUE; 34762593348eSBarry Smith b->nonew = 0; 3477f4259b30SLisandro Dalcin b->diag = NULL; 3478f4259b30SLisandro Dalcin B->spptr = NULL; 3479b32cb4a7SJed Brown B->info.nz_unneeded = (PetscReal)b->maxnz * b->bs2; 3480a9817697SBarry Smith b->keepnonzeropattern = PETSC_FALSE; 34814e220ebcSLois Curfman McInnes 34829566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSeqBAIJGetArray_C", MatSeqBAIJGetArray_SeqBAIJ)); 34839566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSeqBAIJRestoreArray_C", MatSeqBAIJRestoreArray_SeqBAIJ)); 34849566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatStoreValues_C", MatStoreValues_SeqBAIJ)); 34859566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatRetrieveValues_C", MatRetrieveValues_SeqBAIJ)); 34869566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSeqBAIJSetColumnIndices_C", MatSeqBAIJSetColumnIndices_SeqBAIJ)); 34879566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqbaij_seqaij_C", MatConvert_SeqBAIJ_SeqAIJ)); 34889566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqbaij_seqsbaij_C", MatConvert_SeqBAIJ_SeqSBAIJ)); 34899566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSeqBAIJSetPreallocation_C", MatSeqBAIJSetPreallocation_SeqBAIJ)); 34909566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSeqBAIJSetPreallocationCSR_C", MatSeqBAIJSetPreallocationCSR_SeqBAIJ)); 34919566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatIsTranspose_C", MatIsTranspose_SeqBAIJ)); 34927ea3e4caSstefano_zampini 
#if defined(PETSC_HAVE_HYPRE) 34939566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqbaij_hypre_C", MatConvert_AIJ_HYPRE)); 34947ea3e4caSstefano_zampini #endif 34959566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqbaij_is_C", MatConvert_XAIJ_IS)); 34969566063dSJacob Faibussowitsch PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATSEQBAIJ)); 34973ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 34982593348eSBarry Smith } 34992593348eSBarry Smith 3500d6acfc2dSPierre Jolivet PETSC_INTERN PetscErrorCode MatDuplicateNoCreate_SeqBAIJ(Mat C, Mat A, MatDuplicateOption cpvalues, PetscBool mallocmatspace) 3501d71ae5a4SJacob Faibussowitsch { 3502b24902e0SBarry Smith Mat_SeqBAIJ *c = (Mat_SeqBAIJ *)C->data, *a = (Mat_SeqBAIJ *)A->data; 3503a96a251dSBarry Smith PetscInt i, mbs = a->mbs, nz = a->nz, bs2 = a->bs2; 3504de6a44a3SBarry Smith 35053a40ed3dSBarry Smith PetscFunctionBegin; 350631fe6a7dSBarry Smith PetscCheck(A->assembled, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONGSTATE, "Cannot duplicate unassembled matrix"); 35075f80ce2aSJacob Faibussowitsch PetscCheck(a->i[mbs] == nz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Corrupt matrix"); 35082593348eSBarry Smith 35094fd072dbSBarry Smith if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { 35104fd072dbSBarry Smith c->imax = a->imax; 35114fd072dbSBarry Smith c->ilen = a->ilen; 35124fd072dbSBarry Smith c->free_imax_ilen = PETSC_FALSE; 35134fd072dbSBarry Smith } else { 35149566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(mbs, &c->imax, mbs, &c->ilen)); 3515b6490206SBarry Smith for (i = 0; i < mbs; i++) { 35162593348eSBarry Smith c->imax[i] = a->imax[i]; 35172593348eSBarry Smith c->ilen[i] = a->ilen[i]; 35182593348eSBarry Smith } 35194fd072dbSBarry Smith c->free_imax_ilen = PETSC_TRUE; 35204fd072dbSBarry Smith } 35212593348eSBarry Smith 35222593348eSBarry Smith /* allocate the matrix space */ 352316a2bf60SHong Zhang if 
(mallocmatspace) { 35244fd072dbSBarry Smith if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { 35259f0612e4SBarry Smith PetscCall(PetscShmgetAllocateArray(bs2 * nz, sizeof(PetscScalar), (void **)&c->a)); 35269f0612e4SBarry Smith PetscCall(PetscArrayzero(c->a, bs2 * nz)); 35279f0612e4SBarry Smith c->free_a = PETSC_TRUE; 35284fd072dbSBarry Smith c->i = a->i; 35294fd072dbSBarry Smith c->j = a->j; 3530379be0ddSLisandro Dalcin c->free_ij = PETSC_FALSE; 35314fd072dbSBarry Smith c->parent = A; 35321e40a84eSLisandro Dalcin C->preallocated = PETSC_TRUE; 35331e40a84eSLisandro Dalcin C->assembled = PETSC_TRUE; 353426fbe8dcSKarl Rupp 35359566063dSJacob Faibussowitsch PetscCall(PetscObjectReference((PetscObject)A)); 35369566063dSJacob Faibussowitsch PetscCall(MatSetOption(A, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE)); 35379566063dSJacob Faibussowitsch PetscCall(MatSetOption(C, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE)); 35384fd072dbSBarry Smith } else { 35399f0612e4SBarry Smith PetscCall(PetscShmgetAllocateArray(bs2 * nz, sizeof(PetscScalar), (void **)&c->a)); 35409f0612e4SBarry Smith PetscCall(PetscShmgetAllocateArray(nz, sizeof(PetscInt), (void **)&c->j)); 35419f0612e4SBarry Smith PetscCall(PetscShmgetAllocateArray(mbs + 1, sizeof(PetscInt), (void **)&c->i)); 3542379be0ddSLisandro Dalcin c->free_a = PETSC_TRUE; 35434fd072dbSBarry Smith c->free_ij = PETSC_TRUE; 354426fbe8dcSKarl Rupp 35459566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(c->i, a->i, mbs + 1)); 3546b6490206SBarry Smith if (mbs > 0) { 35479566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(c->j, a->j, nz)); 35482e8a6d31SBarry Smith if (cpvalues == MAT_COPY_VALUES) { 35499566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(c->a, a->a, bs2 * nz)); 35502e8a6d31SBarry Smith } else { 35519566063dSJacob Faibussowitsch PetscCall(PetscArrayzero(c->a, bs2 * nz)); 35522593348eSBarry Smith } 35532593348eSBarry Smith } 35541e40a84eSLisandro Dalcin C->preallocated = PETSC_TRUE; 35551e40a84eSLisandro Dalcin C->assembled = 
PETSC_TRUE; 355616a2bf60SHong Zhang } 35574fd072dbSBarry Smith } 355816a2bf60SHong Zhang 35592593348eSBarry Smith c->roworiented = a->roworiented; 35602593348eSBarry Smith c->nonew = a->nonew; 356126fbe8dcSKarl Rupp 35629566063dSJacob Faibussowitsch PetscCall(PetscLayoutReference(A->rmap, &C->rmap)); 35639566063dSJacob Faibussowitsch PetscCall(PetscLayoutReference(A->cmap, &C->cmap)); 356426fbe8dcSKarl Rupp 35655c9eb25fSBarry Smith c->bs2 = a->bs2; 35665c9eb25fSBarry Smith c->mbs = a->mbs; 35675c9eb25fSBarry Smith c->nbs = a->nbs; 35682593348eSBarry Smith c->nz = a->nz; 3569f2cbd3d5SJed Brown c->maxnz = a->nz; /* Since we allocate exactly the right amount */ 3570f361c04dSBarry Smith c->solve_work = NULL; 3571f361c04dSBarry Smith c->mult_work = NULL; 3572f361c04dSBarry Smith c->sor_workt = NULL; 3573f361c04dSBarry Smith c->sor_work = NULL; 357488e51ccdSHong Zhang 357588e51ccdSHong Zhang c->compressedrow.use = a->compressedrow.use; 357688e51ccdSHong Zhang c->compressedrow.nrows = a->compressedrow.nrows; 3577cd6b891eSBarry Smith if (a->compressedrow.use) { 357888e51ccdSHong Zhang i = a->compressedrow.nrows; 35799566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(i + 1, &c->compressedrow.i, i + 1, &c->compressedrow.rindex)); 35809566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(c->compressedrow.i, a->compressedrow.i, i + 1)); 35819566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(c->compressedrow.rindex, a->compressedrow.rindex, i)); 358288e51ccdSHong Zhang } else { 358388e51ccdSHong Zhang c->compressedrow.use = PETSC_FALSE; 35840298fd71SBarry Smith c->compressedrow.i = NULL; 35850298fd71SBarry Smith c->compressedrow.rindex = NULL; 358688e51ccdSHong Zhang } 3587c05f355bSMark Adams c->nonzerorowcnt = a->nonzerorowcnt; 3588e56f5c9eSBarry Smith C->nonzerostate = A->nonzerostate; 358926fbe8dcSKarl Rupp 35909566063dSJacob Faibussowitsch PetscCall(PetscFunctionListDuplicate(((PetscObject)A)->qlist, &((PetscObject)C)->qlist)); 35913ba16761SJacob Faibussowitsch 
PetscFunctionReturn(PETSC_SUCCESS); 35922593348eSBarry Smith } 35932593348eSBarry Smith 3594d71ae5a4SJacob Faibussowitsch PetscErrorCode MatDuplicate_SeqBAIJ(Mat A, MatDuplicateOption cpvalues, Mat *B) 3595d71ae5a4SJacob Faibussowitsch { 3596b24902e0SBarry Smith PetscFunctionBegin; 35979566063dSJacob Faibussowitsch PetscCall(MatCreate(PetscObjectComm((PetscObject)A), B)); 35989566063dSJacob Faibussowitsch PetscCall(MatSetSizes(*B, A->rmap->N, A->cmap->n, A->rmap->N, A->cmap->n)); 35999566063dSJacob Faibussowitsch PetscCall(MatSetType(*B, MATSEQBAIJ)); 36009566063dSJacob Faibussowitsch PetscCall(MatDuplicateNoCreate_SeqBAIJ(*B, A, cpvalues, PETSC_TRUE)); 36013ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 3602b24902e0SBarry Smith } 3603b24902e0SBarry Smith 3604618cc2edSLisandro Dalcin /* Used for both SeqBAIJ and SeqSBAIJ matrices */ 3605d71ae5a4SJacob Faibussowitsch PetscErrorCode MatLoad_SeqBAIJ_Binary(Mat mat, PetscViewer viewer) 3606d71ae5a4SJacob Faibussowitsch { 3607b51a4376SLisandro Dalcin PetscInt header[4], M, N, nz, bs, m, n, mbs, nbs, rows, cols, sum, i, j, k; 3608b51a4376SLisandro Dalcin PetscInt *rowidxs, *colidxs; 3609b51a4376SLisandro Dalcin PetscScalar *matvals; 3610b51a4376SLisandro Dalcin 3611b51a4376SLisandro Dalcin PetscFunctionBegin; 36129566063dSJacob Faibussowitsch PetscCall(PetscViewerSetUp(viewer)); 3613b51a4376SLisandro Dalcin 3614b51a4376SLisandro Dalcin /* read matrix header */ 36159566063dSJacob Faibussowitsch PetscCall(PetscViewerBinaryRead(viewer, header, 4, NULL, PETSC_INT)); 36165f80ce2aSJacob Faibussowitsch PetscCheck(header[0] == MAT_FILE_CLASSID, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Not a matrix object in file"); 36179371c9d4SSatish Balay M = header[1]; 36189371c9d4SSatish Balay N = header[2]; 36199371c9d4SSatish Balay nz = header[3]; 36205f80ce2aSJacob Faibussowitsch PetscCheck(M >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix row size (%" PetscInt_FMT ") in file is 
negative", M); 36215f80ce2aSJacob Faibussowitsch PetscCheck(N >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix column size (%" PetscInt_FMT ") in file is negative", N); 36225f80ce2aSJacob Faibussowitsch PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix stored in special format on disk, cannot load as SeqBAIJ"); 3623b51a4376SLisandro Dalcin 3624b51a4376SLisandro Dalcin /* set block sizes from the viewer's .info file */ 36259566063dSJacob Faibussowitsch PetscCall(MatLoad_Binary_BlockSizes(mat, viewer)); 3626b51a4376SLisandro Dalcin /* set local and global sizes if not set already */ 3627b51a4376SLisandro Dalcin if (mat->rmap->n < 0) mat->rmap->n = M; 3628b51a4376SLisandro Dalcin if (mat->cmap->n < 0) mat->cmap->n = N; 3629b51a4376SLisandro Dalcin if (mat->rmap->N < 0) mat->rmap->N = M; 3630b51a4376SLisandro Dalcin if (mat->cmap->N < 0) mat->cmap->N = N; 36319566063dSJacob Faibussowitsch PetscCall(PetscLayoutSetUp(mat->rmap)); 36329566063dSJacob Faibussowitsch PetscCall(PetscLayoutSetUp(mat->cmap)); 3633b51a4376SLisandro Dalcin 3634b51a4376SLisandro Dalcin /* check if the matrix sizes are correct */ 36359566063dSJacob Faibussowitsch PetscCall(MatGetSize(mat, &rows, &cols)); 36365f80ce2aSJacob Faibussowitsch PetscCheck(M == rows && N == cols, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%" PetscInt_FMT ", %" PetscInt_FMT ") than the input matrix (%" PetscInt_FMT ", %" PetscInt_FMT ")", M, N, rows, cols); 36379566063dSJacob Faibussowitsch PetscCall(MatGetBlockSize(mat, &bs)); 36389566063dSJacob Faibussowitsch PetscCall(MatGetLocalSize(mat, &m, &n)); 36399371c9d4SSatish Balay mbs = m / bs; 36409371c9d4SSatish Balay nbs = n / bs; 3641b51a4376SLisandro Dalcin 3642b51a4376SLisandro Dalcin /* read in row lengths, column indices and nonzero values */ 36439566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(m + 1, &rowidxs)); 36449566063dSJacob Faibussowitsch 
PetscCall(PetscViewerBinaryRead(viewer, rowidxs + 1, m, NULL, PETSC_INT)); 36459371c9d4SSatish Balay rowidxs[0] = 0; 36469371c9d4SSatish Balay for (i = 0; i < m; i++) rowidxs[i + 1] += rowidxs[i]; 3647b51a4376SLisandro Dalcin sum = rowidxs[m]; 36485f80ce2aSJacob Faibussowitsch PetscCheck(sum == nz, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Inconsistent matrix data in file: nonzeros = %" PetscInt_FMT ", sum-row-lengths = %" PetscInt_FMT, nz, sum); 3649b51a4376SLisandro Dalcin 3650b51a4376SLisandro Dalcin /* read in column indices and nonzero values */ 36519566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(rowidxs[m], &colidxs, nz, &matvals)); 36529566063dSJacob Faibussowitsch PetscCall(PetscViewerBinaryRead(viewer, colidxs, rowidxs[m], NULL, PETSC_INT)); 36539566063dSJacob Faibussowitsch PetscCall(PetscViewerBinaryRead(viewer, matvals, rowidxs[m], NULL, PETSC_SCALAR)); 3654b51a4376SLisandro Dalcin 3655b51a4376SLisandro Dalcin { /* preallocate matrix storage */ 3656b51a4376SLisandro Dalcin PetscBT bt; /* helper bit set to count nonzeros */ 3657b51a4376SLisandro Dalcin PetscInt *nnz; 3658618cc2edSLisandro Dalcin PetscBool sbaij; 3659b51a4376SLisandro Dalcin 36609566063dSJacob Faibussowitsch PetscCall(PetscBTCreate(nbs, &bt)); 36619566063dSJacob Faibussowitsch PetscCall(PetscCalloc1(mbs, &nnz)); 36629566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATSEQSBAIJ, &sbaij)); 3663b51a4376SLisandro Dalcin for (i = 0; i < mbs; i++) { 36649566063dSJacob Faibussowitsch PetscCall(PetscBTMemzero(nbs, bt)); 3665618cc2edSLisandro Dalcin for (k = 0; k < bs; k++) { 3666618cc2edSLisandro Dalcin PetscInt row = bs * i + k; 3667618cc2edSLisandro Dalcin for (j = rowidxs[row]; j < rowidxs[row + 1]; j++) { 3668618cc2edSLisandro Dalcin PetscInt col = colidxs[j]; 3669618cc2edSLisandro Dalcin if (!sbaij || col >= row) 3670618cc2edSLisandro Dalcin if (!PetscBTLookupSet(bt, col / bs)) nnz[i]++; 3671618cc2edSLisandro Dalcin } 3672618cc2edSLisandro Dalcin } 
3673b51a4376SLisandro Dalcin } 36749566063dSJacob Faibussowitsch PetscCall(PetscBTDestroy(&bt)); 36759566063dSJacob Faibussowitsch PetscCall(MatSeqBAIJSetPreallocation(mat, bs, 0, nnz)); 36769566063dSJacob Faibussowitsch PetscCall(MatSeqSBAIJSetPreallocation(mat, bs, 0, nnz)); 36779566063dSJacob Faibussowitsch PetscCall(PetscFree(nnz)); 3678b51a4376SLisandro Dalcin } 3679b51a4376SLisandro Dalcin 3680b51a4376SLisandro Dalcin /* store matrix values */ 3681b51a4376SLisandro Dalcin for (i = 0; i < m; i++) { 3682b51a4376SLisandro Dalcin PetscInt row = i, s = rowidxs[i], e = rowidxs[i + 1]; 36839927e4dfSBarry Smith PetscUseTypeMethod(mat, setvalues, 1, &row, e - s, colidxs + s, matvals + s, INSERT_VALUES); 3684b51a4376SLisandro Dalcin } 3685b51a4376SLisandro Dalcin 36869566063dSJacob Faibussowitsch PetscCall(PetscFree(rowidxs)); 36879566063dSJacob Faibussowitsch PetscCall(PetscFree2(colidxs, matvals)); 36889566063dSJacob Faibussowitsch PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY)); 36899566063dSJacob Faibussowitsch PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY)); 36903ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 3691b51a4376SLisandro Dalcin } 3692b51a4376SLisandro Dalcin 3693d71ae5a4SJacob Faibussowitsch PetscErrorCode MatLoad_SeqBAIJ(Mat mat, PetscViewer viewer) 3694d71ae5a4SJacob Faibussowitsch { 36957f489da9SVaclav Hapla PetscBool isbinary; 3696f501eaabSShri Abhyankar 3697f501eaabSShri Abhyankar PetscFunctionBegin; 36989566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary)); 36995f80ce2aSJacob Faibussowitsch PetscCheck(isbinary, PetscObjectComm((PetscObject)viewer), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)mat)->type_name); 37009566063dSJacob Faibussowitsch PetscCall(MatLoad_SeqBAIJ_Binary(mat, viewer)); 37013ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 3702f501eaabSShri Abhyankar 
} 3703f501eaabSShri Abhyankar 37045d83a8b1SBarry Smith /*@ 370511a5261eSBarry Smith MatCreateSeqBAIJ - Creates a sparse matrix in `MATSEQAIJ` (block 3706273d9f13SBarry Smith compressed row) format. For good matrix assembly performance the 370720f4b53cSBarry Smith user should preallocate the matrix storage by setting the parameter `nz` 370820f4b53cSBarry Smith (or the array `nnz`). 37092593348eSBarry Smith 3710d083f849SBarry Smith Collective 3711273d9f13SBarry Smith 3712273d9f13SBarry Smith Input Parameters: 371311a5261eSBarry Smith + comm - MPI communicator, set to `PETSC_COMM_SELF` 371411a5261eSBarry Smith . bs - size of block, the blocks are ALWAYS square. One can use `MatSetBlockSizes()` to set a different row and column blocksize but the row 371511a5261eSBarry Smith blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with `MatCreateVecs()` 3716273d9f13SBarry Smith . m - number of rows 3717273d9f13SBarry Smith . n - number of columns 371835d8aa7fSBarry Smith . nz - number of nonzero blocks per block row (same for all rows) 371935d8aa7fSBarry Smith - nnz - array containing the number of nonzero blocks in the various block rows 372020f4b53cSBarry Smith (possibly different for each block row) or `NULL` 3721273d9f13SBarry Smith 3722273d9f13SBarry Smith Output Parameter: 3723273d9f13SBarry Smith . 
A - the matrix 3724273d9f13SBarry Smith 3725273d9f13SBarry Smith Options Database Keys: 372611a5261eSBarry Smith + -mat_no_unroll - uses code that does not unroll the loops in the block calculations (much slower) 3727a2b725a8SWilliam Gropp - -mat_block_size - size of the blocks to use 3728273d9f13SBarry Smith 3729273d9f13SBarry Smith Level: intermediate 3730273d9f13SBarry Smith 3731273d9f13SBarry Smith Notes: 373277433607SBarry Smith It is recommended that one use `MatCreateFromOptions()` or the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`, 37332ef1f0ffSBarry Smith MatXXXXSetPreallocation() paradigm instead of this routine directly. 37342ef1f0ffSBarry Smith [MatXXXXSetPreallocation() is, for example, `MatSeqAIJSetPreallocation()`] 37352ef1f0ffSBarry Smith 3736d1be2dadSMatthew Knepley The number of rows and columns must be divisible by blocksize. 3737d1be2dadSMatthew Knepley 37382ef1f0ffSBarry Smith If the `nnz` parameter is given then the `nz` parameter is ignored 373949a6f317SBarry Smith 374035d8aa7fSBarry Smith A nonzero block is any block that as 1 or more nonzeros in it 374135d8aa7fSBarry Smith 37422ef1f0ffSBarry Smith The `MATSEQBAIJ` format is fully compatible with standard Fortran 3743273d9f13SBarry Smith storage. That is, the stored row and column indices can begin at 374420f4b53cSBarry Smith either one (as in Fortran) or zero. 3745273d9f13SBarry Smith 37462ef1f0ffSBarry Smith Specify the preallocated storage with either `nz` or `nnz` (not both). 37472ef1f0ffSBarry Smith Set `nz` = `PETSC_DEFAULT` and `nnz` = `NULL` for PETSc to control dynamic memory 3748651615e1SBarry Smith allocation. See [Sparse Matrices](sec_matsparse) for details. 3749273d9f13SBarry Smith matrices. 
3750273d9f13SBarry Smith 37511cc06b55SBarry Smith .seealso: [](ch_matrices), `Mat`, [Sparse Matrices](sec_matsparse), `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatCreateBAIJ()` 3752273d9f13SBarry Smith @*/ 3753d71ae5a4SJacob Faibussowitsch PetscErrorCode MatCreateSeqBAIJ(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt nz, const PetscInt nnz[], Mat *A) 3754d71ae5a4SJacob Faibussowitsch { 3755273d9f13SBarry Smith PetscFunctionBegin; 37569566063dSJacob Faibussowitsch PetscCall(MatCreate(comm, A)); 37579566063dSJacob Faibussowitsch PetscCall(MatSetSizes(*A, m, n, m, n)); 37589566063dSJacob Faibussowitsch PetscCall(MatSetType(*A, MATSEQBAIJ)); 37599566063dSJacob Faibussowitsch PetscCall(MatSeqBAIJSetPreallocation(*A, bs, nz, (PetscInt *)nnz)); 37603ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 3761273d9f13SBarry Smith } 3762273d9f13SBarry Smith 37635d83a8b1SBarry Smith /*@ 3764273d9f13SBarry Smith MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros 3765273d9f13SBarry Smith per row in the matrix. For good matrix assembly performance the 376620f4b53cSBarry Smith user should preallocate the matrix storage by setting the parameter `nz` 376720f4b53cSBarry Smith (or the array `nnz`). 3768273d9f13SBarry Smith 3769d083f849SBarry Smith Collective 3770273d9f13SBarry Smith 3771273d9f13SBarry Smith Input Parameters: 37721c4f3114SJed Brown + B - the matrix 377311a5261eSBarry Smith . bs - size of block, the blocks are ALWAYS square. One can use `MatSetBlockSizes()` to set a different row and column blocksize but the row 377411a5261eSBarry Smith blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with `MatCreateVecs()` 3775273d9f13SBarry Smith . 
nz - number of block nonzeros per block row (same for all rows) 3776273d9f13SBarry Smith - nnz - array containing the number of block nonzeros in the various block rows 37772ef1f0ffSBarry Smith (possibly different for each block row) or `NULL` 3778273d9f13SBarry Smith 3779273d9f13SBarry Smith Options Database Keys: 378011a5261eSBarry Smith + -mat_no_unroll - uses code that does not unroll the loops in the block calculations (much slower) 3781a2b725a8SWilliam Gropp - -mat_block_size - size of the blocks to use 3782273d9f13SBarry Smith 3783273d9f13SBarry Smith Level: intermediate 3784273d9f13SBarry Smith 3785273d9f13SBarry Smith Notes: 37862ef1f0ffSBarry Smith If the `nnz` parameter is given then the `nz` parameter is ignored 378749a6f317SBarry Smith 378811a5261eSBarry Smith You can call `MatGetInfo()` to get information on how effective the preallocation was; 3789aa95bbe8SBarry Smith for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 379020f4b53cSBarry Smith You can also run with the option `-info` and look for messages with the string 3791aa95bbe8SBarry Smith malloc in them to see if additional memory allocation was needed. 3792aa95bbe8SBarry Smith 37932ef1f0ffSBarry Smith The `MATSEQBAIJ` format is fully compatible with standard Fortran 3794273d9f13SBarry Smith storage. That is, the stored row and column indices can begin at 379520f4b53cSBarry Smith either one (as in Fortran) or zero. 3796273d9f13SBarry Smith 3797d8a51d2aSBarry Smith Specify the preallocated storage with either `nz` or `nnz` (not both). 37982ef1f0ffSBarry Smith Set `nz` = `PETSC_DEFAULT` and `nnz` = `NULL` for PETSc to control dynamic memory 3799651615e1SBarry Smith allocation. See [Sparse Matrices](sec_matsparse) for details. 
3800273d9f13SBarry Smith 38011cc06b55SBarry Smith .seealso: [](ch_matrices), `Mat`, [Sparse Matrices](sec_matsparse), `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatCreateBAIJ()`, `MatGetInfo()` 3802273d9f13SBarry Smith @*/ 3803d71ae5a4SJacob Faibussowitsch PetscErrorCode MatSeqBAIJSetPreallocation(Mat B, PetscInt bs, PetscInt nz, const PetscInt nnz[]) 3804d71ae5a4SJacob Faibussowitsch { 3805273d9f13SBarry Smith PetscFunctionBegin; 38066ba663aaSJed Brown PetscValidHeaderSpecific(B, MAT_CLASSID, 1); 38076ba663aaSJed Brown PetscValidType(B, 1); 38086ba663aaSJed Brown PetscValidLogicalCollectiveInt(B, bs, 2); 3809cac4c232SBarry Smith PetscTryMethod(B, "MatSeqBAIJSetPreallocation_C", (Mat, PetscInt, PetscInt, const PetscInt[]), (B, bs, nz, nnz)); 38103ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 3811273d9f13SBarry Smith } 3812a1d92eedSBarry Smith 3813725b52f3SLisandro Dalcin /*@C 381411a5261eSBarry Smith MatSeqBAIJSetPreallocationCSR - Creates a sparse sequential matrix in `MATSEQBAIJ` format using the given nonzero structure and (optional) numerical values 3815725b52f3SLisandro Dalcin 3816d083f849SBarry Smith Collective 3817725b52f3SLisandro Dalcin 3818725b52f3SLisandro Dalcin Input Parameters: 38191c4f3114SJed Brown + B - the matrix 382020f4b53cSBarry Smith . bs - the blocksize 3821d8a51d2aSBarry Smith . i - the indices into `j` for the start of each local row (indices start with zero) 3822d8a51d2aSBarry Smith . 
j - the column indices for each local row (indices start with zero) these must be sorted for each row 3823d8a51d2aSBarry Smith - v - optional values in the matrix, use `NULL` if not provided 3824725b52f3SLisandro Dalcin 3825664954b6SBarry Smith Level: advanced 3826725b52f3SLisandro Dalcin 38273adadaf3SJed Brown Notes: 3828d8a51d2aSBarry Smith The `i`,`j`,`v` values are COPIED with this routine; to avoid the copy use `MatCreateSeqBAIJWithArrays()` 3829d8a51d2aSBarry Smith 383011a5261eSBarry Smith The order of the entries in values is specified by the `MatOption` `MAT_ROW_ORIENTED`. For example, C programs 383111a5261eSBarry Smith may want to use the default `MAT_ROW_ORIENTED` of `PETSC_TRUE` and use an array v[nnz][bs][bs] where the second index is 38323adadaf3SJed Brown over rows within a block and the last index is over columns within a block row. Fortran programs will likely set 383311a5261eSBarry Smith `MAT_ROW_ORIENTED` of `PETSC_FALSE` and use a Fortran array v(bs,bs,nnz) in which the first index is over rows within a 38343adadaf3SJed Brown block column and the second index is over columns within a block. 
38353adadaf3SJed Brown 3836664954b6SBarry Smith Though this routine has Preallocation() in the name it also sets the exact nonzero locations of the matrix entries and usually the numerical values as well 3837664954b6SBarry Smith 38381cc06b55SBarry Smith .seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatCreateSeqBAIJ()`, `MatSetValues()`, `MatSeqBAIJSetPreallocation()`, `MATSEQBAIJ` 3839725b52f3SLisandro Dalcin @*/ 3840d71ae5a4SJacob Faibussowitsch PetscErrorCode MatSeqBAIJSetPreallocationCSR(Mat B, PetscInt bs, const PetscInt i[], const PetscInt j[], const PetscScalar v[]) 3841d71ae5a4SJacob Faibussowitsch { 3842725b52f3SLisandro Dalcin PetscFunctionBegin; 38436ba663aaSJed Brown PetscValidHeaderSpecific(B, MAT_CLASSID, 1); 38446ba663aaSJed Brown PetscValidType(B, 1); 38456ba663aaSJed Brown PetscValidLogicalCollectiveInt(B, bs, 2); 3846cac4c232SBarry Smith PetscTryMethod(B, "MatSeqBAIJSetPreallocationCSR_C", (Mat, PetscInt, const PetscInt[], const PetscInt[], const PetscScalar[]), (B, bs, i, j, v)); 38473ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 3848725b52f3SLisandro Dalcin } 3849725b52f3SLisandro Dalcin 3850c75a6043SHong Zhang /*@ 385111a5261eSBarry Smith MatCreateSeqBAIJWithArrays - Creates a `MATSEQBAIJ` matrix using matrix elements provided by the user. 3852c75a6043SHong Zhang 3853d083f849SBarry Smith Collective 3854c75a6043SHong Zhang 3855c75a6043SHong Zhang Input Parameters: 3856c75a6043SHong Zhang + comm - must be an MPI communicator of size 1 3857c75a6043SHong Zhang . bs - size of block 3858c75a6043SHong Zhang . m - number of rows 3859c75a6043SHong Zhang . n - number of columns 3860483a2f95SBarry Smith . i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row block row of the matrix 3861c75a6043SHong Zhang . j - column indices 3862c75a6043SHong Zhang - a - matrix values 3863c75a6043SHong Zhang 3864c75a6043SHong Zhang Output Parameter: 3865c75a6043SHong Zhang . 
mat - the matrix 3866c75a6043SHong Zhang 3867dfb205c3SBarry Smith Level: advanced 3868c75a6043SHong Zhang 3869c75a6043SHong Zhang Notes: 38702ef1f0ffSBarry Smith The `i`, `j`, and `a` arrays are not copied by this routine, the user must free these arrays 3871c75a6043SHong Zhang once the matrix is destroyed 3872c75a6043SHong Zhang 3873c75a6043SHong Zhang You cannot set new nonzero locations into this matrix, that will generate an error. 3874c75a6043SHong Zhang 38752ef1f0ffSBarry Smith The `i` and `j` indices are 0 based 3876c75a6043SHong Zhang 387711a5261eSBarry Smith When block size is greater than 1 the matrix values must be stored using the `MATSEQBAIJ` storage format 3878dfb205c3SBarry Smith 38793adadaf3SJed Brown The order of the entries in values is the same as the block compressed sparse row storage format; that is, it is 38803adadaf3SJed Brown the same as a three dimensional array in Fortran values(bs,bs,nnz) that contains the first column of the first 38813adadaf3SJed Brown block, followed by the second column of the first block etc etc. That is, the blocks are contiguous in memory 38823adadaf3SJed Brown with column-major ordering within blocks. 
3883dfb205c3SBarry Smith 38841cc06b55SBarry Smith .seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatCreateBAIJ()`, `MatCreateSeqBAIJ()` 3885c75a6043SHong Zhang @*/ 3886d71ae5a4SJacob Faibussowitsch PetscErrorCode MatCreateSeqBAIJWithArrays(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt i[], PetscInt j[], PetscScalar a[], Mat *mat) 3887d71ae5a4SJacob Faibussowitsch { 3888c75a6043SHong Zhang Mat_SeqBAIJ *baij; 3889c75a6043SHong Zhang 3890c75a6043SHong Zhang PetscFunctionBegin; 38915f80ce2aSJacob Faibussowitsch PetscCheck(bs == 1, PETSC_COMM_SELF, PETSC_ERR_SUP, "block size %" PetscInt_FMT " > 1 is not supported yet", bs); 38925f80ce2aSJacob Faibussowitsch if (m > 0) PetscCheck(i[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0"); 3893c75a6043SHong Zhang 38949566063dSJacob Faibussowitsch PetscCall(MatCreate(comm, mat)); 38959566063dSJacob Faibussowitsch PetscCall(MatSetSizes(*mat, m, n, m, n)); 38969566063dSJacob Faibussowitsch PetscCall(MatSetType(*mat, MATSEQBAIJ)); 38979566063dSJacob Faibussowitsch PetscCall(MatSeqBAIJSetPreallocation(*mat, bs, MAT_SKIP_ALLOCATION, NULL)); 3898c75a6043SHong Zhang baij = (Mat_SeqBAIJ *)(*mat)->data; 38999566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(m, &baij->imax, m, &baij->ilen)); 3900c75a6043SHong Zhang 3901c75a6043SHong Zhang baij->i = i; 3902c75a6043SHong Zhang baij->j = j; 3903c75a6043SHong Zhang baij->a = a; 390426fbe8dcSKarl Rupp 3905c75a6043SHong Zhang baij->nonew = -1; /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/ 3906e6b907acSBarry Smith baij->free_a = PETSC_FALSE; 3907e6b907acSBarry Smith baij->free_ij = PETSC_FALSE; 3908ceb5bf51SJacob Faibussowitsch baij->free_imax_ilen = PETSC_TRUE; 3909c75a6043SHong Zhang 3910ceb5bf51SJacob Faibussowitsch for (PetscInt ii = 0; ii < m; ii++) { 3911ceb5bf51SJacob Faibussowitsch const PetscInt row_len = i[ii + 1] - i[ii]; 3912ceb5bf51SJacob Faibussowitsch 3913ceb5bf51SJacob 
Faibussowitsch baij->ilen[ii] = baij->imax[ii] = row_len; 3914ceb5bf51SJacob Faibussowitsch PetscCheck(row_len >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Negative row length in i (row indices) row = %" PetscInt_FMT " length = %" PetscInt_FMT, ii, row_len); 3915c75a6043SHong Zhang } 391676bd3646SJed Brown if (PetscDefined(USE_DEBUG)) { 3917ceb5bf51SJacob Faibussowitsch for (PetscInt ii = 0; ii < baij->i[m]; ii++) { 39186bdcaf15SBarry Smith PetscCheck(j[ii] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Negative column index at location = %" PetscInt_FMT " index = %" PetscInt_FMT, ii, j[ii]); 39196bdcaf15SBarry Smith PetscCheck(j[ii] <= n - 1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column index to large at location = %" PetscInt_FMT " index = %" PetscInt_FMT, ii, j[ii]); 3920c75a6043SHong Zhang } 392176bd3646SJed Brown } 3922c75a6043SHong Zhang 39239566063dSJacob Faibussowitsch PetscCall(MatAssemblyBegin(*mat, MAT_FINAL_ASSEMBLY)); 39249566063dSJacob Faibussowitsch PetscCall(MatAssemblyEnd(*mat, MAT_FINAL_ASSEMBLY)); 39253ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 3926c75a6043SHong Zhang } 3927bdf6f3fcSHong Zhang 3928d71ae5a4SJacob Faibussowitsch PetscErrorCode MatCreateMPIMatConcatenateSeqMat_SeqBAIJ(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat) 3929d71ae5a4SJacob Faibussowitsch { 3930bdf6f3fcSHong Zhang PetscFunctionBegin; 39319566063dSJacob Faibussowitsch PetscCall(MatCreateMPIMatConcatenateSeqMat_MPIBAIJ(comm, inmat, n, scall, outmat)); 39323ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 3933bdf6f3fcSHong Zhang } 3934