1be1d678aSKris Buschelman 22593348eSBarry Smith /* 3b6490206SBarry Smith Defines the basic matrix operations for the BAIJ (compressed row) 42593348eSBarry Smith matrix storage format. 52593348eSBarry Smith */ 6c6db04a5SJed Brown #include <../src/mat/impls/baij/seq/baij.h> /*I "petscmat.h" I*/ 7c6db04a5SJed Brown #include <petscblaslapack.h> 8af0996ceSBarry Smith #include <petsc/private/kernels/blockinvert.h> 9af0996ceSBarry Smith #include <petsc/private/kernels/blockmatmult.h> 1043516a2dSKris Buschelman 1126cec326SBarry Smith /* defines MatSetValues_Seq_Hash(), MatAssemblyEnd_Seq_Hash(), MatSetUp_Seq_Hash() */ 1226cec326SBarry Smith #define TYPE BAIJ 1326cec326SBarry Smith #define TYPE_BS 1426cec326SBarry Smith #include "../src/mat/impls/aij/seq/seqhashmatsetvalues.h" 1526cec326SBarry Smith #undef TYPE_BS 1626cec326SBarry Smith #define TYPE_BS _BS 1726cec326SBarry Smith #define TYPE_BS_ON 1826cec326SBarry Smith #include "../src/mat/impls/aij/seq/seqhashmatsetvalues.h" 1926cec326SBarry Smith #undef TYPE_BS 2026cec326SBarry Smith #include "../src/mat/impls/aij/seq/seqhashmat.h" 2126cec326SBarry Smith #undef TYPE 2226cec326SBarry Smith #undef TYPE_BS_ON 2326cec326SBarry Smith 247ea3e4caSstefano_zampini #if defined(PETSC_HAVE_HYPRE) 257ea3e4caSstefano_zampini PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat, MatType, MatReuse, Mat *); 267ea3e4caSstefano_zampini #endif 277ea3e4caSstefano_zampini 28b5b72c8aSIrina Sokolova #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 29fd9d3c67SJed Brown PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBAIJMKL(Mat, MatType, MatReuse, Mat *); 30b5b72c8aSIrina Sokolova #endif 31c9225affSStefano Zampini PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat, MatType, MatReuse, Mat *); 32b5b72c8aSIrina Sokolova 33ff6a9541SJacob Faibussowitsch static PetscErrorCode MatGetColumnReductions_SeqBAIJ(Mat A, PetscInt type, PetscReal *reductions) 34d71ae5a4SJacob Faibussowitsch { 359463ebdaSPierre Jolivet Mat_SeqBAIJ *a_aij = (Mat_SeqBAIJ *)A->data; 
36ff6a9541SJacob Faibussowitsch PetscInt m, n, ib, jb, bs = A->rmap->bs; 379463ebdaSPierre Jolivet MatScalar *a_val = a_aij->a; 389463ebdaSPierre Jolivet 399463ebdaSPierre Jolivet PetscFunctionBegin; 409566063dSJacob Faibussowitsch PetscCall(MatGetSize(A, &m, &n)); 41ff6a9541SJacob Faibussowitsch PetscCall(PetscArrayzero(reductions, n)); 429463ebdaSPierre Jolivet if (type == NORM_2) { 43ff6a9541SJacob Faibussowitsch for (PetscInt i = a_aij->i[0]; i < a_aij->i[A->rmap->n / bs]; i++) { 449463ebdaSPierre Jolivet for (jb = 0; jb < bs; jb++) { 459463ebdaSPierre Jolivet for (ib = 0; ib < bs; ib++) { 46857cbf51SRichard Tran Mills reductions[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val * *a_val); 479463ebdaSPierre Jolivet a_val++; 489463ebdaSPierre Jolivet } 499463ebdaSPierre Jolivet } 509463ebdaSPierre Jolivet } 519463ebdaSPierre Jolivet } else if (type == NORM_1) { 52ff6a9541SJacob Faibussowitsch for (PetscInt i = a_aij->i[0]; i < a_aij->i[A->rmap->n / bs]; i++) { 539463ebdaSPierre Jolivet for (jb = 0; jb < bs; jb++) { 549463ebdaSPierre Jolivet for (ib = 0; ib < bs; ib++) { 55857cbf51SRichard Tran Mills reductions[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val); 569463ebdaSPierre Jolivet a_val++; 579463ebdaSPierre Jolivet } 589463ebdaSPierre Jolivet } 599463ebdaSPierre Jolivet } 609463ebdaSPierre Jolivet } else if (type == NORM_INFINITY) { 61ff6a9541SJacob Faibussowitsch for (PetscInt i = a_aij->i[0]; i < a_aij->i[A->rmap->n / bs]; i++) { 629463ebdaSPierre Jolivet for (jb = 0; jb < bs; jb++) { 639463ebdaSPierre Jolivet for (ib = 0; ib < bs; ib++) { 649463ebdaSPierre Jolivet int col = A->cmap->rstart + a_aij->j[i] * bs + jb; 65857cbf51SRichard Tran Mills reductions[col] = PetscMax(PetscAbsScalar(*a_val), reductions[col]); 669463ebdaSPierre Jolivet a_val++; 679463ebdaSPierre Jolivet } 689463ebdaSPierre Jolivet } 699463ebdaSPierre Jolivet } 70857cbf51SRichard Tran Mills } else if (type == REDUCTION_SUM_REALPART || type == 
REDUCTION_MEAN_REALPART) { 71ff6a9541SJacob Faibussowitsch for (PetscInt i = a_aij->i[0]; i < a_aij->i[A->rmap->n / bs]; i++) { 72857cbf51SRichard Tran Mills for (jb = 0; jb < bs; jb++) { 73857cbf51SRichard Tran Mills for (ib = 0; ib < bs; ib++) { 74857cbf51SRichard Tran Mills reductions[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscRealPart(*a_val); 75857cbf51SRichard Tran Mills a_val++; 76857cbf51SRichard Tran Mills } 77857cbf51SRichard Tran Mills } 78857cbf51SRichard Tran Mills } 79857cbf51SRichard Tran Mills } else if (type == REDUCTION_SUM_IMAGINARYPART || type == REDUCTION_MEAN_IMAGINARYPART) { 80ff6a9541SJacob Faibussowitsch for (PetscInt i = a_aij->i[0]; i < a_aij->i[A->rmap->n / bs]; i++) { 81857cbf51SRichard Tran Mills for (jb = 0; jb < bs; jb++) { 82857cbf51SRichard Tran Mills for (ib = 0; ib < bs; ib++) { 83857cbf51SRichard Tran Mills reductions[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscImaginaryPart(*a_val); 84857cbf51SRichard Tran Mills a_val++; 85857cbf51SRichard Tran Mills } 86857cbf51SRichard Tran Mills } 87857cbf51SRichard Tran Mills } 88857cbf51SRichard Tran Mills } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Unknown reduction type"); 899463ebdaSPierre Jolivet if (type == NORM_2) { 90ff6a9541SJacob Faibussowitsch for (PetscInt i = 0; i < n; i++) reductions[i] = PetscSqrtReal(reductions[i]); 91857cbf51SRichard Tran Mills } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) { 92ff6a9541SJacob Faibussowitsch for (PetscInt i = 0; i < n; i++) reductions[i] /= m; 939463ebdaSPierre Jolivet } 943ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 959463ebdaSPierre Jolivet } 969463ebdaSPierre Jolivet 97d71ae5a4SJacob Faibussowitsch PetscErrorCode MatInvertBlockDiagonal_SeqBAIJ(Mat A, const PetscScalar **values) 98d71ae5a4SJacob Faibussowitsch { 99b01c7715SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 100de80f912SBarry Smith PetscInt *diag_offset, i, bs = A->rmap->bs, mbs 
= a->mbs, ipvt[5], bs2 = bs * bs, *v_pivots; 1017f0c90edSBarry Smith MatScalar *v = a->a, *odiag, *diag, work[25], *v_work; 10262bba022SBarry Smith PetscReal shift = 0.0; 1031a9391e3SHong Zhang PetscBool allowzeropivot, zeropivotdetected = PETSC_FALSE; 104b01c7715SBarry Smith 105b01c7715SBarry Smith PetscFunctionBegin; 106a455e926SHong Zhang allowzeropivot = PetscNot(A->erroriffailure); 107a455e926SHong Zhang 1089797317bSBarry Smith if (a->idiagvalid) { 1099797317bSBarry Smith if (values) *values = a->idiag; 1103ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1119797317bSBarry Smith } 1129566063dSJacob Faibussowitsch PetscCall(MatMarkDiagonal_SeqBAIJ(A)); 113b01c7715SBarry Smith diag_offset = a->diag; 1144dfa11a4SJacob Faibussowitsch if (!a->idiag) { PetscCall(PetscMalloc1(bs2 * mbs, &a->idiag)); } 115b01c7715SBarry Smith diag = a->idiag; 116bbead8a2SBarry Smith if (values) *values = a->idiag; 117b01c7715SBarry Smith /* factor and invert each block */ 118521d7252SBarry Smith switch (bs) { 119ab040260SJed Brown case 1: 120ab040260SJed Brown for (i = 0; i < mbs; i++) { 121ab040260SJed Brown odiag = v + 1 * diag_offset[i]; 122ab040260SJed Brown diag[0] = odiag[0]; 123ec1892c8SHong Zhang 124ec1892c8SHong Zhang if (PetscAbsScalar(diag[0] + shift) < PETSC_MACHINE_EPSILON) { 125ec1892c8SHong Zhang if (allowzeropivot) { 1267b6c816cSBarry Smith A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 1277b6c816cSBarry Smith A->factorerror_zeropivot_value = PetscAbsScalar(diag[0]); 1287b6c816cSBarry Smith A->factorerror_zeropivot_row = i; 1299566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "Zero pivot, row %" PetscInt_FMT "\n", i)); 13098921bdaSJacob Faibussowitsch } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Zero pivot, row %" PetscInt_FMT " pivot value %g tolerance %g", i, (double)PetscAbsScalar(diag[0]), (double)PETSC_MACHINE_EPSILON); 131ec1892c8SHong Zhang } 132ec1892c8SHong Zhang 133d4a378daSJed Brown diag[0] = (PetscScalar)1.0 / (diag[0] + 
shift); 134ab040260SJed Brown diag += 1; 135ab040260SJed Brown } 136ab040260SJed Brown break; 137b01c7715SBarry Smith case 2: 138b01c7715SBarry Smith for (i = 0; i < mbs; i++) { 139b01c7715SBarry Smith odiag = v + 4 * diag_offset[i]; 1409371c9d4SSatish Balay diag[0] = odiag[0]; 1419371c9d4SSatish Balay diag[1] = odiag[1]; 1429371c9d4SSatish Balay diag[2] = odiag[2]; 1439371c9d4SSatish Balay diag[3] = odiag[3]; 1449566063dSJacob Faibussowitsch PetscCall(PetscKernel_A_gets_inverse_A_2(diag, shift, allowzeropivot, &zeropivotdetected)); 1457b6c816cSBarry Smith if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 146b01c7715SBarry Smith diag += 4; 147b01c7715SBarry Smith } 148b01c7715SBarry Smith break; 149b01c7715SBarry Smith case 3: 150b01c7715SBarry Smith for (i = 0; i < mbs; i++) { 151b01c7715SBarry Smith odiag = v + 9 * diag_offset[i]; 1529371c9d4SSatish Balay diag[0] = odiag[0]; 1539371c9d4SSatish Balay diag[1] = odiag[1]; 1549371c9d4SSatish Balay diag[2] = odiag[2]; 1559371c9d4SSatish Balay diag[3] = odiag[3]; 1569371c9d4SSatish Balay diag[4] = odiag[4]; 1579371c9d4SSatish Balay diag[5] = odiag[5]; 1589371c9d4SSatish Balay diag[6] = odiag[6]; 1599371c9d4SSatish Balay diag[7] = odiag[7]; 160b01c7715SBarry Smith diag[8] = odiag[8]; 1619566063dSJacob Faibussowitsch PetscCall(PetscKernel_A_gets_inverse_A_3(diag, shift, allowzeropivot, &zeropivotdetected)); 1627b6c816cSBarry Smith if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 163b01c7715SBarry Smith diag += 9; 164b01c7715SBarry Smith } 165b01c7715SBarry Smith break; 166b01c7715SBarry Smith case 4: 167b01c7715SBarry Smith for (i = 0; i < mbs; i++) { 168b01c7715SBarry Smith odiag = v + 16 * diag_offset[i]; 1699566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(diag, odiag, 16)); 1709566063dSJacob Faibussowitsch PetscCall(PetscKernel_A_gets_inverse_A_4(diag, shift, allowzeropivot, &zeropivotdetected)); 1717b6c816cSBarry Smith if (zeropivotdetected) A->factorerrortype = 
MAT_FACTOR_NUMERIC_ZEROPIVOT; 172b01c7715SBarry Smith diag += 16; 173b01c7715SBarry Smith } 174b01c7715SBarry Smith break; 175b01c7715SBarry Smith case 5: 176b01c7715SBarry Smith for (i = 0; i < mbs; i++) { 177b01c7715SBarry Smith odiag = v + 25 * diag_offset[i]; 1789566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(diag, odiag, 25)); 1799566063dSJacob Faibussowitsch PetscCall(PetscKernel_A_gets_inverse_A_5(diag, ipvt, work, shift, allowzeropivot, &zeropivotdetected)); 1807b6c816cSBarry Smith if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 181b01c7715SBarry Smith diag += 25; 182b01c7715SBarry Smith } 183b01c7715SBarry Smith break; 184d49b2adcSBarry Smith case 6: 185d49b2adcSBarry Smith for (i = 0; i < mbs; i++) { 186d49b2adcSBarry Smith odiag = v + 36 * diag_offset[i]; 1879566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(diag, odiag, 36)); 1889566063dSJacob Faibussowitsch PetscCall(PetscKernel_A_gets_inverse_A_6(diag, shift, allowzeropivot, &zeropivotdetected)); 1897b6c816cSBarry Smith if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 190d49b2adcSBarry Smith diag += 36; 191d49b2adcSBarry Smith } 192d49b2adcSBarry Smith break; 193de80f912SBarry Smith case 7: 194de80f912SBarry Smith for (i = 0; i < mbs; i++) { 195de80f912SBarry Smith odiag = v + 49 * diag_offset[i]; 1969566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(diag, odiag, 49)); 1979566063dSJacob Faibussowitsch PetscCall(PetscKernel_A_gets_inverse_A_7(diag, shift, allowzeropivot, &zeropivotdetected)); 1987b6c816cSBarry Smith if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 199de80f912SBarry Smith diag += 49; 200de80f912SBarry Smith } 201de80f912SBarry Smith break; 202b01c7715SBarry Smith default: 2039566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(bs, &v_work, bs, &v_pivots)); 204de80f912SBarry Smith for (i = 0; i < mbs; i++) { 205de80f912SBarry Smith odiag = v + bs2 * diag_offset[i]; 2069566063dSJacob Faibussowitsch 
PetscCall(PetscArraycpy(diag, odiag, bs2)); 2079566063dSJacob Faibussowitsch PetscCall(PetscKernel_A_gets_inverse_A(bs, diag, v_pivots, v_work, allowzeropivot, &zeropivotdetected)); 2087b6c816cSBarry Smith if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 209de80f912SBarry Smith diag += bs2; 210de80f912SBarry Smith } 2119566063dSJacob Faibussowitsch PetscCall(PetscFree2(v_work, v_pivots)); 212b01c7715SBarry Smith } 213b01c7715SBarry Smith a->idiagvalid = PETSC_TRUE; 2143ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 215b01c7715SBarry Smith } 216b01c7715SBarry Smith 217d71ae5a4SJacob Faibussowitsch PetscErrorCode MatSOR_SeqBAIJ(Mat A, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx) 218d71ae5a4SJacob Faibussowitsch { 2196d3beeddSMatthew Knepley Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 220e48d15efSToby Isaac PetscScalar *x, *work, *w, *workt, *t; 221e48d15efSToby Isaac const MatScalar *v, *aa = a->a, *idiag; 222e48d15efSToby Isaac const PetscScalar *b, *xb; 2235455b99fSToby Isaac PetscScalar s[7], xw[7] = {0}; /* avoid some compilers thinking xw is uninitialized */ 224e48d15efSToby Isaac PetscInt m = a->mbs, i, i2, nz, bs = A->rmap->bs, bs2 = bs * bs, k, j, idx, it; 225c1ac3661SBarry Smith const PetscInt *diag, *ai = a->i, *aj = a->j, *vi; 226b01c7715SBarry Smith 227b01c7715SBarry Smith PetscFunctionBegin; 228b01c7715SBarry Smith its = its * lits; 2295f80ce2aSJacob Faibussowitsch PetscCheck(!(flag & SOR_EISENSTAT), PETSC_COMM_SELF, PETSC_ERR_SUP, "No support yet for Eisenstat"); 2305f80ce2aSJacob Faibussowitsch PetscCheck(its > 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Relaxation requires global its %" PetscInt_FMT " and local its %" PetscInt_FMT " both positive", its, lits); 2315f80ce2aSJacob Faibussowitsch PetscCheck(!fshift, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for diagonal shift"); 2325f80ce2aSJacob Faibussowitsch PetscCheck(omega == 1.0, PETSC_COMM_SELF, 
PETSC_ERR_SUP, "No support for non-trivial relaxation factor"); 2335f80ce2aSJacob Faibussowitsch PetscCheck(!(flag & SOR_APPLY_UPPER) && !(flag & SOR_APPLY_LOWER), PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for applying upper or lower triangular parts"); 234b01c7715SBarry Smith 2359566063dSJacob Faibussowitsch if (!a->idiagvalid) PetscCall(MatInvertBlockDiagonal(A, NULL)); 236b01c7715SBarry Smith 2373ba16761SJacob Faibussowitsch if (!m) PetscFunctionReturn(PETSC_SUCCESS); 238b01c7715SBarry Smith diag = a->diag; 239b01c7715SBarry Smith idiag = a->idiag; 240de80f912SBarry Smith k = PetscMax(A->rmap->n, A->cmap->n); 24148a46eb9SPierre Jolivet if (!a->mult_work) PetscCall(PetscMalloc1(k + 1, &a->mult_work)); 24248a46eb9SPierre Jolivet if (!a->sor_workt) PetscCall(PetscMalloc1(k, &a->sor_workt)); 24348a46eb9SPierre Jolivet if (!a->sor_work) PetscCall(PetscMalloc1(bs, &a->sor_work)); 2443475c22fSBarry Smith work = a->mult_work; 2453475c22fSBarry Smith t = a->sor_workt; 246de80f912SBarry Smith w = a->sor_work; 247de80f912SBarry Smith 2489566063dSJacob Faibussowitsch PetscCall(VecGetArray(xx, &x)); 2499566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(bb, &b)); 250de80f912SBarry Smith 251de80f912SBarry Smith if (flag & SOR_ZERO_INITIAL_GUESS) { 252de80f912SBarry Smith if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) { 253e48d15efSToby Isaac switch (bs) { 254e48d15efSToby Isaac case 1: 255e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_1(x, idiag, b); 256e48d15efSToby Isaac t[0] = b[0]; 257e48d15efSToby Isaac i2 = 1; 258e48d15efSToby Isaac idiag += 1; 259e48d15efSToby Isaac for (i = 1; i < m; i++) { 260e48d15efSToby Isaac v = aa + ai[i]; 261e48d15efSToby Isaac vi = aj + ai[i]; 262e48d15efSToby Isaac nz = diag[i] - ai[i]; 263e48d15efSToby Isaac s[0] = b[i2]; 264e48d15efSToby Isaac for (j = 0; j < nz; j++) { 265e48d15efSToby Isaac xw[0] = x[vi[j]]; 266e48d15efSToby Isaac PetscKernel_v_gets_v_minus_A_times_w_1(s, (v + j), xw); 267e48d15efSToby Isaac } 
268e48d15efSToby Isaac t[i2] = s[0]; 269e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_1(xw, idiag, s); 270e48d15efSToby Isaac x[i2] = xw[0]; 271e48d15efSToby Isaac idiag += 1; 272e48d15efSToby Isaac i2 += 1; 273e48d15efSToby Isaac } 274e48d15efSToby Isaac break; 275e48d15efSToby Isaac case 2: 276e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_2(x, idiag, b); 2779371c9d4SSatish Balay t[0] = b[0]; 2789371c9d4SSatish Balay t[1] = b[1]; 279e48d15efSToby Isaac i2 = 2; 280e48d15efSToby Isaac idiag += 4; 281e48d15efSToby Isaac for (i = 1; i < m; i++) { 282e48d15efSToby Isaac v = aa + 4 * ai[i]; 283e48d15efSToby Isaac vi = aj + ai[i]; 284e48d15efSToby Isaac nz = diag[i] - ai[i]; 2859371c9d4SSatish Balay s[0] = b[i2]; 2869371c9d4SSatish Balay s[1] = b[i2 + 1]; 287e48d15efSToby Isaac for (j = 0; j < nz; j++) { 288e48d15efSToby Isaac idx = 2 * vi[j]; 289e48d15efSToby Isaac it = 4 * j; 2909371c9d4SSatish Balay xw[0] = x[idx]; 2919371c9d4SSatish Balay xw[1] = x[1 + idx]; 292e48d15efSToby Isaac PetscKernel_v_gets_v_minus_A_times_w_2(s, (v + it), xw); 293e48d15efSToby Isaac } 2949371c9d4SSatish Balay t[i2] = s[0]; 2959371c9d4SSatish Balay t[i2 + 1] = s[1]; 296e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_2(xw, idiag, s); 2979371c9d4SSatish Balay x[i2] = xw[0]; 2989371c9d4SSatish Balay x[i2 + 1] = xw[1]; 299e48d15efSToby Isaac idiag += 4; 300e48d15efSToby Isaac i2 += 2; 301e48d15efSToby Isaac } 302e48d15efSToby Isaac break; 303e48d15efSToby Isaac case 3: 304e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_3(x, idiag, b); 3059371c9d4SSatish Balay t[0] = b[0]; 3069371c9d4SSatish Balay t[1] = b[1]; 3079371c9d4SSatish Balay t[2] = b[2]; 308e48d15efSToby Isaac i2 = 3; 309e48d15efSToby Isaac idiag += 9; 310e48d15efSToby Isaac for (i = 1; i < m; i++) { 311e48d15efSToby Isaac v = aa + 9 * ai[i]; 312e48d15efSToby Isaac vi = aj + ai[i]; 313e48d15efSToby Isaac nz = diag[i] - ai[i]; 3149371c9d4SSatish Balay s[0] = b[i2]; 3159371c9d4SSatish Balay s[1] = b[i2 + 1]; 3169371c9d4SSatish 
Balay s[2] = b[i2 + 2]; 317e48d15efSToby Isaac while (nz--) { 318e48d15efSToby Isaac idx = 3 * (*vi++); 3199371c9d4SSatish Balay xw[0] = x[idx]; 3209371c9d4SSatish Balay xw[1] = x[1 + idx]; 3219371c9d4SSatish Balay xw[2] = x[2 + idx]; 322e48d15efSToby Isaac PetscKernel_v_gets_v_minus_A_times_w_3(s, v, xw); 323e48d15efSToby Isaac v += 9; 324e48d15efSToby Isaac } 3259371c9d4SSatish Balay t[i2] = s[0]; 3269371c9d4SSatish Balay t[i2 + 1] = s[1]; 3279371c9d4SSatish Balay t[i2 + 2] = s[2]; 328e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_3(xw, idiag, s); 3299371c9d4SSatish Balay x[i2] = xw[0]; 3309371c9d4SSatish Balay x[i2 + 1] = xw[1]; 3319371c9d4SSatish Balay x[i2 + 2] = xw[2]; 332e48d15efSToby Isaac idiag += 9; 333e48d15efSToby Isaac i2 += 3; 334e48d15efSToby Isaac } 335e48d15efSToby Isaac break; 336e48d15efSToby Isaac case 4: 337e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_4(x, idiag, b); 3389371c9d4SSatish Balay t[0] = b[0]; 3399371c9d4SSatish Balay t[1] = b[1]; 3409371c9d4SSatish Balay t[2] = b[2]; 3419371c9d4SSatish Balay t[3] = b[3]; 342e48d15efSToby Isaac i2 = 4; 343e48d15efSToby Isaac idiag += 16; 344e48d15efSToby Isaac for (i = 1; i < m; i++) { 345e48d15efSToby Isaac v = aa + 16 * ai[i]; 346e48d15efSToby Isaac vi = aj + ai[i]; 347e48d15efSToby Isaac nz = diag[i] - ai[i]; 3489371c9d4SSatish Balay s[0] = b[i2]; 3499371c9d4SSatish Balay s[1] = b[i2 + 1]; 3509371c9d4SSatish Balay s[2] = b[i2 + 2]; 3519371c9d4SSatish Balay s[3] = b[i2 + 3]; 352e48d15efSToby Isaac while (nz--) { 353e48d15efSToby Isaac idx = 4 * (*vi++); 3549371c9d4SSatish Balay xw[0] = x[idx]; 3559371c9d4SSatish Balay xw[1] = x[1 + idx]; 3569371c9d4SSatish Balay xw[2] = x[2 + idx]; 3579371c9d4SSatish Balay xw[3] = x[3 + idx]; 358e48d15efSToby Isaac PetscKernel_v_gets_v_minus_A_times_w_4(s, v, xw); 359e48d15efSToby Isaac v += 16; 360e48d15efSToby Isaac } 3619371c9d4SSatish Balay t[i2] = s[0]; 3629371c9d4SSatish Balay t[i2 + 1] = s[1]; 3639371c9d4SSatish Balay t[i2 + 2] = s[2]; 
3649371c9d4SSatish Balay t[i2 + 3] = s[3]; 365e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_4(xw, idiag, s); 3669371c9d4SSatish Balay x[i2] = xw[0]; 3679371c9d4SSatish Balay x[i2 + 1] = xw[1]; 3689371c9d4SSatish Balay x[i2 + 2] = xw[2]; 3699371c9d4SSatish Balay x[i2 + 3] = xw[3]; 370e48d15efSToby Isaac idiag += 16; 371e48d15efSToby Isaac i2 += 4; 372e48d15efSToby Isaac } 373e48d15efSToby Isaac break; 374e48d15efSToby Isaac case 5: 375e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_5(x, idiag, b); 3769371c9d4SSatish Balay t[0] = b[0]; 3779371c9d4SSatish Balay t[1] = b[1]; 3789371c9d4SSatish Balay t[2] = b[2]; 3799371c9d4SSatish Balay t[3] = b[3]; 3809371c9d4SSatish Balay t[4] = b[4]; 381e48d15efSToby Isaac i2 = 5; 382e48d15efSToby Isaac idiag += 25; 383e48d15efSToby Isaac for (i = 1; i < m; i++) { 384e48d15efSToby Isaac v = aa + 25 * ai[i]; 385e48d15efSToby Isaac vi = aj + ai[i]; 386e48d15efSToby Isaac nz = diag[i] - ai[i]; 3879371c9d4SSatish Balay s[0] = b[i2]; 3889371c9d4SSatish Balay s[1] = b[i2 + 1]; 3899371c9d4SSatish Balay s[2] = b[i2 + 2]; 3909371c9d4SSatish Balay s[3] = b[i2 + 3]; 3919371c9d4SSatish Balay s[4] = b[i2 + 4]; 392e48d15efSToby Isaac while (nz--) { 393e48d15efSToby Isaac idx = 5 * (*vi++); 3949371c9d4SSatish Balay xw[0] = x[idx]; 3959371c9d4SSatish Balay xw[1] = x[1 + idx]; 3969371c9d4SSatish Balay xw[2] = x[2 + idx]; 3979371c9d4SSatish Balay xw[3] = x[3 + idx]; 3989371c9d4SSatish Balay xw[4] = x[4 + idx]; 399e48d15efSToby Isaac PetscKernel_v_gets_v_minus_A_times_w_5(s, v, xw); 400e48d15efSToby Isaac v += 25; 401e48d15efSToby Isaac } 4029371c9d4SSatish Balay t[i2] = s[0]; 4039371c9d4SSatish Balay t[i2 + 1] = s[1]; 4049371c9d4SSatish Balay t[i2 + 2] = s[2]; 4059371c9d4SSatish Balay t[i2 + 3] = s[3]; 4069371c9d4SSatish Balay t[i2 + 4] = s[4]; 407e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_5(xw, idiag, s); 4089371c9d4SSatish Balay x[i2] = xw[0]; 4099371c9d4SSatish Balay x[i2 + 1] = xw[1]; 4109371c9d4SSatish Balay x[i2 + 2] = xw[2]; 
4119371c9d4SSatish Balay x[i2 + 3] = xw[3]; 4129371c9d4SSatish Balay x[i2 + 4] = xw[4]; 413e48d15efSToby Isaac idiag += 25; 414e48d15efSToby Isaac i2 += 5; 415e48d15efSToby Isaac } 416e48d15efSToby Isaac break; 417e48d15efSToby Isaac case 6: 418e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_6(x, idiag, b); 4199371c9d4SSatish Balay t[0] = b[0]; 4209371c9d4SSatish Balay t[1] = b[1]; 4219371c9d4SSatish Balay t[2] = b[2]; 4229371c9d4SSatish Balay t[3] = b[3]; 4239371c9d4SSatish Balay t[4] = b[4]; 4249371c9d4SSatish Balay t[5] = b[5]; 425e48d15efSToby Isaac i2 = 6; 426e48d15efSToby Isaac idiag += 36; 427e48d15efSToby Isaac for (i = 1; i < m; i++) { 428e48d15efSToby Isaac v = aa + 36 * ai[i]; 429e48d15efSToby Isaac vi = aj + ai[i]; 430e48d15efSToby Isaac nz = diag[i] - ai[i]; 4319371c9d4SSatish Balay s[0] = b[i2]; 4329371c9d4SSatish Balay s[1] = b[i2 + 1]; 4339371c9d4SSatish Balay s[2] = b[i2 + 2]; 4349371c9d4SSatish Balay s[3] = b[i2 + 3]; 4359371c9d4SSatish Balay s[4] = b[i2 + 4]; 4369371c9d4SSatish Balay s[5] = b[i2 + 5]; 437e48d15efSToby Isaac while (nz--) { 438e48d15efSToby Isaac idx = 6 * (*vi++); 4399371c9d4SSatish Balay xw[0] = x[idx]; 4409371c9d4SSatish Balay xw[1] = x[1 + idx]; 4419371c9d4SSatish Balay xw[2] = x[2 + idx]; 4429371c9d4SSatish Balay xw[3] = x[3 + idx]; 4439371c9d4SSatish Balay xw[4] = x[4 + idx]; 4449371c9d4SSatish Balay xw[5] = x[5 + idx]; 445e48d15efSToby Isaac PetscKernel_v_gets_v_minus_A_times_w_6(s, v, xw); 446e48d15efSToby Isaac v += 36; 447e48d15efSToby Isaac } 4489371c9d4SSatish Balay t[i2] = s[0]; 4499371c9d4SSatish Balay t[i2 + 1] = s[1]; 4509371c9d4SSatish Balay t[i2 + 2] = s[2]; 4519371c9d4SSatish Balay t[i2 + 3] = s[3]; 4529371c9d4SSatish Balay t[i2 + 4] = s[4]; 4539371c9d4SSatish Balay t[i2 + 5] = s[5]; 454e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_6(xw, idiag, s); 4559371c9d4SSatish Balay x[i2] = xw[0]; 4569371c9d4SSatish Balay x[i2 + 1] = xw[1]; 4579371c9d4SSatish Balay x[i2 + 2] = xw[2]; 4589371c9d4SSatish Balay x[i2 + 3] 
= xw[3]; 4599371c9d4SSatish Balay x[i2 + 4] = xw[4]; 4609371c9d4SSatish Balay x[i2 + 5] = xw[5]; 461e48d15efSToby Isaac idiag += 36; 462e48d15efSToby Isaac i2 += 6; 463e48d15efSToby Isaac } 464e48d15efSToby Isaac break; 465e48d15efSToby Isaac case 7: 466e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_7(x, idiag, b); 4679371c9d4SSatish Balay t[0] = b[0]; 4689371c9d4SSatish Balay t[1] = b[1]; 4699371c9d4SSatish Balay t[2] = b[2]; 4709371c9d4SSatish Balay t[3] = b[3]; 4719371c9d4SSatish Balay t[4] = b[4]; 4729371c9d4SSatish Balay t[5] = b[5]; 4739371c9d4SSatish Balay t[6] = b[6]; 474e48d15efSToby Isaac i2 = 7; 475e48d15efSToby Isaac idiag += 49; 476e48d15efSToby Isaac for (i = 1; i < m; i++) { 477e48d15efSToby Isaac v = aa + 49 * ai[i]; 478e48d15efSToby Isaac vi = aj + ai[i]; 479e48d15efSToby Isaac nz = diag[i] - ai[i]; 4809371c9d4SSatish Balay s[0] = b[i2]; 4819371c9d4SSatish Balay s[1] = b[i2 + 1]; 4829371c9d4SSatish Balay s[2] = b[i2 + 2]; 4839371c9d4SSatish Balay s[3] = b[i2 + 3]; 4849371c9d4SSatish Balay s[4] = b[i2 + 4]; 4859371c9d4SSatish Balay s[5] = b[i2 + 5]; 4869371c9d4SSatish Balay s[6] = b[i2 + 6]; 487e48d15efSToby Isaac while (nz--) { 488e48d15efSToby Isaac idx = 7 * (*vi++); 4899371c9d4SSatish Balay xw[0] = x[idx]; 4909371c9d4SSatish Balay xw[1] = x[1 + idx]; 4919371c9d4SSatish Balay xw[2] = x[2 + idx]; 4929371c9d4SSatish Balay xw[3] = x[3 + idx]; 4939371c9d4SSatish Balay xw[4] = x[4 + idx]; 4949371c9d4SSatish Balay xw[5] = x[5 + idx]; 4959371c9d4SSatish Balay xw[6] = x[6 + idx]; 496e48d15efSToby Isaac PetscKernel_v_gets_v_minus_A_times_w_7(s, v, xw); 497e48d15efSToby Isaac v += 49; 498e48d15efSToby Isaac } 4999371c9d4SSatish Balay t[i2] = s[0]; 5009371c9d4SSatish Balay t[i2 + 1] = s[1]; 5019371c9d4SSatish Balay t[i2 + 2] = s[2]; 5029371c9d4SSatish Balay t[i2 + 3] = s[3]; 5039371c9d4SSatish Balay t[i2 + 4] = s[4]; 5049371c9d4SSatish Balay t[i2 + 5] = s[5]; 5059371c9d4SSatish Balay t[i2 + 6] = s[6]; 506e48d15efSToby Isaac 
PetscKernel_v_gets_A_times_w_7(xw, idiag, s); 5079371c9d4SSatish Balay x[i2] = xw[0]; 5089371c9d4SSatish Balay x[i2 + 1] = xw[1]; 5099371c9d4SSatish Balay x[i2 + 2] = xw[2]; 5109371c9d4SSatish Balay x[i2 + 3] = xw[3]; 5119371c9d4SSatish Balay x[i2 + 4] = xw[4]; 5129371c9d4SSatish Balay x[i2 + 5] = xw[5]; 5139371c9d4SSatish Balay x[i2 + 6] = xw[6]; 514e48d15efSToby Isaac idiag += 49; 515e48d15efSToby Isaac i2 += 7; 516e48d15efSToby Isaac } 517e48d15efSToby Isaac break; 518e48d15efSToby Isaac default: 51996b95a6bSBarry Smith PetscKernel_w_gets_Ar_times_v(bs, bs, b, idiag, x); 5209566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(t, b, bs)); 521de80f912SBarry Smith i2 = bs; 522de80f912SBarry Smith idiag += bs2; 523de80f912SBarry Smith for (i = 1; i < m; i++) { 524de80f912SBarry Smith v = aa + bs2 * ai[i]; 525de80f912SBarry Smith vi = aj + ai[i]; 526de80f912SBarry Smith nz = diag[i] - ai[i]; 527de80f912SBarry Smith 5289566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(w, b + i2, bs)); 529de80f912SBarry Smith /* copy all rows of x that are needed into contiguous space */ 530de80f912SBarry Smith workt = work; 531de80f912SBarry Smith for (j = 0; j < nz; j++) { 5329566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(workt, x + bs * (*vi++), bs)); 533de80f912SBarry Smith workt += bs; 534de80f912SBarry Smith } 53596b95a6bSBarry Smith PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, v, work); 5369566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(t + i2, w, bs)); 53796b95a6bSBarry Smith PetscKernel_w_gets_Ar_times_v(bs, bs, w, idiag, x + i2); 538de80f912SBarry Smith 539de80f912SBarry Smith idiag += bs2; 540de80f912SBarry Smith i2 += bs; 541de80f912SBarry Smith } 542e48d15efSToby Isaac break; 543e48d15efSToby Isaac } 544de80f912SBarry Smith /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 5459566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(1.0 * bs2 * a->nz)); 546e48d15efSToby Isaac xb = t; 5479371c9d4SSatish Balay } else xb 
= b; 548de80f912SBarry Smith if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 549e48d15efSToby Isaac idiag = a->idiag + bs2 * (a->mbs - 1); 550e48d15efSToby Isaac i2 = bs * (m - 1); 551e48d15efSToby Isaac switch (bs) { 552e48d15efSToby Isaac case 1: 553e48d15efSToby Isaac s[0] = xb[i2]; 554e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_1(xw, idiag, s); 555e48d15efSToby Isaac x[i2] = xw[0]; 556e48d15efSToby Isaac i2 -= 1; 557e48d15efSToby Isaac for (i = m - 2; i >= 0; i--) { 558e48d15efSToby Isaac v = aa + (diag[i] + 1); 559e48d15efSToby Isaac vi = aj + diag[i] + 1; 560e48d15efSToby Isaac nz = ai[i + 1] - diag[i] - 1; 561e48d15efSToby Isaac s[0] = xb[i2]; 562e48d15efSToby Isaac for (j = 0; j < nz; j++) { 563e48d15efSToby Isaac xw[0] = x[vi[j]]; 564e48d15efSToby Isaac PetscKernel_v_gets_v_minus_A_times_w_1(s, (v + j), xw); 565e48d15efSToby Isaac } 566e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_1(xw, idiag, s); 567e48d15efSToby Isaac x[i2] = xw[0]; 568e48d15efSToby Isaac idiag -= 1; 569e48d15efSToby Isaac i2 -= 1; 570e48d15efSToby Isaac } 571e48d15efSToby Isaac break; 572e48d15efSToby Isaac case 2: 5739371c9d4SSatish Balay s[0] = xb[i2]; 5749371c9d4SSatish Balay s[1] = xb[i2 + 1]; 575e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_2(xw, idiag, s); 5769371c9d4SSatish Balay x[i2] = xw[0]; 5779371c9d4SSatish Balay x[i2 + 1] = xw[1]; 578e48d15efSToby Isaac i2 -= 2; 579e48d15efSToby Isaac idiag -= 4; 580e48d15efSToby Isaac for (i = m - 2; i >= 0; i--) { 581e48d15efSToby Isaac v = aa + 4 * (diag[i] + 1); 582e48d15efSToby Isaac vi = aj + diag[i] + 1; 583e48d15efSToby Isaac nz = ai[i + 1] - diag[i] - 1; 5849371c9d4SSatish Balay s[0] = xb[i2]; 5859371c9d4SSatish Balay s[1] = xb[i2 + 1]; 586e48d15efSToby Isaac for (j = 0; j < nz; j++) { 587e48d15efSToby Isaac idx = 2 * vi[j]; 588e48d15efSToby Isaac it = 4 * j; 5899371c9d4SSatish Balay xw[0] = x[idx]; 5909371c9d4SSatish Balay xw[1] = x[1 + idx]; 591e48d15efSToby Isaac 
PetscKernel_v_gets_v_minus_A_times_w_2(s, (v + it), xw); 592e48d15efSToby Isaac } 593e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_2(xw, idiag, s); 5949371c9d4SSatish Balay x[i2] = xw[0]; 5959371c9d4SSatish Balay x[i2 + 1] = xw[1]; 596e48d15efSToby Isaac idiag -= 4; 597e48d15efSToby Isaac i2 -= 2; 598e48d15efSToby Isaac } 599e48d15efSToby Isaac break; 600e48d15efSToby Isaac case 3: 6019371c9d4SSatish Balay s[0] = xb[i2]; 6029371c9d4SSatish Balay s[1] = xb[i2 + 1]; 6039371c9d4SSatish Balay s[2] = xb[i2 + 2]; 604e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_3(xw, idiag, s); 6059371c9d4SSatish Balay x[i2] = xw[0]; 6069371c9d4SSatish Balay x[i2 + 1] = xw[1]; 6079371c9d4SSatish Balay x[i2 + 2] = xw[2]; 608e48d15efSToby Isaac i2 -= 3; 609e48d15efSToby Isaac idiag -= 9; 610e48d15efSToby Isaac for (i = m - 2; i >= 0; i--) { 611e48d15efSToby Isaac v = aa + 9 * (diag[i] + 1); 612e48d15efSToby Isaac vi = aj + diag[i] + 1; 613e48d15efSToby Isaac nz = ai[i + 1] - diag[i] - 1; 6149371c9d4SSatish Balay s[0] = xb[i2]; 6159371c9d4SSatish Balay s[1] = xb[i2 + 1]; 6169371c9d4SSatish Balay s[2] = xb[i2 + 2]; 617e48d15efSToby Isaac while (nz--) { 618e48d15efSToby Isaac idx = 3 * (*vi++); 6199371c9d4SSatish Balay xw[0] = x[idx]; 6209371c9d4SSatish Balay xw[1] = x[1 + idx]; 6219371c9d4SSatish Balay xw[2] = x[2 + idx]; 622e48d15efSToby Isaac PetscKernel_v_gets_v_minus_A_times_w_3(s, v, xw); 623e48d15efSToby Isaac v += 9; 624e48d15efSToby Isaac } 625e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_3(xw, idiag, s); 6269371c9d4SSatish Balay x[i2] = xw[0]; 6279371c9d4SSatish Balay x[i2 + 1] = xw[1]; 6289371c9d4SSatish Balay x[i2 + 2] = xw[2]; 629e48d15efSToby Isaac idiag -= 9; 630e48d15efSToby Isaac i2 -= 3; 631e48d15efSToby Isaac } 632e48d15efSToby Isaac break; 633e48d15efSToby Isaac case 4: 6349371c9d4SSatish Balay s[0] = xb[i2]; 6359371c9d4SSatish Balay s[1] = xb[i2 + 1]; 6369371c9d4SSatish Balay s[2] = xb[i2 + 2]; 6379371c9d4SSatish Balay s[3] = xb[i2 + 3]; 638e48d15efSToby Isaac 
PetscKernel_v_gets_A_times_w_4(xw, idiag, s); 6399371c9d4SSatish Balay x[i2] = xw[0]; 6409371c9d4SSatish Balay x[i2 + 1] = xw[1]; 6419371c9d4SSatish Balay x[i2 + 2] = xw[2]; 6429371c9d4SSatish Balay x[i2 + 3] = xw[3]; 643e48d15efSToby Isaac i2 -= 4; 644e48d15efSToby Isaac idiag -= 16; 645e48d15efSToby Isaac for (i = m - 2; i >= 0; i--) { 646e48d15efSToby Isaac v = aa + 16 * (diag[i] + 1); 647e48d15efSToby Isaac vi = aj + diag[i] + 1; 648e48d15efSToby Isaac nz = ai[i + 1] - diag[i] - 1; 6499371c9d4SSatish Balay s[0] = xb[i2]; 6509371c9d4SSatish Balay s[1] = xb[i2 + 1]; 6519371c9d4SSatish Balay s[2] = xb[i2 + 2]; 6529371c9d4SSatish Balay s[3] = xb[i2 + 3]; 653e48d15efSToby Isaac while (nz--) { 654e48d15efSToby Isaac idx = 4 * (*vi++); 6559371c9d4SSatish Balay xw[0] = x[idx]; 6569371c9d4SSatish Balay xw[1] = x[1 + idx]; 6579371c9d4SSatish Balay xw[2] = x[2 + idx]; 6589371c9d4SSatish Balay xw[3] = x[3 + idx]; 659e48d15efSToby Isaac PetscKernel_v_gets_v_minus_A_times_w_4(s, v, xw); 660e48d15efSToby Isaac v += 16; 661e48d15efSToby Isaac } 662e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_4(xw, idiag, s); 6639371c9d4SSatish Balay x[i2] = xw[0]; 6649371c9d4SSatish Balay x[i2 + 1] = xw[1]; 6659371c9d4SSatish Balay x[i2 + 2] = xw[2]; 6669371c9d4SSatish Balay x[i2 + 3] = xw[3]; 667e48d15efSToby Isaac idiag -= 16; 668e48d15efSToby Isaac i2 -= 4; 669e48d15efSToby Isaac } 670e48d15efSToby Isaac break; 671e48d15efSToby Isaac case 5: 6729371c9d4SSatish Balay s[0] = xb[i2]; 6739371c9d4SSatish Balay s[1] = xb[i2 + 1]; 6749371c9d4SSatish Balay s[2] = xb[i2 + 2]; 6759371c9d4SSatish Balay s[3] = xb[i2 + 3]; 6769371c9d4SSatish Balay s[4] = xb[i2 + 4]; 677e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_5(xw, idiag, s); 6789371c9d4SSatish Balay x[i2] = xw[0]; 6799371c9d4SSatish Balay x[i2 + 1] = xw[1]; 6809371c9d4SSatish Balay x[i2 + 2] = xw[2]; 6819371c9d4SSatish Balay x[i2 + 3] = xw[3]; 6829371c9d4SSatish Balay x[i2 + 4] = xw[4]; 683e48d15efSToby Isaac i2 -= 5; 684e48d15efSToby Isaac 
idiag -= 25; 685e48d15efSToby Isaac for (i = m - 2; i >= 0; i--) { 686e48d15efSToby Isaac v = aa + 25 * (diag[i] + 1); 687e48d15efSToby Isaac vi = aj + diag[i] + 1; 688e48d15efSToby Isaac nz = ai[i + 1] - diag[i] - 1; 6899371c9d4SSatish Balay s[0] = xb[i2]; 6909371c9d4SSatish Balay s[1] = xb[i2 + 1]; 6919371c9d4SSatish Balay s[2] = xb[i2 + 2]; 6929371c9d4SSatish Balay s[3] = xb[i2 + 3]; 6939371c9d4SSatish Balay s[4] = xb[i2 + 4]; 694e48d15efSToby Isaac while (nz--) { 695e48d15efSToby Isaac idx = 5 * (*vi++); 6969371c9d4SSatish Balay xw[0] = x[idx]; 6979371c9d4SSatish Balay xw[1] = x[1 + idx]; 6989371c9d4SSatish Balay xw[2] = x[2 + idx]; 6999371c9d4SSatish Balay xw[3] = x[3 + idx]; 7009371c9d4SSatish Balay xw[4] = x[4 + idx]; 701e48d15efSToby Isaac PetscKernel_v_gets_v_minus_A_times_w_5(s, v, xw); 702e48d15efSToby Isaac v += 25; 703e48d15efSToby Isaac } 704e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_5(xw, idiag, s); 7059371c9d4SSatish Balay x[i2] = xw[0]; 7069371c9d4SSatish Balay x[i2 + 1] = xw[1]; 7079371c9d4SSatish Balay x[i2 + 2] = xw[2]; 7089371c9d4SSatish Balay x[i2 + 3] = xw[3]; 7099371c9d4SSatish Balay x[i2 + 4] = xw[4]; 710e48d15efSToby Isaac idiag -= 25; 711e48d15efSToby Isaac i2 -= 5; 712e48d15efSToby Isaac } 713e48d15efSToby Isaac break; 714e48d15efSToby Isaac case 6: 7159371c9d4SSatish Balay s[0] = xb[i2]; 7169371c9d4SSatish Balay s[1] = xb[i2 + 1]; 7179371c9d4SSatish Balay s[2] = xb[i2 + 2]; 7189371c9d4SSatish Balay s[3] = xb[i2 + 3]; 7199371c9d4SSatish Balay s[4] = xb[i2 + 4]; 7209371c9d4SSatish Balay s[5] = xb[i2 + 5]; 721e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_6(xw, idiag, s); 7229371c9d4SSatish Balay x[i2] = xw[0]; 7239371c9d4SSatish Balay x[i2 + 1] = xw[1]; 7249371c9d4SSatish Balay x[i2 + 2] = xw[2]; 7259371c9d4SSatish Balay x[i2 + 3] = xw[3]; 7269371c9d4SSatish Balay x[i2 + 4] = xw[4]; 7279371c9d4SSatish Balay x[i2 + 5] = xw[5]; 728e48d15efSToby Isaac i2 -= 6; 729e48d15efSToby Isaac idiag -= 36; 730e48d15efSToby Isaac for (i = m - 2; 
i >= 0; i--) { 731e48d15efSToby Isaac v = aa + 36 * (diag[i] + 1); 732e48d15efSToby Isaac vi = aj + diag[i] + 1; 733e48d15efSToby Isaac nz = ai[i + 1] - diag[i] - 1; 7349371c9d4SSatish Balay s[0] = xb[i2]; 7359371c9d4SSatish Balay s[1] = xb[i2 + 1]; 7369371c9d4SSatish Balay s[2] = xb[i2 + 2]; 7379371c9d4SSatish Balay s[3] = xb[i2 + 3]; 7389371c9d4SSatish Balay s[4] = xb[i2 + 4]; 7399371c9d4SSatish Balay s[5] = xb[i2 + 5]; 740e48d15efSToby Isaac while (nz--) { 741e48d15efSToby Isaac idx = 6 * (*vi++); 7429371c9d4SSatish Balay xw[0] = x[idx]; 7439371c9d4SSatish Balay xw[1] = x[1 + idx]; 7449371c9d4SSatish Balay xw[2] = x[2 + idx]; 7459371c9d4SSatish Balay xw[3] = x[3 + idx]; 7469371c9d4SSatish Balay xw[4] = x[4 + idx]; 7479371c9d4SSatish Balay xw[5] = x[5 + idx]; 748e48d15efSToby Isaac PetscKernel_v_gets_v_minus_A_times_w_6(s, v, xw); 749e48d15efSToby Isaac v += 36; 750e48d15efSToby Isaac } 751e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_6(xw, idiag, s); 7529371c9d4SSatish Balay x[i2] = xw[0]; 7539371c9d4SSatish Balay x[i2 + 1] = xw[1]; 7549371c9d4SSatish Balay x[i2 + 2] = xw[2]; 7559371c9d4SSatish Balay x[i2 + 3] = xw[3]; 7569371c9d4SSatish Balay x[i2 + 4] = xw[4]; 7579371c9d4SSatish Balay x[i2 + 5] = xw[5]; 758e48d15efSToby Isaac idiag -= 36; 759e48d15efSToby Isaac i2 -= 6; 760e48d15efSToby Isaac } 761e48d15efSToby Isaac break; 762e48d15efSToby Isaac case 7: 7639371c9d4SSatish Balay s[0] = xb[i2]; 7649371c9d4SSatish Balay s[1] = xb[i2 + 1]; 7659371c9d4SSatish Balay s[2] = xb[i2 + 2]; 7669371c9d4SSatish Balay s[3] = xb[i2 + 3]; 7679371c9d4SSatish Balay s[4] = xb[i2 + 4]; 7689371c9d4SSatish Balay s[5] = xb[i2 + 5]; 7699371c9d4SSatish Balay s[6] = xb[i2 + 6]; 770e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_7(x, idiag, b); 7719371c9d4SSatish Balay x[i2] = xw[0]; 7729371c9d4SSatish Balay x[i2 + 1] = xw[1]; 7739371c9d4SSatish Balay x[i2 + 2] = xw[2]; 7749371c9d4SSatish Balay x[i2 + 3] = xw[3]; 7759371c9d4SSatish Balay x[i2 + 4] = xw[4]; 7769371c9d4SSatish Balay 
x[i2 + 5] = xw[5]; 7779371c9d4SSatish Balay x[i2 + 6] = xw[6]; 778e48d15efSToby Isaac i2 -= 7; 779e48d15efSToby Isaac idiag -= 49; 780e48d15efSToby Isaac for (i = m - 2; i >= 0; i--) { 781e48d15efSToby Isaac v = aa + 49 * (diag[i] + 1); 782e48d15efSToby Isaac vi = aj + diag[i] + 1; 783e48d15efSToby Isaac nz = ai[i + 1] - diag[i] - 1; 7849371c9d4SSatish Balay s[0] = xb[i2]; 7859371c9d4SSatish Balay s[1] = xb[i2 + 1]; 7869371c9d4SSatish Balay s[2] = xb[i2 + 2]; 7879371c9d4SSatish Balay s[3] = xb[i2 + 3]; 7889371c9d4SSatish Balay s[4] = xb[i2 + 4]; 7899371c9d4SSatish Balay s[5] = xb[i2 + 5]; 7909371c9d4SSatish Balay s[6] = xb[i2 + 6]; 791e48d15efSToby Isaac while (nz--) { 792e48d15efSToby Isaac idx = 7 * (*vi++); 7939371c9d4SSatish Balay xw[0] = x[idx]; 7949371c9d4SSatish Balay xw[1] = x[1 + idx]; 7959371c9d4SSatish Balay xw[2] = x[2 + idx]; 7969371c9d4SSatish Balay xw[3] = x[3 + idx]; 7979371c9d4SSatish Balay xw[4] = x[4 + idx]; 7989371c9d4SSatish Balay xw[5] = x[5 + idx]; 7999371c9d4SSatish Balay xw[6] = x[6 + idx]; 800e48d15efSToby Isaac PetscKernel_v_gets_v_minus_A_times_w_7(s, v, xw); 801e48d15efSToby Isaac v += 49; 802e48d15efSToby Isaac } 803e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_7(xw, idiag, s); 8049371c9d4SSatish Balay x[i2] = xw[0]; 8059371c9d4SSatish Balay x[i2 + 1] = xw[1]; 8069371c9d4SSatish Balay x[i2 + 2] = xw[2]; 8079371c9d4SSatish Balay x[i2 + 3] = xw[3]; 8089371c9d4SSatish Balay x[i2 + 4] = xw[4]; 8099371c9d4SSatish Balay x[i2 + 5] = xw[5]; 8109371c9d4SSatish Balay x[i2 + 6] = xw[6]; 811e48d15efSToby Isaac idiag -= 49; 812e48d15efSToby Isaac i2 -= 7; 813e48d15efSToby Isaac } 814e48d15efSToby Isaac break; 815e48d15efSToby Isaac default: 8169566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(w, xb + i2, bs)); 81796b95a6bSBarry Smith PetscKernel_w_gets_Ar_times_v(bs, bs, w, idiag, x + i2); 818de80f912SBarry Smith i2 -= bs; 819e48d15efSToby Isaac idiag -= bs2; 820de80f912SBarry Smith for (i = m - 2; i >= 0; i--) { 821de80f912SBarry Smith v = 
aa + bs2 * (diag[i] + 1); 822de80f912SBarry Smith vi = aj + diag[i] + 1; 823de80f912SBarry Smith nz = ai[i + 1] - diag[i] - 1; 824de80f912SBarry Smith 8259566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(w, xb + i2, bs)); 826de80f912SBarry Smith /* copy all rows of x that are needed into contiguous space */ 827de80f912SBarry Smith workt = work; 828de80f912SBarry Smith for (j = 0; j < nz; j++) { 8299566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(workt, x + bs * (*vi++), bs)); 830de80f912SBarry Smith workt += bs; 831de80f912SBarry Smith } 83296b95a6bSBarry Smith PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, v, work); 83396b95a6bSBarry Smith PetscKernel_w_gets_Ar_times_v(bs, bs, w, idiag, x + i2); 834e48d15efSToby Isaac 835de80f912SBarry Smith idiag -= bs2; 836de80f912SBarry Smith i2 -= bs; 837de80f912SBarry Smith } 838e48d15efSToby Isaac break; 839e48d15efSToby Isaac } 8409566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(1.0 * bs2 * (a->nz))); 841de80f912SBarry Smith } 842e48d15efSToby Isaac its--; 843e48d15efSToby Isaac } 844e48d15efSToby Isaac while (its--) { 845e48d15efSToby Isaac if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) { 846e48d15efSToby Isaac idiag = a->idiag; 847e48d15efSToby Isaac i2 = 0; 848e48d15efSToby Isaac switch (bs) { 849e48d15efSToby Isaac case 1: 850e48d15efSToby Isaac for (i = 0; i < m; i++) { 851e48d15efSToby Isaac v = aa + ai[i]; 852e48d15efSToby Isaac vi = aj + ai[i]; 853e48d15efSToby Isaac nz = ai[i + 1] - ai[i]; 854e48d15efSToby Isaac s[0] = b[i2]; 855e48d15efSToby Isaac for (j = 0; j < nz; j++) { 856e48d15efSToby Isaac xw[0] = x[vi[j]]; 857e48d15efSToby Isaac PetscKernel_v_gets_v_minus_A_times_w_1(s, (v + j), xw); 858e48d15efSToby Isaac } 859e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_1(xw, idiag, s); 860e48d15efSToby Isaac x[i2] += xw[0]; 861e48d15efSToby Isaac idiag += 1; 862e48d15efSToby Isaac i2 += 1; 863e48d15efSToby Isaac } 864e48d15efSToby Isaac break; 865e48d15efSToby Isaac case 2: 
866e48d15efSToby Isaac for (i = 0; i < m; i++) { 867e48d15efSToby Isaac v = aa + 4 * ai[i]; 868e48d15efSToby Isaac vi = aj + ai[i]; 869e48d15efSToby Isaac nz = ai[i + 1] - ai[i]; 8709371c9d4SSatish Balay s[0] = b[i2]; 8719371c9d4SSatish Balay s[1] = b[i2 + 1]; 872e48d15efSToby Isaac for (j = 0; j < nz; j++) { 873e48d15efSToby Isaac idx = 2 * vi[j]; 874e48d15efSToby Isaac it = 4 * j; 8759371c9d4SSatish Balay xw[0] = x[idx]; 8769371c9d4SSatish Balay xw[1] = x[1 + idx]; 877e48d15efSToby Isaac PetscKernel_v_gets_v_minus_A_times_w_2(s, (v + it), xw); 878e48d15efSToby Isaac } 879e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_2(xw, idiag, s); 8809371c9d4SSatish Balay x[i2] += xw[0]; 8819371c9d4SSatish Balay x[i2 + 1] += xw[1]; 882e48d15efSToby Isaac idiag += 4; 883e48d15efSToby Isaac i2 += 2; 884e48d15efSToby Isaac } 885e48d15efSToby Isaac break; 886e48d15efSToby Isaac case 3: 887e48d15efSToby Isaac for (i = 0; i < m; i++) { 888e48d15efSToby Isaac v = aa + 9 * ai[i]; 889e48d15efSToby Isaac vi = aj + ai[i]; 890e48d15efSToby Isaac nz = ai[i + 1] - ai[i]; 8919371c9d4SSatish Balay s[0] = b[i2]; 8929371c9d4SSatish Balay s[1] = b[i2 + 1]; 8939371c9d4SSatish Balay s[2] = b[i2 + 2]; 894e48d15efSToby Isaac while (nz--) { 895e48d15efSToby Isaac idx = 3 * (*vi++); 8969371c9d4SSatish Balay xw[0] = x[idx]; 8979371c9d4SSatish Balay xw[1] = x[1 + idx]; 8989371c9d4SSatish Balay xw[2] = x[2 + idx]; 899e48d15efSToby Isaac PetscKernel_v_gets_v_minus_A_times_w_3(s, v, xw); 900e48d15efSToby Isaac v += 9; 901e48d15efSToby Isaac } 902e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_3(xw, idiag, s); 9039371c9d4SSatish Balay x[i2] += xw[0]; 9049371c9d4SSatish Balay x[i2 + 1] += xw[1]; 9059371c9d4SSatish Balay x[i2 + 2] += xw[2]; 906e48d15efSToby Isaac idiag += 9; 907e48d15efSToby Isaac i2 += 3; 908e48d15efSToby Isaac } 909e48d15efSToby Isaac break; 910e48d15efSToby Isaac case 4: 911e48d15efSToby Isaac for (i = 0; i < m; i++) { 912e48d15efSToby Isaac v = aa + 16 * ai[i]; 913e48d15efSToby Isaac 
vi = aj + ai[i]; 914e48d15efSToby Isaac nz = ai[i + 1] - ai[i]; 9159371c9d4SSatish Balay s[0] = b[i2]; 9169371c9d4SSatish Balay s[1] = b[i2 + 1]; 9179371c9d4SSatish Balay s[2] = b[i2 + 2]; 9189371c9d4SSatish Balay s[3] = b[i2 + 3]; 919e48d15efSToby Isaac while (nz--) { 920e48d15efSToby Isaac idx = 4 * (*vi++); 9219371c9d4SSatish Balay xw[0] = x[idx]; 9229371c9d4SSatish Balay xw[1] = x[1 + idx]; 9239371c9d4SSatish Balay xw[2] = x[2 + idx]; 9249371c9d4SSatish Balay xw[3] = x[3 + idx]; 925e48d15efSToby Isaac PetscKernel_v_gets_v_minus_A_times_w_4(s, v, xw); 926e48d15efSToby Isaac v += 16; 927e48d15efSToby Isaac } 928e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_4(xw, idiag, s); 9299371c9d4SSatish Balay x[i2] += xw[0]; 9309371c9d4SSatish Balay x[i2 + 1] += xw[1]; 9319371c9d4SSatish Balay x[i2 + 2] += xw[2]; 9329371c9d4SSatish Balay x[i2 + 3] += xw[3]; 933e48d15efSToby Isaac idiag += 16; 934e48d15efSToby Isaac i2 += 4; 935e48d15efSToby Isaac } 936e48d15efSToby Isaac break; 937e48d15efSToby Isaac case 5: 938e48d15efSToby Isaac for (i = 0; i < m; i++) { 939e48d15efSToby Isaac v = aa + 25 * ai[i]; 940e48d15efSToby Isaac vi = aj + ai[i]; 941e48d15efSToby Isaac nz = ai[i + 1] - ai[i]; 9429371c9d4SSatish Balay s[0] = b[i2]; 9439371c9d4SSatish Balay s[1] = b[i2 + 1]; 9449371c9d4SSatish Balay s[2] = b[i2 + 2]; 9459371c9d4SSatish Balay s[3] = b[i2 + 3]; 9469371c9d4SSatish Balay s[4] = b[i2 + 4]; 947e48d15efSToby Isaac while (nz--) { 948e48d15efSToby Isaac idx = 5 * (*vi++); 9499371c9d4SSatish Balay xw[0] = x[idx]; 9509371c9d4SSatish Balay xw[1] = x[1 + idx]; 9519371c9d4SSatish Balay xw[2] = x[2 + idx]; 9529371c9d4SSatish Balay xw[3] = x[3 + idx]; 9539371c9d4SSatish Balay xw[4] = x[4 + idx]; 954e48d15efSToby Isaac PetscKernel_v_gets_v_minus_A_times_w_5(s, v, xw); 955e48d15efSToby Isaac v += 25; 956e48d15efSToby Isaac } 957e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_5(xw, idiag, s); 9589371c9d4SSatish Balay x[i2] += xw[0]; 9599371c9d4SSatish Balay x[i2 + 1] += xw[1]; 
9609371c9d4SSatish Balay x[i2 + 2] += xw[2]; 9619371c9d4SSatish Balay x[i2 + 3] += xw[3]; 9629371c9d4SSatish Balay x[i2 + 4] += xw[4]; 963e48d15efSToby Isaac idiag += 25; 964e48d15efSToby Isaac i2 += 5; 965e48d15efSToby Isaac } 966e48d15efSToby Isaac break; 967e48d15efSToby Isaac case 6: 968e48d15efSToby Isaac for (i = 0; i < m; i++) { 969e48d15efSToby Isaac v = aa + 36 * ai[i]; 970e48d15efSToby Isaac vi = aj + ai[i]; 971e48d15efSToby Isaac nz = ai[i + 1] - ai[i]; 9729371c9d4SSatish Balay s[0] = b[i2]; 9739371c9d4SSatish Balay s[1] = b[i2 + 1]; 9749371c9d4SSatish Balay s[2] = b[i2 + 2]; 9759371c9d4SSatish Balay s[3] = b[i2 + 3]; 9769371c9d4SSatish Balay s[4] = b[i2 + 4]; 9779371c9d4SSatish Balay s[5] = b[i2 + 5]; 978e48d15efSToby Isaac while (nz--) { 979e48d15efSToby Isaac idx = 6 * (*vi++); 9809371c9d4SSatish Balay xw[0] = x[idx]; 9819371c9d4SSatish Balay xw[1] = x[1 + idx]; 9829371c9d4SSatish Balay xw[2] = x[2 + idx]; 9839371c9d4SSatish Balay xw[3] = x[3 + idx]; 9849371c9d4SSatish Balay xw[4] = x[4 + idx]; 9859371c9d4SSatish Balay xw[5] = x[5 + idx]; 986e48d15efSToby Isaac PetscKernel_v_gets_v_minus_A_times_w_6(s, v, xw); 987e48d15efSToby Isaac v += 36; 988e48d15efSToby Isaac } 989e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_6(xw, idiag, s); 9909371c9d4SSatish Balay x[i2] += xw[0]; 9919371c9d4SSatish Balay x[i2 + 1] += xw[1]; 9929371c9d4SSatish Balay x[i2 + 2] += xw[2]; 9939371c9d4SSatish Balay x[i2 + 3] += xw[3]; 9949371c9d4SSatish Balay x[i2 + 4] += xw[4]; 9959371c9d4SSatish Balay x[i2 + 5] += xw[5]; 996e48d15efSToby Isaac idiag += 36; 997e48d15efSToby Isaac i2 += 6; 998e48d15efSToby Isaac } 999e48d15efSToby Isaac break; 1000e48d15efSToby Isaac case 7: 1001e48d15efSToby Isaac for (i = 0; i < m; i++) { 1002e48d15efSToby Isaac v = aa + 49 * ai[i]; 1003e48d15efSToby Isaac vi = aj + ai[i]; 1004e48d15efSToby Isaac nz = ai[i + 1] - ai[i]; 10059371c9d4SSatish Balay s[0] = b[i2]; 10069371c9d4SSatish Balay s[1] = b[i2 + 1]; 10079371c9d4SSatish Balay s[2] = b[i2 + 
2]; 10089371c9d4SSatish Balay s[3] = b[i2 + 3]; 10099371c9d4SSatish Balay s[4] = b[i2 + 4]; 10109371c9d4SSatish Balay s[5] = b[i2 + 5]; 10119371c9d4SSatish Balay s[6] = b[i2 + 6]; 1012e48d15efSToby Isaac while (nz--) { 1013e48d15efSToby Isaac idx = 7 * (*vi++); 10149371c9d4SSatish Balay xw[0] = x[idx]; 10159371c9d4SSatish Balay xw[1] = x[1 + idx]; 10169371c9d4SSatish Balay xw[2] = x[2 + idx]; 10179371c9d4SSatish Balay xw[3] = x[3 + idx]; 10189371c9d4SSatish Balay xw[4] = x[4 + idx]; 10199371c9d4SSatish Balay xw[5] = x[5 + idx]; 10209371c9d4SSatish Balay xw[6] = x[6 + idx]; 1021e48d15efSToby Isaac PetscKernel_v_gets_v_minus_A_times_w_7(s, v, xw); 1022e48d15efSToby Isaac v += 49; 1023e48d15efSToby Isaac } 1024e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_7(xw, idiag, s); 10259371c9d4SSatish Balay x[i2] += xw[0]; 10269371c9d4SSatish Balay x[i2 + 1] += xw[1]; 10279371c9d4SSatish Balay x[i2 + 2] += xw[2]; 10289371c9d4SSatish Balay x[i2 + 3] += xw[3]; 10299371c9d4SSatish Balay x[i2 + 4] += xw[4]; 10309371c9d4SSatish Balay x[i2 + 5] += xw[5]; 10319371c9d4SSatish Balay x[i2 + 6] += xw[6]; 1032e48d15efSToby Isaac idiag += 49; 1033e48d15efSToby Isaac i2 += 7; 1034e48d15efSToby Isaac } 1035e48d15efSToby Isaac break; 1036e48d15efSToby Isaac default: 1037e48d15efSToby Isaac for (i = 0; i < m; i++) { 1038e48d15efSToby Isaac v = aa + bs2 * ai[i]; 1039e48d15efSToby Isaac vi = aj + ai[i]; 1040e48d15efSToby Isaac nz = ai[i + 1] - ai[i]; 1041e48d15efSToby Isaac 10429566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(w, b + i2, bs)); 1043e48d15efSToby Isaac /* copy all rows of x that are needed into contiguous space */ 1044e48d15efSToby Isaac workt = work; 1045e48d15efSToby Isaac for (j = 0; j < nz; j++) { 10469566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(workt, x + bs * (*vi++), bs)); 1047e48d15efSToby Isaac workt += bs; 1048e48d15efSToby Isaac } 1049e48d15efSToby Isaac PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, v, work); 1050e48d15efSToby Isaac 
PetscKernel_w_gets_w_plus_Ar_times_v(bs, bs, w, idiag, x + i2); 1051e48d15efSToby Isaac 1052e48d15efSToby Isaac idiag += bs2; 1053e48d15efSToby Isaac i2 += bs; 1054e48d15efSToby Isaac } 1055e48d15efSToby Isaac break; 1056e48d15efSToby Isaac } 10579566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(2.0 * bs2 * a->nz)); 1058e48d15efSToby Isaac } 1059e48d15efSToby Isaac if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 1060e48d15efSToby Isaac idiag = a->idiag + bs2 * (a->mbs - 1); 1061e48d15efSToby Isaac i2 = bs * (m - 1); 1062e48d15efSToby Isaac switch (bs) { 1063e48d15efSToby Isaac case 1: 1064e48d15efSToby Isaac for (i = m - 1; i >= 0; i--) { 1065e48d15efSToby Isaac v = aa + ai[i]; 1066e48d15efSToby Isaac vi = aj + ai[i]; 1067e48d15efSToby Isaac nz = ai[i + 1] - ai[i]; 1068e48d15efSToby Isaac s[0] = b[i2]; 1069e48d15efSToby Isaac for (j = 0; j < nz; j++) { 1070e48d15efSToby Isaac xw[0] = x[vi[j]]; 1071e48d15efSToby Isaac PetscKernel_v_gets_v_minus_A_times_w_1(s, (v + j), xw); 1072e48d15efSToby Isaac } 1073e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_1(xw, idiag, s); 1074e48d15efSToby Isaac x[i2] += xw[0]; 1075e48d15efSToby Isaac idiag -= 1; 1076e48d15efSToby Isaac i2 -= 1; 1077e48d15efSToby Isaac } 1078e48d15efSToby Isaac break; 1079e48d15efSToby Isaac case 2: 1080e48d15efSToby Isaac for (i = m - 1; i >= 0; i--) { 1081e48d15efSToby Isaac v = aa + 4 * ai[i]; 1082e48d15efSToby Isaac vi = aj + ai[i]; 1083e48d15efSToby Isaac nz = ai[i + 1] - ai[i]; 10849371c9d4SSatish Balay s[0] = b[i2]; 10859371c9d4SSatish Balay s[1] = b[i2 + 1]; 1086e48d15efSToby Isaac for (j = 0; j < nz; j++) { 1087e48d15efSToby Isaac idx = 2 * vi[j]; 1088e48d15efSToby Isaac it = 4 * j; 10899371c9d4SSatish Balay xw[0] = x[idx]; 10909371c9d4SSatish Balay xw[1] = x[1 + idx]; 1091e48d15efSToby Isaac PetscKernel_v_gets_v_minus_A_times_w_2(s, (v + it), xw); 1092e48d15efSToby Isaac } 1093e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_2(xw, idiag, s); 10949371c9d4SSatish Balay 
x[i2] += xw[0]; 10959371c9d4SSatish Balay x[i2 + 1] += xw[1]; 1096e48d15efSToby Isaac idiag -= 4; 1097e48d15efSToby Isaac i2 -= 2; 1098e48d15efSToby Isaac } 1099e48d15efSToby Isaac break; 1100e48d15efSToby Isaac case 3: 1101e48d15efSToby Isaac for (i = m - 1; i >= 0; i--) { 1102e48d15efSToby Isaac v = aa + 9 * ai[i]; 1103e48d15efSToby Isaac vi = aj + ai[i]; 1104e48d15efSToby Isaac nz = ai[i + 1] - ai[i]; 11059371c9d4SSatish Balay s[0] = b[i2]; 11069371c9d4SSatish Balay s[1] = b[i2 + 1]; 11079371c9d4SSatish Balay s[2] = b[i2 + 2]; 1108e48d15efSToby Isaac while (nz--) { 1109e48d15efSToby Isaac idx = 3 * (*vi++); 11109371c9d4SSatish Balay xw[0] = x[idx]; 11119371c9d4SSatish Balay xw[1] = x[1 + idx]; 11129371c9d4SSatish Balay xw[2] = x[2 + idx]; 1113e48d15efSToby Isaac PetscKernel_v_gets_v_minus_A_times_w_3(s, v, xw); 1114e48d15efSToby Isaac v += 9; 1115e48d15efSToby Isaac } 1116e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_3(xw, idiag, s); 11179371c9d4SSatish Balay x[i2] += xw[0]; 11189371c9d4SSatish Balay x[i2 + 1] += xw[1]; 11199371c9d4SSatish Balay x[i2 + 2] += xw[2]; 1120e48d15efSToby Isaac idiag -= 9; 1121e48d15efSToby Isaac i2 -= 3; 1122e48d15efSToby Isaac } 1123e48d15efSToby Isaac break; 1124e48d15efSToby Isaac case 4: 1125e48d15efSToby Isaac for (i = m - 1; i >= 0; i--) { 1126e48d15efSToby Isaac v = aa + 16 * ai[i]; 1127e48d15efSToby Isaac vi = aj + ai[i]; 1128e48d15efSToby Isaac nz = ai[i + 1] - ai[i]; 11299371c9d4SSatish Balay s[0] = b[i2]; 11309371c9d4SSatish Balay s[1] = b[i2 + 1]; 11319371c9d4SSatish Balay s[2] = b[i2 + 2]; 11329371c9d4SSatish Balay s[3] = b[i2 + 3]; 1133e48d15efSToby Isaac while (nz--) { 1134e48d15efSToby Isaac idx = 4 * (*vi++); 11359371c9d4SSatish Balay xw[0] = x[idx]; 11369371c9d4SSatish Balay xw[1] = x[1 + idx]; 11379371c9d4SSatish Balay xw[2] = x[2 + idx]; 11389371c9d4SSatish Balay xw[3] = x[3 + idx]; 1139e48d15efSToby Isaac PetscKernel_v_gets_v_minus_A_times_w_4(s, v, xw); 1140e48d15efSToby Isaac v += 16; 1141e48d15efSToby Isaac 
} 1142e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_4(xw, idiag, s); 11439371c9d4SSatish Balay x[i2] += xw[0]; 11449371c9d4SSatish Balay x[i2 + 1] += xw[1]; 11459371c9d4SSatish Balay x[i2 + 2] += xw[2]; 11469371c9d4SSatish Balay x[i2 + 3] += xw[3]; 1147e48d15efSToby Isaac idiag -= 16; 1148e48d15efSToby Isaac i2 -= 4; 1149e48d15efSToby Isaac } 1150e48d15efSToby Isaac break; 1151e48d15efSToby Isaac case 5: 1152e48d15efSToby Isaac for (i = m - 1; i >= 0; i--) { 1153e48d15efSToby Isaac v = aa + 25 * ai[i]; 1154e48d15efSToby Isaac vi = aj + ai[i]; 1155e48d15efSToby Isaac nz = ai[i + 1] - ai[i]; 11569371c9d4SSatish Balay s[0] = b[i2]; 11579371c9d4SSatish Balay s[1] = b[i2 + 1]; 11589371c9d4SSatish Balay s[2] = b[i2 + 2]; 11599371c9d4SSatish Balay s[3] = b[i2 + 3]; 11609371c9d4SSatish Balay s[4] = b[i2 + 4]; 1161e48d15efSToby Isaac while (nz--) { 1162e48d15efSToby Isaac idx = 5 * (*vi++); 11639371c9d4SSatish Balay xw[0] = x[idx]; 11649371c9d4SSatish Balay xw[1] = x[1 + idx]; 11659371c9d4SSatish Balay xw[2] = x[2 + idx]; 11669371c9d4SSatish Balay xw[3] = x[3 + idx]; 11679371c9d4SSatish Balay xw[4] = x[4 + idx]; 1168e48d15efSToby Isaac PetscKernel_v_gets_v_minus_A_times_w_5(s, v, xw); 1169e48d15efSToby Isaac v += 25; 1170e48d15efSToby Isaac } 1171e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_5(xw, idiag, s); 11729371c9d4SSatish Balay x[i2] += xw[0]; 11739371c9d4SSatish Balay x[i2 + 1] += xw[1]; 11749371c9d4SSatish Balay x[i2 + 2] += xw[2]; 11759371c9d4SSatish Balay x[i2 + 3] += xw[3]; 11769371c9d4SSatish Balay x[i2 + 4] += xw[4]; 1177e48d15efSToby Isaac idiag -= 25; 1178e48d15efSToby Isaac i2 -= 5; 1179e48d15efSToby Isaac } 1180e48d15efSToby Isaac break; 1181e48d15efSToby Isaac case 6: 1182e48d15efSToby Isaac for (i = m - 1; i >= 0; i--) { 1183e48d15efSToby Isaac v = aa + 36 * ai[i]; 1184e48d15efSToby Isaac vi = aj + ai[i]; 1185e48d15efSToby Isaac nz = ai[i + 1] - ai[i]; 11869371c9d4SSatish Balay s[0] = b[i2]; 11879371c9d4SSatish Balay s[1] = b[i2 + 1]; 
11889371c9d4SSatish Balay s[2] = b[i2 + 2]; 11899371c9d4SSatish Balay s[3] = b[i2 + 3]; 11909371c9d4SSatish Balay s[4] = b[i2 + 4]; 11919371c9d4SSatish Balay s[5] = b[i2 + 5]; 1192e48d15efSToby Isaac while (nz--) { 1193e48d15efSToby Isaac idx = 6 * (*vi++); 11949371c9d4SSatish Balay xw[0] = x[idx]; 11959371c9d4SSatish Balay xw[1] = x[1 + idx]; 11969371c9d4SSatish Balay xw[2] = x[2 + idx]; 11979371c9d4SSatish Balay xw[3] = x[3 + idx]; 11989371c9d4SSatish Balay xw[4] = x[4 + idx]; 11999371c9d4SSatish Balay xw[5] = x[5 + idx]; 1200e48d15efSToby Isaac PetscKernel_v_gets_v_minus_A_times_w_6(s, v, xw); 1201e48d15efSToby Isaac v += 36; 1202e48d15efSToby Isaac } 1203e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_6(xw, idiag, s); 12049371c9d4SSatish Balay x[i2] += xw[0]; 12059371c9d4SSatish Balay x[i2 + 1] += xw[1]; 12069371c9d4SSatish Balay x[i2 + 2] += xw[2]; 12079371c9d4SSatish Balay x[i2 + 3] += xw[3]; 12089371c9d4SSatish Balay x[i2 + 4] += xw[4]; 12099371c9d4SSatish Balay x[i2 + 5] += xw[5]; 1210e48d15efSToby Isaac idiag -= 36; 1211e48d15efSToby Isaac i2 -= 6; 1212e48d15efSToby Isaac } 1213e48d15efSToby Isaac break; 1214e48d15efSToby Isaac case 7: 1215e48d15efSToby Isaac for (i = m - 1; i >= 0; i--) { 1216e48d15efSToby Isaac v = aa + 49 * ai[i]; 1217e48d15efSToby Isaac vi = aj + ai[i]; 1218e48d15efSToby Isaac nz = ai[i + 1] - ai[i]; 12199371c9d4SSatish Balay s[0] = b[i2]; 12209371c9d4SSatish Balay s[1] = b[i2 + 1]; 12219371c9d4SSatish Balay s[2] = b[i2 + 2]; 12229371c9d4SSatish Balay s[3] = b[i2 + 3]; 12239371c9d4SSatish Balay s[4] = b[i2 + 4]; 12249371c9d4SSatish Balay s[5] = b[i2 + 5]; 12259371c9d4SSatish Balay s[6] = b[i2 + 6]; 1226e48d15efSToby Isaac while (nz--) { 1227e48d15efSToby Isaac idx = 7 * (*vi++); 12289371c9d4SSatish Balay xw[0] = x[idx]; 12299371c9d4SSatish Balay xw[1] = x[1 + idx]; 12309371c9d4SSatish Balay xw[2] = x[2 + idx]; 12319371c9d4SSatish Balay xw[3] = x[3 + idx]; 12329371c9d4SSatish Balay xw[4] = x[4 + idx]; 12339371c9d4SSatish Balay xw[5] = 
x[5 + idx]; 12349371c9d4SSatish Balay xw[6] = x[6 + idx]; 1235e48d15efSToby Isaac PetscKernel_v_gets_v_minus_A_times_w_7(s, v, xw); 1236e48d15efSToby Isaac v += 49; 1237e48d15efSToby Isaac } 1238e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_7(xw, idiag, s); 12399371c9d4SSatish Balay x[i2] += xw[0]; 12409371c9d4SSatish Balay x[i2 + 1] += xw[1]; 12419371c9d4SSatish Balay x[i2 + 2] += xw[2]; 12429371c9d4SSatish Balay x[i2 + 3] += xw[3]; 12439371c9d4SSatish Balay x[i2 + 4] += xw[4]; 12449371c9d4SSatish Balay x[i2 + 5] += xw[5]; 12459371c9d4SSatish Balay x[i2 + 6] += xw[6]; 1246e48d15efSToby Isaac idiag -= 49; 1247e48d15efSToby Isaac i2 -= 7; 1248e48d15efSToby Isaac } 1249e48d15efSToby Isaac break; 1250e48d15efSToby Isaac default: 1251e48d15efSToby Isaac for (i = m - 1; i >= 0; i--) { 1252e48d15efSToby Isaac v = aa + bs2 * ai[i]; 1253e48d15efSToby Isaac vi = aj + ai[i]; 1254e48d15efSToby Isaac nz = ai[i + 1] - ai[i]; 1255e48d15efSToby Isaac 12569566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(w, b + i2, bs)); 1257e48d15efSToby Isaac /* copy all rows of x that are needed into contiguous space */ 1258e48d15efSToby Isaac workt = work; 1259e48d15efSToby Isaac for (j = 0; j < nz; j++) { 12609566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(workt, x + bs * (*vi++), bs)); 1261e48d15efSToby Isaac workt += bs; 1262e48d15efSToby Isaac } 1263e48d15efSToby Isaac PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, v, work); 1264e48d15efSToby Isaac PetscKernel_w_gets_w_plus_Ar_times_v(bs, bs, w, idiag, x + i2); 1265e48d15efSToby Isaac 1266e48d15efSToby Isaac idiag -= bs2; 1267e48d15efSToby Isaac i2 -= bs; 1268e48d15efSToby Isaac } 1269e48d15efSToby Isaac break; 1270e48d15efSToby Isaac } 12719566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(2.0 * bs2 * (a->nz))); 1272e48d15efSToby Isaac } 1273e48d15efSToby Isaac } 12749566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(xx, &x)); 12759566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(bb, &b)); 
12763ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1277de80f912SBarry Smith } 1278de80f912SBarry Smith 1279af674e45SBarry Smith /* 128081824310SBarry Smith Special version for direct calls from Fortran (Used in PETSc-fun3d) 1281af674e45SBarry Smith */ 1282af674e45SBarry Smith #if defined(PETSC_HAVE_FORTRAN_CAPS) 1283af674e45SBarry Smith #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4 1284af674e45SBarry Smith #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 1285af674e45SBarry Smith #define matsetvaluesblocked4_ matsetvaluesblocked4 1286af674e45SBarry Smith #endif 1287af674e45SBarry Smith 1288d71ae5a4SJacob Faibussowitsch PETSC_EXTERN void matsetvaluesblocked4_(Mat *AA, PetscInt *mm, const PetscInt im[], PetscInt *nn, const PetscInt in[], const PetscScalar v[]) 1289d71ae5a4SJacob Faibussowitsch { 1290af674e45SBarry Smith Mat A = *AA; 1291af674e45SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 1292c1ac3661SBarry Smith PetscInt *rp, k, low, high, t, ii, jj, row, nrow, i, col, l, N, m = *mm, n = *nn; 1293c1ac3661SBarry Smith PetscInt *ai = a->i, *ailen = a->ilen; 129417ec6a02SBarry Smith PetscInt *aj = a->j, stepval, lastcol = -1; 1295f15d580aSBarry Smith const PetscScalar *value = v; 12964bb09213Spetsc MatScalar *ap, *aa = a->a, *bap; 1297af674e45SBarry Smith 1298af674e45SBarry Smith PetscFunctionBegin; 1299ce94432eSBarry Smith if (A->rmap->bs != 4) SETERRABORT(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Can only be called with a block size of 4"); 1300af674e45SBarry Smith stepval = (n - 1) * 4; 1301af674e45SBarry Smith for (k = 0; k < m; k++) { /* loop over added rows */ 1302af674e45SBarry Smith row = im[k]; 1303af674e45SBarry Smith rp = aj + ai[row]; 1304af674e45SBarry Smith ap = aa + 16 * ai[row]; 1305af674e45SBarry Smith nrow = ailen[row]; 1306af674e45SBarry Smith low = 0; 130717ec6a02SBarry Smith high = nrow; 1308af674e45SBarry Smith for (l = 0; l < n; l++) { /* loop over added columns */ 1309af674e45SBarry Smith col = in[l]; 
1310db4deed7SKarl Rupp if (col <= lastcol) low = 0; 1311db4deed7SKarl Rupp else high = nrow; 131217ec6a02SBarry Smith lastcol = col; 13131e3347e8SBarry Smith value = v + k * (stepval + 4 + l) * 4; 1314af674e45SBarry Smith while (high - low > 7) { 1315af674e45SBarry Smith t = (low + high) / 2; 1316af674e45SBarry Smith if (rp[t] > col) high = t; 1317af674e45SBarry Smith else low = t; 1318af674e45SBarry Smith } 1319af674e45SBarry Smith for (i = low; i < high; i++) { 1320af674e45SBarry Smith if (rp[i] > col) break; 1321af674e45SBarry Smith if (rp[i] == col) { 1322af674e45SBarry Smith bap = ap + 16 * i; 1323af674e45SBarry Smith for (ii = 0; ii < 4; ii++, value += stepval) { 1324ad540459SPierre Jolivet for (jj = ii; jj < 16; jj += 4) bap[jj] += *value++; 1325af674e45SBarry Smith } 1326af674e45SBarry Smith goto noinsert2; 1327af674e45SBarry Smith } 1328af674e45SBarry Smith } 1329af674e45SBarry Smith N = nrow++ - 1; 133017ec6a02SBarry Smith high++; /* added new column index thus must search to one higher than before */ 1331af674e45SBarry Smith /* shift up all the later entries in this row */ 1332af674e45SBarry Smith for (ii = N; ii >= i; ii--) { 1333af674e45SBarry Smith rp[ii + 1] = rp[ii]; 13349566063dSJacob Faibussowitsch PetscCallVoid(PetscArraycpy(ap + 16 * (ii + 1), ap + 16 * (ii), 16)); 1335af674e45SBarry Smith } 133648a46eb9SPierre Jolivet if (N >= i) PetscCallVoid(PetscArrayzero(ap + 16 * i, 16)); 1337af674e45SBarry Smith rp[i] = col; 1338af674e45SBarry Smith bap = ap + 16 * i; 1339af674e45SBarry Smith for (ii = 0; ii < 4; ii++, value += stepval) { 1340ad540459SPierre Jolivet for (jj = ii; jj < 16; jj += 4) bap[jj] = *value++; 1341af674e45SBarry Smith } 1342af674e45SBarry Smith noinsert2:; 1343af674e45SBarry Smith low = i; 1344af674e45SBarry Smith } 1345af674e45SBarry Smith ailen[row] = nrow; 1346af674e45SBarry Smith } 1347be1d678aSKris Buschelman PetscFunctionReturnVoid(); 1348af674e45SBarry Smith } 1349af674e45SBarry Smith 1350af674e45SBarry Smith #if 
defined(PETSC_HAVE_FORTRAN_CAPS) 1351af674e45SBarry Smith #define matsetvalues4_ MATSETVALUES4 1352af674e45SBarry Smith #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 1353af674e45SBarry Smith #define matsetvalues4_ matsetvalues4 1354af674e45SBarry Smith #endif 1355af674e45SBarry Smith 1356d71ae5a4SJacob Faibussowitsch PETSC_EXTERN void matsetvalues4_(Mat *AA, PetscInt *mm, PetscInt *im, PetscInt *nn, PetscInt *in, PetscScalar *v) 1357d71ae5a4SJacob Faibussowitsch { 1358af674e45SBarry Smith Mat A = *AA; 1359af674e45SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 1360580bdb30SBarry Smith PetscInt *rp, k, low, high, t, row, nrow, i, col, l, N, n = *nn, m = *mm; 1361c1ac3661SBarry Smith PetscInt *ai = a->i, *ailen = a->ilen; 1362c1ac3661SBarry Smith PetscInt *aj = a->j, brow, bcol; 136317ec6a02SBarry Smith PetscInt ridx, cidx, lastcol = -1; 1364af674e45SBarry Smith MatScalar *ap, value, *aa = a->a, *bap; 1365af674e45SBarry Smith 1366af674e45SBarry Smith PetscFunctionBegin; 1367af674e45SBarry Smith for (k = 0; k < m; k++) { /* loop over added rows */ 13689371c9d4SSatish Balay row = im[k]; 13699371c9d4SSatish Balay brow = row / 4; 1370af674e45SBarry Smith rp = aj + ai[brow]; 1371af674e45SBarry Smith ap = aa + 16 * ai[brow]; 1372af674e45SBarry Smith nrow = ailen[brow]; 1373af674e45SBarry Smith low = 0; 137417ec6a02SBarry Smith high = nrow; 1375af674e45SBarry Smith for (l = 0; l < n; l++) { /* loop over added columns */ 13769371c9d4SSatish Balay col = in[l]; 13779371c9d4SSatish Balay bcol = col / 4; 13789371c9d4SSatish Balay ridx = row % 4; 13799371c9d4SSatish Balay cidx = col % 4; 1380af674e45SBarry Smith value = v[l + k * n]; 1381db4deed7SKarl Rupp if (col <= lastcol) low = 0; 1382db4deed7SKarl Rupp else high = nrow; 138317ec6a02SBarry Smith lastcol = col; 1384af674e45SBarry Smith while (high - low > 7) { 1385af674e45SBarry Smith t = (low + high) / 2; 1386af674e45SBarry Smith if (rp[t] > bcol) high = t; 1387af674e45SBarry Smith else low = t; 1388af674e45SBarry Smith 
} 1389af674e45SBarry Smith for (i = low; i < high; i++) { 1390af674e45SBarry Smith if (rp[i] > bcol) break; 1391af674e45SBarry Smith if (rp[i] == bcol) { 1392af674e45SBarry Smith bap = ap + 16 * i + 4 * cidx + ridx; 1393af674e45SBarry Smith *bap += value; 1394af674e45SBarry Smith goto noinsert1; 1395af674e45SBarry Smith } 1396af674e45SBarry Smith } 1397af674e45SBarry Smith N = nrow++ - 1; 139817ec6a02SBarry Smith high++; /* added new column thus must search to one higher than before */ 1399af674e45SBarry Smith /* shift up all the later entries in this row */ 14009566063dSJacob Faibussowitsch PetscCallVoid(PetscArraymove(rp + i + 1, rp + i, N - i + 1)); 14019566063dSJacob Faibussowitsch PetscCallVoid(PetscArraymove(ap + 16 * i + 16, ap + 16 * i, 16 * (N - i + 1))); 14029566063dSJacob Faibussowitsch PetscCallVoid(PetscArrayzero(ap + 16 * i, 16)); 1403af674e45SBarry Smith rp[i] = bcol; 1404af674e45SBarry Smith ap[16 * i + 4 * cidx + ridx] = value; 1405af674e45SBarry Smith noinsert1:; 1406af674e45SBarry Smith low = i; 1407af674e45SBarry Smith } 1408af674e45SBarry Smith ailen[brow] = nrow; 1409af674e45SBarry Smith } 1410be1d678aSKris Buschelman PetscFunctionReturnVoid(); 1411af674e45SBarry Smith } 1412af674e45SBarry Smith 1413be5855fcSBarry Smith /* 1414be5855fcSBarry Smith Checks for missing diagonals 1415be5855fcSBarry Smith */ 1416d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A, PetscBool *missing, PetscInt *d) 1417d71ae5a4SJacob Faibussowitsch { 1418be5855fcSBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 14197734d3b5SMatthew G. Knepley PetscInt *diag, *ii = a->i, i; 1420be5855fcSBarry Smith 1421be5855fcSBarry Smith PetscFunctionBegin; 14229566063dSJacob Faibussowitsch PetscCall(MatMarkDiagonal_SeqBAIJ(A)); 14232af78befSBarry Smith *missing = PETSC_FALSE; 14247734d3b5SMatthew G. 
Knepley if (A->rmap->n > 0 && !ii) { 14252efa7f71SHong Zhang *missing = PETSC_TRUE; 14262efa7f71SHong Zhang if (d) *d = 0; 14279566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "Matrix has no entries therefore is missing diagonal\n")); 14282efa7f71SHong Zhang } else { 142901445905SHong Zhang PetscInt n; 143001445905SHong Zhang n = PetscMin(a->mbs, a->nbs); 1431883fce79SBarry Smith diag = a->diag; 143201445905SHong Zhang for (i = 0; i < n; i++) { 14337734d3b5SMatthew G. Knepley if (diag[i] >= ii[i + 1]) { 14342af78befSBarry Smith *missing = PETSC_TRUE; 14352af78befSBarry Smith if (d) *d = i; 14369566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "Matrix is missing block diagonal number %" PetscInt_FMT "\n", i)); 1437358d2f5dSShri Abhyankar break; 14382efa7f71SHong Zhang } 1439be5855fcSBarry Smith } 1440be5855fcSBarry Smith } 14413ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1442be5855fcSBarry Smith } 1443be5855fcSBarry Smith 1444d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A) 1445d71ae5a4SJacob Faibussowitsch { 1446de6a44a3SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 144709f38230SBarry Smith PetscInt i, j, m = a->mbs; 1448de6a44a3SBarry Smith 14493a40ed3dSBarry Smith PetscFunctionBegin; 145009f38230SBarry Smith if (!a->diag) { 14519566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(m, &a->diag)); 14524fd072dbSBarry Smith a->free_diag = PETSC_TRUE; 145309f38230SBarry Smith } 14547fc0212eSBarry Smith for (i = 0; i < m; i++) { 145509f38230SBarry Smith a->diag[i] = a->i[i + 1]; 1456de6a44a3SBarry Smith for (j = a->i[i]; j < a->i[i + 1]; j++) { 1457de6a44a3SBarry Smith if (a->j[j] == i) { 145809f38230SBarry Smith a->diag[i] = j; 1459de6a44a3SBarry Smith break; 1460de6a44a3SBarry Smith } 1461de6a44a3SBarry Smith } 1462de6a44a3SBarry Smith } 14633ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1464de6a44a3SBarry Smith } 14652593348eSBarry Smith 1466d71ae5a4SJacob Faibussowitsch static 
PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool blockcompressed, PetscInt *nn, const PetscInt *inia[], const PetscInt *inja[], PetscBool *done) 1467d71ae5a4SJacob Faibussowitsch { 14683b2fbd54SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 14691a83f524SJed Brown PetscInt i, j, n = a->mbs, nz = a->i[n], *tia, *tja, bs = A->rmap->bs, k, l, cnt; 14701a83f524SJed Brown PetscInt **ia = (PetscInt **)inia, **ja = (PetscInt **)inja; 14713b2fbd54SBarry Smith 14723a40ed3dSBarry Smith PetscFunctionBegin; 14733b2fbd54SBarry Smith *nn = n; 14743ba16761SJacob Faibussowitsch if (!ia) PetscFunctionReturn(PETSC_SUCCESS); 14753b2fbd54SBarry Smith if (symmetric) { 14769566063dSJacob Faibussowitsch PetscCall(MatToSymmetricIJ_SeqAIJ(n, a->i, a->j, PETSC_TRUE, 0, 0, &tia, &tja)); 1477553b3c51SBarry Smith nz = tia[n]; 14783b2fbd54SBarry Smith } else { 14799371c9d4SSatish Balay tia = a->i; 14809371c9d4SSatish Balay tja = a->j; 14813b2fbd54SBarry Smith } 14823b2fbd54SBarry Smith 1483ecc77c7aSBarry Smith if (!blockcompressed && bs > 1) { 1484ecc77c7aSBarry Smith (*nn) *= bs; 14858f7157efSSatish Balay /* malloc & create the natural set of indices */ 14869566063dSJacob Faibussowitsch PetscCall(PetscMalloc1((n + 1) * bs, ia)); 14879985e31cSBarry Smith if (n) { 14882462f5fdSStefano Zampini (*ia)[0] = oshift; 1489ad540459SPierre Jolivet for (j = 1; j < bs; j++) (*ia)[j] = (tia[1] - tia[0]) * bs + (*ia)[j - 1]; 14909985e31cSBarry Smith } 1491ecc77c7aSBarry Smith 1492ecc77c7aSBarry Smith for (i = 1; i < n; i++) { 1493ecc77c7aSBarry Smith (*ia)[i * bs] = (tia[i] - tia[i - 1]) * bs + (*ia)[i * bs - 1]; 1494ad540459SPierre Jolivet for (j = 1; j < bs; j++) (*ia)[i * bs + j] = (tia[i + 1] - tia[i]) * bs + (*ia)[i * bs + j - 1]; 14958f7157efSSatish Balay } 1496ad540459SPierre Jolivet if (n) (*ia)[n * bs] = (tia[n] - tia[n - 1]) * bs + (*ia)[n * bs - 1]; 1497ecc77c7aSBarry Smith 14981a83f524SJed Brown if (inja) { 14999566063dSJacob Faibussowitsch 
PetscCall(PetscMalloc1(nz * bs * bs, ja)); 15009985e31cSBarry Smith cnt = 0; 15019985e31cSBarry Smith for (i = 0; i < n; i++) { 15029985e31cSBarry Smith for (j = 0; j < bs; j++) { 15039985e31cSBarry Smith for (k = tia[i]; k < tia[i + 1]; k++) { 1504ad540459SPierre Jolivet for (l = 0; l < bs; l++) (*ja)[cnt++] = bs * tja[k] + l; 15059985e31cSBarry Smith } 15069985e31cSBarry Smith } 15079985e31cSBarry Smith } 15089985e31cSBarry Smith } 15099985e31cSBarry Smith 15108f7157efSSatish Balay if (symmetric) { /* deallocate memory allocated in MatToSymmetricIJ_SeqAIJ() */ 15119566063dSJacob Faibussowitsch PetscCall(PetscFree(tia)); 15129566063dSJacob Faibussowitsch PetscCall(PetscFree(tja)); 15138f7157efSSatish Balay } 1514f6d58c54SBarry Smith } else if (oshift == 1) { 1515715a17b5SBarry Smith if (symmetric) { 1516a2ea699eSBarry Smith nz = tia[A->rmap->n / bs]; 1517715a17b5SBarry Smith /* add 1 to i and j indices */ 1518715a17b5SBarry Smith for (i = 0; i < A->rmap->n / bs + 1; i++) tia[i] = tia[i] + 1; 1519715a17b5SBarry Smith *ia = tia; 1520715a17b5SBarry Smith if (ja) { 1521715a17b5SBarry Smith for (i = 0; i < nz; i++) tja[i] = tja[i] + 1; 1522715a17b5SBarry Smith *ja = tja; 1523715a17b5SBarry Smith } 1524715a17b5SBarry Smith } else { 1525a2ea699eSBarry Smith nz = a->i[A->rmap->n / bs]; 1526f6d58c54SBarry Smith /* malloc space and add 1 to i and j indices */ 15279566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(A->rmap->n / bs + 1, ia)); 1528f6d58c54SBarry Smith for (i = 0; i < A->rmap->n / bs + 1; i++) (*ia)[i] = a->i[i] + 1; 1529f6d58c54SBarry Smith if (ja) { 15309566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(nz, ja)); 1531f6d58c54SBarry Smith for (i = 0; i < nz; i++) (*ja)[i] = a->j[i] + 1; 1532f6d58c54SBarry Smith } 1533715a17b5SBarry Smith } 15348f7157efSSatish Balay } else { 15358f7157efSSatish Balay *ia = tia; 1536ecc77c7aSBarry Smith if (ja) *ja = tja; 15378f7157efSSatish Balay } 15383ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 
15393b2fbd54SBarry Smith } 15403b2fbd54SBarry Smith 1541d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool blockcompressed, PetscInt *nn, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done) 1542d71ae5a4SJacob Faibussowitsch { 15433a40ed3dSBarry Smith PetscFunctionBegin; 15443ba16761SJacob Faibussowitsch if (!ia) PetscFunctionReturn(PETSC_SUCCESS); 1545715a17b5SBarry Smith if ((!blockcompressed && A->rmap->bs > 1) || (symmetric || oshift == 1)) { 15469566063dSJacob Faibussowitsch PetscCall(PetscFree(*ia)); 15479566063dSJacob Faibussowitsch if (ja) PetscCall(PetscFree(*ja)); 15483b2fbd54SBarry Smith } 15493ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 15503b2fbd54SBarry Smith } 15513b2fbd54SBarry Smith 1552d71ae5a4SJacob Faibussowitsch PetscErrorCode MatDestroy_SeqBAIJ(Mat A) 1553d71ae5a4SJacob Faibussowitsch { 15542d61bbb3SSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 15552d61bbb3SSatish Balay 1556433994e6SBarry Smith PetscFunctionBegin; 1557*b4e2f619SBarry Smith if (A->hash_active) { 1558*b4e2f619SBarry Smith PetscInt bs; 1559*b4e2f619SBarry Smith PetscCall(PetscMemcpy(&A->ops, &a->cops, sizeof(*(A->ops)))); 1560*b4e2f619SBarry Smith PetscCall(PetscHMapIJVDestroy(&a->ht)); 1561*b4e2f619SBarry Smith PetscCall(MatGetBlockSize(A, &bs)); 1562*b4e2f619SBarry Smith if (bs > 1) PetscCall(PetscHSetIJDestroy(&a->bht)); 1563*b4e2f619SBarry Smith PetscCall(PetscFree(a->dnz)); 1564*b4e2f619SBarry Smith PetscCall(PetscFree(a->bdnz)); 1565*b4e2f619SBarry Smith A->hash_active = PETSC_FALSE; 1566*b4e2f619SBarry Smith } 1567aa482453SBarry Smith #if defined(PETSC_USE_LOG) 15683ba16761SJacob Faibussowitsch PetscCall(PetscLogObjectState((PetscObject)A, "Rows=%" PetscInt_FMT ", Cols=%" PetscInt_FMT ", NZ=%" PetscInt_FMT, A->rmap->N, A->cmap->n, a->nz)); 15692d61bbb3SSatish Balay #endif 15709566063dSJacob Faibussowitsch PetscCall(MatSeqXAIJFreeAIJ(A, &a->a, &a->j, &a->i)); 
15719566063dSJacob Faibussowitsch PetscCall(ISDestroy(&a->row)); 15729566063dSJacob Faibussowitsch PetscCall(ISDestroy(&a->col)); 15739566063dSJacob Faibussowitsch if (a->free_diag) PetscCall(PetscFree(a->diag)); 15749566063dSJacob Faibussowitsch PetscCall(PetscFree(a->idiag)); 15759566063dSJacob Faibussowitsch if (a->free_imax_ilen) PetscCall(PetscFree2(a->imax, a->ilen)); 15769566063dSJacob Faibussowitsch PetscCall(PetscFree(a->solve_work)); 15779566063dSJacob Faibussowitsch PetscCall(PetscFree(a->mult_work)); 15789566063dSJacob Faibussowitsch PetscCall(PetscFree(a->sor_workt)); 15799566063dSJacob Faibussowitsch PetscCall(PetscFree(a->sor_work)); 15809566063dSJacob Faibussowitsch PetscCall(ISDestroy(&a->icol)); 15819566063dSJacob Faibussowitsch PetscCall(PetscFree(a->saved_values)); 15829566063dSJacob Faibussowitsch PetscCall(PetscFree2(a->compressedrow.i, a->compressedrow.rindex)); 1583c4319e64SHong Zhang 15849566063dSJacob Faibussowitsch PetscCall(MatDestroy(&a->sbaijMat)); 15859566063dSJacob Faibussowitsch PetscCall(MatDestroy(&a->parent)); 15869566063dSJacob Faibussowitsch PetscCall(PetscFree(A->data)); 1587901853e0SKris Buschelman 15889566063dSJacob Faibussowitsch PetscCall(PetscObjectChangeTypeName((PetscObject)A, NULL)); 15899566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatSeqBAIJGetArray_C", NULL)); 15909566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatSeqBAIJRestoreArray_C", NULL)); 15919566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatStoreValues_C", NULL)); 15929566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatRetrieveValues_C", NULL)); 15939566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatSeqBAIJSetColumnIndices_C", NULL)); 15949566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqbaij_seqaij_C", NULL)); 15959566063dSJacob 
Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqbaij_seqsbaij_C", NULL)); 15969566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatSeqBAIJSetPreallocation_C", NULL)); 15979566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatSeqBAIJSetPreallocationCSR_C", NULL)); 15989566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqbaij_seqbstrm_C", NULL)); 15999566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatIsTranspose_C", NULL)); 16007ea3e4caSstefano_zampini #if defined(PETSC_HAVE_HYPRE) 16019566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqbaij_hypre_C", NULL)); 16027ea3e4caSstefano_zampini #endif 16039566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqbaij_is_C", NULL)); 16042e956fe4SStefano Zampini PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatFactorGetSolverType_C", NULL)); 16053ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 16062d61bbb3SSatish Balay } 16072d61bbb3SSatish Balay 1608d71ae5a4SJacob Faibussowitsch PetscErrorCode MatSetOption_SeqBAIJ(Mat A, MatOption op, PetscBool flg) 1609d71ae5a4SJacob Faibussowitsch { 16102d61bbb3SSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 16112d61bbb3SSatish Balay 16122d61bbb3SSatish Balay PetscFunctionBegin; 1613aa275fccSKris Buschelman switch (op) { 1614d71ae5a4SJacob Faibussowitsch case MAT_ROW_ORIENTED: 1615d71ae5a4SJacob Faibussowitsch a->roworiented = flg; 1616d71ae5a4SJacob Faibussowitsch break; 1617d71ae5a4SJacob Faibussowitsch case MAT_KEEP_NONZERO_PATTERN: 1618d71ae5a4SJacob Faibussowitsch a->keepnonzeropattern = flg; 1619d71ae5a4SJacob Faibussowitsch break; 1620d71ae5a4SJacob Faibussowitsch case MAT_NEW_NONZERO_LOCATIONS: 1621d71ae5a4SJacob Faibussowitsch a->nonew = (flg ? 
0 : 1); 1622d71ae5a4SJacob Faibussowitsch break; 1623d71ae5a4SJacob Faibussowitsch case MAT_NEW_NONZERO_LOCATION_ERR: 1624d71ae5a4SJacob Faibussowitsch a->nonew = (flg ? -1 : 0); 1625d71ae5a4SJacob Faibussowitsch break; 1626d71ae5a4SJacob Faibussowitsch case MAT_NEW_NONZERO_ALLOCATION_ERR: 1627d71ae5a4SJacob Faibussowitsch a->nonew = (flg ? -2 : 0); 1628d71ae5a4SJacob Faibussowitsch break; 1629d71ae5a4SJacob Faibussowitsch case MAT_UNUSED_NONZERO_LOCATION_ERR: 1630d71ae5a4SJacob Faibussowitsch a->nounused = (flg ? -1 : 0); 1631d71ae5a4SJacob Faibussowitsch break; 16328c78258cSHong Zhang case MAT_FORCE_DIAGONAL_ENTRIES: 1633aa275fccSKris Buschelman case MAT_IGNORE_OFF_PROC_ENTRIES: 1634aa275fccSKris Buschelman case MAT_USE_HASH_TABLE: 1635d71ae5a4SJacob Faibussowitsch case MAT_SORTED_FULL: 1636d71ae5a4SJacob Faibussowitsch PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op])); 1637d71ae5a4SJacob Faibussowitsch break; 16385021d80fSJed Brown case MAT_SPD: 163977e54ba9SKris Buschelman case MAT_SYMMETRIC: 164077e54ba9SKris Buschelman case MAT_STRUCTURALLY_SYMMETRIC: 16419a4540c5SBarry Smith case MAT_HERMITIAN: 16429a4540c5SBarry Smith case MAT_SYMMETRY_ETERNAL: 1643b94d7dedSBarry Smith case MAT_STRUCTURAL_SYMMETRY_ETERNAL: 1644c10200c1SHong Zhang case MAT_SUBMAT_SINGLEIS: 1645672ba085SHong Zhang case MAT_STRUCTURE_ONLY: 1646b94d7dedSBarry Smith case MAT_SPD_ETERNAL: 1647b94d7dedSBarry Smith /* if the diagonal matrix is square it inherits some of the properties above */ 164877e54ba9SKris Buschelman break; 1649d71ae5a4SJacob Faibussowitsch default: 1650d71ae5a4SJacob Faibussowitsch SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "unknown option %d", op); 16512d61bbb3SSatish Balay } 16523ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 16532d61bbb3SSatish Balay } 16542d61bbb3SSatish Balay 165552768537SHong Zhang /* used for both SeqBAIJ and SeqSBAIJ matrices */ 1656d71ae5a4SJacob Faibussowitsch PetscErrorCode MatGetRow_SeqBAIJ_private(Mat A, PetscInt row, 
PetscInt *nz, PetscInt **idx, PetscScalar **v, PetscInt *ai, PetscInt *aj, PetscScalar *aa) 1657d71ae5a4SJacob Faibussowitsch { 165852768537SHong Zhang PetscInt itmp, i, j, k, M, bn, bp, *idx_i, bs, bs2; 165952768537SHong Zhang MatScalar *aa_i; 166087828ca2SBarry Smith PetscScalar *v_i; 16612d61bbb3SSatish Balay 16622d61bbb3SSatish Balay PetscFunctionBegin; 1663d0f46423SBarry Smith bs = A->rmap->bs; 166452768537SHong Zhang bs2 = bs * bs; 16655f80ce2aSJacob Faibussowitsch PetscCheck(row >= 0 && row < A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " out of range", row); 16662d61bbb3SSatish Balay 16672d61bbb3SSatish Balay bn = row / bs; /* Block number */ 16682d61bbb3SSatish Balay bp = row % bs; /* Block Position */ 16692d61bbb3SSatish Balay M = ai[bn + 1] - ai[bn]; 16702d61bbb3SSatish Balay *nz = bs * M; 16712d61bbb3SSatish Balay 16722d61bbb3SSatish Balay if (v) { 1673f4259b30SLisandro Dalcin *v = NULL; 16742d61bbb3SSatish Balay if (*nz) { 16759566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(*nz, v)); 16762d61bbb3SSatish Balay for (i = 0; i < M; i++) { /* for each block in the block row */ 16772d61bbb3SSatish Balay v_i = *v + i * bs; 16782d61bbb3SSatish Balay aa_i = aa + bs2 * (ai[bn] + i); 167926fbe8dcSKarl Rupp for (j = bp, k = 0; j < bs2; j += bs, k++) v_i[k] = aa_i[j]; 16802d61bbb3SSatish Balay } 16812d61bbb3SSatish Balay } 16822d61bbb3SSatish Balay } 16832d61bbb3SSatish Balay 16842d61bbb3SSatish Balay if (idx) { 1685f4259b30SLisandro Dalcin *idx = NULL; 16862d61bbb3SSatish Balay if (*nz) { 16879566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(*nz, idx)); 16882d61bbb3SSatish Balay for (i = 0; i < M; i++) { /* for each block in the block row */ 16892d61bbb3SSatish Balay idx_i = *idx + i * bs; 16902d61bbb3SSatish Balay itmp = bs * aj[ai[bn] + i]; 169126fbe8dcSKarl Rupp for (j = 0; j < bs; j++) idx_i[j] = itmp++; 16922d61bbb3SSatish Balay } 16932d61bbb3SSatish Balay } 16942d61bbb3SSatish Balay } 16953ba16761SJacob Faibussowitsch 
PetscFunctionReturn(PETSC_SUCCESS); 16962d61bbb3SSatish Balay } 16972d61bbb3SSatish Balay 1698d71ae5a4SJacob Faibussowitsch PetscErrorCode MatGetRow_SeqBAIJ(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v) 1699d71ae5a4SJacob Faibussowitsch { 170052768537SHong Zhang Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 170152768537SHong Zhang 170252768537SHong Zhang PetscFunctionBegin; 17039566063dSJacob Faibussowitsch PetscCall(MatGetRow_SeqBAIJ_private(A, row, nz, idx, v, a->i, a->j, a->a)); 17043ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 170552768537SHong Zhang } 170652768537SHong Zhang 1707d71ae5a4SJacob Faibussowitsch PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v) 1708d71ae5a4SJacob Faibussowitsch { 17092d61bbb3SSatish Balay PetscFunctionBegin; 1710cb4a9cd9SHong Zhang if (nz) *nz = 0; 17119566063dSJacob Faibussowitsch if (idx) PetscCall(PetscFree(*idx)); 17129566063dSJacob Faibussowitsch if (v) PetscCall(PetscFree(*v)); 17133ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 17142d61bbb3SSatish Balay } 17152d61bbb3SSatish Balay 1716d71ae5a4SJacob Faibussowitsch PetscErrorCode MatTranspose_SeqBAIJ(Mat A, MatReuse reuse, Mat *B) 1717d71ae5a4SJacob Faibussowitsch { 171820e84f26SHong Zhang Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data, *at; 17192d61bbb3SSatish Balay Mat C; 172020e84f26SHong Zhang PetscInt i, j, k, *aj = a->j, *ai = a->i, bs = A->rmap->bs, mbs = a->mbs, nbs = a->nbs, *atfill; 172120e84f26SHong Zhang PetscInt bs2 = a->bs2, *ati, *atj, anzj, kr; 172220e84f26SHong Zhang MatScalar *ata, *aa = a->a; 17232d61bbb3SSatish Balay 17242d61bbb3SSatish Balay PetscFunctionBegin; 17257fb60732SBarry Smith if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *B)); 17269566063dSJacob Faibussowitsch PetscCall(PetscCalloc1(1 + nbs, &atfill)); 1727cf37664fSBarry Smith if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) { 
172820e84f26SHong Zhang for (i = 0; i < ai[mbs]; i++) atfill[aj[i]] += 1; /* count num of non-zeros in row aj[i] */ 17292d61bbb3SSatish Balay 17309566063dSJacob Faibussowitsch PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &C)); 17319566063dSJacob Faibussowitsch PetscCall(MatSetSizes(C, A->cmap->n, A->rmap->N, A->cmap->n, A->rmap->N)); 17329566063dSJacob Faibussowitsch PetscCall(MatSetType(C, ((PetscObject)A)->type_name)); 17339566063dSJacob Faibussowitsch PetscCall(MatSeqBAIJSetPreallocation(C, bs, 0, atfill)); 173420e84f26SHong Zhang 173520e84f26SHong Zhang at = (Mat_SeqBAIJ *)C->data; 173620e84f26SHong Zhang ati = at->i; 173720e84f26SHong Zhang for (i = 0; i < nbs; i++) at->ilen[i] = at->imax[i] = ati[i + 1] - ati[i]; 1738fc4dec0aSBarry Smith } else { 1739fc4dec0aSBarry Smith C = *B; 174020e84f26SHong Zhang at = (Mat_SeqBAIJ *)C->data; 174120e84f26SHong Zhang ati = at->i; 1742fc4dec0aSBarry Smith } 1743fc4dec0aSBarry Smith 174420e84f26SHong Zhang atj = at->j; 174520e84f26SHong Zhang ata = at->a; 174620e84f26SHong Zhang 174720e84f26SHong Zhang /* Copy ati into atfill so we have locations of the next free space in atj */ 17489566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(atfill, ati, nbs)); 174920e84f26SHong Zhang 175020e84f26SHong Zhang /* Walk through A row-wise and mark nonzero entries of A^T. 
*/ 17512d61bbb3SSatish Balay for (i = 0; i < mbs; i++) { 175220e84f26SHong Zhang anzj = ai[i + 1] - ai[i]; 175320e84f26SHong Zhang for (j = 0; j < anzj; j++) { 175420e84f26SHong Zhang atj[atfill[*aj]] = i; 175520e84f26SHong Zhang for (kr = 0; kr < bs; kr++) { 1756ad540459SPierre Jolivet for (k = 0; k < bs; k++) ata[bs2 * atfill[*aj] + k * bs + kr] = *aa++; 17572d61bbb3SSatish Balay } 175820e84f26SHong Zhang atfill[*aj++] += 1; 175920e84f26SHong Zhang } 176020e84f26SHong Zhang } 17619566063dSJacob Faibussowitsch PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY)); 17629566063dSJacob Faibussowitsch PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY)); 17632d61bbb3SSatish Balay 176420e84f26SHong Zhang /* Clean up temporary space and complete requests. */ 17659566063dSJacob Faibussowitsch PetscCall(PetscFree(atfill)); 176620e84f26SHong Zhang 1767cf37664fSBarry Smith if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) { 17689566063dSJacob Faibussowitsch PetscCall(MatSetBlockSizes(C, PetscAbs(A->cmap->bs), PetscAbs(A->rmap->bs))); 17692d61bbb3SSatish Balay *B = C; 17702d61bbb3SSatish Balay } else { 17719566063dSJacob Faibussowitsch PetscCall(MatHeaderMerge(A, &C)); 17722d61bbb3SSatish Balay } 17733ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 17742d61bbb3SSatish Balay } 17752d61bbb3SSatish Balay 1776ff6a9541SJacob Faibussowitsch static PetscErrorCode MatIsTranspose_SeqBAIJ(Mat A, Mat B, PetscReal tol, PetscBool *f) 1777d71ae5a4SJacob Faibussowitsch { 1778453d3561SHong Zhang Mat Btrans; 1779453d3561SHong Zhang 1780453d3561SHong Zhang PetscFunctionBegin; 1781453d3561SHong Zhang *f = PETSC_FALSE; 1782acd337a6SBarry Smith PetscCall(MatTranspose(A, MAT_INITIAL_MATRIX, &Btrans)); 17839566063dSJacob Faibussowitsch PetscCall(MatEqual_SeqBAIJ(B, Btrans, f)); 17849566063dSJacob Faibussowitsch PetscCall(MatDestroy(&Btrans)); 17853ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1786453d3561SHong Zhang } 1787453d3561SHong Zhang 
1788618cc2edSLisandro Dalcin /* Used for both SeqBAIJ and SeqSBAIJ matrices */ 1789d71ae5a4SJacob Faibussowitsch PetscErrorCode MatView_SeqBAIJ_Binary(Mat mat, PetscViewer viewer) 1790d71ae5a4SJacob Faibussowitsch { 1791b51a4376SLisandro Dalcin Mat_SeqBAIJ *A = (Mat_SeqBAIJ *)mat->data; 1792b51a4376SLisandro Dalcin PetscInt header[4], M, N, m, bs, nz, cnt, i, j, k, l; 1793b51a4376SLisandro Dalcin PetscInt *rowlens, *colidxs; 1794b51a4376SLisandro Dalcin PetscScalar *matvals; 17952593348eSBarry Smith 17963a40ed3dSBarry Smith PetscFunctionBegin; 17979566063dSJacob Faibussowitsch PetscCall(PetscViewerSetUp(viewer)); 17983b2fbd54SBarry Smith 1799b51a4376SLisandro Dalcin M = mat->rmap->N; 1800b51a4376SLisandro Dalcin N = mat->cmap->N; 1801b51a4376SLisandro Dalcin m = mat->rmap->n; 1802b51a4376SLisandro Dalcin bs = mat->rmap->bs; 1803b51a4376SLisandro Dalcin nz = bs * bs * A->nz; 18042593348eSBarry Smith 1805b51a4376SLisandro Dalcin /* write matrix header */ 1806b51a4376SLisandro Dalcin header[0] = MAT_FILE_CLASSID; 18079371c9d4SSatish Balay header[1] = M; 18089371c9d4SSatish Balay header[2] = N; 18099371c9d4SSatish Balay header[3] = nz; 18109566063dSJacob Faibussowitsch PetscCall(PetscViewerBinaryWrite(viewer, header, 4, PETSC_INT)); 18112593348eSBarry Smith 1812b51a4376SLisandro Dalcin /* store row lengths */ 18139566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(m, &rowlens)); 1814b51a4376SLisandro Dalcin for (cnt = 0, i = 0; i < A->mbs; i++) 18159371c9d4SSatish Balay for (j = 0; j < bs; j++) rowlens[cnt++] = bs * (A->i[i + 1] - A->i[i]); 18169566063dSJacob Faibussowitsch PetscCall(PetscViewerBinaryWrite(viewer, rowlens, m, PETSC_INT)); 18179566063dSJacob Faibussowitsch PetscCall(PetscFree(rowlens)); 1818b51a4376SLisandro Dalcin 1819b51a4376SLisandro Dalcin /* store column indices */ 18209566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(nz, &colidxs)); 1821b51a4376SLisandro Dalcin for (cnt = 0, i = 0; i < A->mbs; i++) 1822b51a4376SLisandro Dalcin for (k = 0; k < 
bs; k++) 1823b51a4376SLisandro Dalcin for (j = A->i[i]; j < A->i[i + 1]; j++) 18249371c9d4SSatish Balay for (l = 0; l < bs; l++) colidxs[cnt++] = bs * A->j[j] + l; 18255f80ce2aSJacob Faibussowitsch PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_LIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt_FMT, cnt, nz); 18269566063dSJacob Faibussowitsch PetscCall(PetscViewerBinaryWrite(viewer, colidxs, nz, PETSC_INT)); 18279566063dSJacob Faibussowitsch PetscCall(PetscFree(colidxs)); 18282593348eSBarry Smith 18292593348eSBarry Smith /* store nonzero values */ 18309566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(nz, &matvals)); 1831b51a4376SLisandro Dalcin for (cnt = 0, i = 0; i < A->mbs; i++) 1832b51a4376SLisandro Dalcin for (k = 0; k < bs; k++) 1833b51a4376SLisandro Dalcin for (j = A->i[i]; j < A->i[i + 1]; j++) 18349371c9d4SSatish Balay for (l = 0; l < bs; l++) matvals[cnt++] = A->a[bs * (bs * j + l) + k]; 18355f80ce2aSJacob Faibussowitsch PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_LIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt_FMT, cnt, nz); 18369566063dSJacob Faibussowitsch PetscCall(PetscViewerBinaryWrite(viewer, matvals, nz, PETSC_SCALAR)); 18379566063dSJacob Faibussowitsch PetscCall(PetscFree(matvals)); 1838ce6f0cecSBarry Smith 1839b51a4376SLisandro Dalcin /* write block size option to the viewer's .info file */ 18409566063dSJacob Faibussowitsch PetscCall(MatView_Binary_BlockSizes(mat, viewer)); 18413ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 18422593348eSBarry Smith } 18432593348eSBarry Smith 1844d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatView_SeqBAIJ_ASCII_structonly(Mat A, PetscViewer viewer) 1845d71ae5a4SJacob Faibussowitsch { 18467dc0baabSHong Zhang Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 18477dc0baabSHong Zhang PetscInt i, bs = A->rmap->bs, k; 18487dc0baabSHong Zhang 18497dc0baabSHong Zhang PetscFunctionBegin; 18509566063dSJacob Faibussowitsch 
PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_FALSE)); 18517dc0baabSHong Zhang for (i = 0; i < a->mbs; i++) { 18529566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, "row %" PetscInt_FMT "-%" PetscInt_FMT ":", i * bs, i * bs + bs - 1)); 185348a46eb9SPierre Jolivet for (k = a->i[i]; k < a->i[i + 1]; k++) PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT "-%" PetscInt_FMT ") ", bs * a->j[k], bs * a->j[k] + bs - 1)); 18549566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, "\n")); 18557dc0baabSHong Zhang } 18569566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_TRUE)); 18573ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 18587dc0baabSHong Zhang } 18597dc0baabSHong Zhang 1860d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A, PetscViewer viewer) 1861d71ae5a4SJacob Faibussowitsch { 1862b6490206SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 1863d0f46423SBarry Smith PetscInt i, j, bs = A->rmap->bs, k, l, bs2 = a->bs2; 1864f3ef73ceSBarry Smith PetscViewerFormat format; 18652593348eSBarry Smith 18663a40ed3dSBarry Smith PetscFunctionBegin; 18677dc0baabSHong Zhang if (A->structure_only) { 18689566063dSJacob Faibussowitsch PetscCall(MatView_SeqBAIJ_ASCII_structonly(A, viewer)); 18693ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 18707dc0baabSHong Zhang } 18717dc0baabSHong Zhang 18729566063dSJacob Faibussowitsch PetscCall(PetscViewerGetFormat(viewer, &format)); 1873456192e2SBarry Smith if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 18749566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " block size is %" PetscInt_FMT "\n", bs)); 1875fb9695e5SSatish Balay } else if (format == PETSC_VIEWER_ASCII_MATLAB) { 1876ade3a672SBarry Smith const char *matname; 1877bcd9e38bSBarry Smith Mat aij; 18789566063dSJacob Faibussowitsch PetscCall(MatConvert(A, MATSEQAIJ, MAT_INITIAL_MATRIX, &aij)); 
18799566063dSJacob Faibussowitsch PetscCall(PetscObjectGetName((PetscObject)A, &matname)); 18809566063dSJacob Faibussowitsch PetscCall(PetscObjectSetName((PetscObject)aij, matname)); 18819566063dSJacob Faibussowitsch PetscCall(MatView(aij, viewer)); 18829566063dSJacob Faibussowitsch PetscCall(MatDestroy(&aij)); 188304929863SHong Zhang } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 18843ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1885fb9695e5SSatish Balay } else if (format == PETSC_VIEWER_ASCII_COMMON) { 18869566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_FALSE)); 188744cd7ae7SLois Curfman McInnes for (i = 0; i < a->mbs; i++) { 188844cd7ae7SLois Curfman McInnes for (j = 0; j < bs; j++) { 18899566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, "row %" PetscInt_FMT ":", i * bs + j)); 189044cd7ae7SLois Curfman McInnes for (k = a->i[i]; k < a->i[i + 1]; k++) { 189144cd7ae7SLois Curfman McInnes for (l = 0; l < bs; l++) { 1892aa482453SBarry Smith #if defined(PETSC_USE_COMPLEX) 18930e6d2581SBarry Smith if (PetscImaginaryPart(a->a[bs2 * k + l * bs + j]) > 0.0 && PetscRealPart(a->a[bs2 * k + l * bs + j]) != 0.0) { 18949371c9d4SSatish Balay PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g + %gi) ", bs * a->j[k] + l, (double)PetscRealPart(a->a[bs2 * k + l * bs + j]), (double)PetscImaginaryPart(a->a[bs2 * k + l * bs + j]))); 18950e6d2581SBarry Smith } else if (PetscImaginaryPart(a->a[bs2 * k + l * bs + j]) < 0.0 && PetscRealPart(a->a[bs2 * k + l * bs + j]) != 0.0) { 18969371c9d4SSatish Balay PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g - %gi) ", bs * a->j[k] + l, (double)PetscRealPart(a->a[bs2 * k + l * bs + j]), -(double)PetscImaginaryPart(a->a[bs2 * k + l * bs + j]))); 18970e6d2581SBarry Smith } else if (PetscRealPart(a->a[bs2 * k + l * bs + j]) != 0.0) { 18989566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g) ", bs * 
a->j[k] + l, (double)PetscRealPart(a->a[bs2 * k + l * bs + j]))); 18990ef38995SBarry Smith } 190044cd7ae7SLois Curfman McInnes #else 190148a46eb9SPierre Jolivet if (a->a[bs2 * k + l * bs + j] != 0.0) PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g) ", bs * a->j[k] + l, (double)a->a[bs2 * k + l * bs + j])); 190244cd7ae7SLois Curfman McInnes #endif 190344cd7ae7SLois Curfman McInnes } 190444cd7ae7SLois Curfman McInnes } 19059566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, "\n")); 190644cd7ae7SLois Curfman McInnes } 190744cd7ae7SLois Curfman McInnes } 19089566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_TRUE)); 19090ef38995SBarry Smith } else { 19109566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_FALSE)); 1911b6490206SBarry Smith for (i = 0; i < a->mbs; i++) { 1912b6490206SBarry Smith for (j = 0; j < bs; j++) { 19139566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, "row %" PetscInt_FMT ":", i * bs + j)); 1914b6490206SBarry Smith for (k = a->i[i]; k < a->i[i + 1]; k++) { 1915b6490206SBarry Smith for (l = 0; l < bs; l++) { 1916aa482453SBarry Smith #if defined(PETSC_USE_COMPLEX) 19170e6d2581SBarry Smith if (PetscImaginaryPart(a->a[bs2 * k + l * bs + j]) > 0.0) { 19189371c9d4SSatish Balay PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g + %g i) ", bs * a->j[k] + l, (double)PetscRealPart(a->a[bs2 * k + l * bs + j]), (double)PetscImaginaryPart(a->a[bs2 * k + l * bs + j]))); 19190e6d2581SBarry Smith } else if (PetscImaginaryPart(a->a[bs2 * k + l * bs + j]) < 0.0) { 19209371c9d4SSatish Balay PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g - %g i) ", bs * a->j[k] + l, (double)PetscRealPart(a->a[bs2 * k + l * bs + j]), -(double)PetscImaginaryPart(a->a[bs2 * k + l * bs + j]))); 19210ef38995SBarry Smith } else { 19229566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g) ", bs * a->j[k] + 
l, (double)PetscRealPart(a->a[bs2 * k + l * bs + j]))); 192388685aaeSLois Curfman McInnes } 192488685aaeSLois Curfman McInnes #else 19259566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g) ", bs * a->j[k] + l, (double)a->a[bs2 * k + l * bs + j])); 192688685aaeSLois Curfman McInnes #endif 19272593348eSBarry Smith } 19282593348eSBarry Smith } 19299566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, "\n")); 19302593348eSBarry Smith } 19312593348eSBarry Smith } 19329566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_TRUE)); 1933b6490206SBarry Smith } 19349566063dSJacob Faibussowitsch PetscCall(PetscViewerFlush(viewer)); 19353ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 19362593348eSBarry Smith } 19372593348eSBarry Smith 19389804daf3SBarry Smith #include <petscdraw.h> 1939d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw, void *Aa) 1940d71ae5a4SJacob Faibussowitsch { 194177ed5343SBarry Smith Mat A = (Mat)Aa; 19423270192aSSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 1943d0f46423SBarry Smith PetscInt row, i, j, k, l, mbs = a->mbs, color, bs = A->rmap->bs, bs2 = a->bs2; 19440e6d2581SBarry Smith PetscReal xl, yl, xr, yr, x_l, x_r, y_l, y_r; 19453f1db9ecSBarry Smith MatScalar *aa; 1946b0a32e0cSBarry Smith PetscViewer viewer; 1947b3e7f47fSJed Brown PetscViewerFormat format; 19483270192aSSatish Balay 19493a40ed3dSBarry Smith PetscFunctionBegin; 19509566063dSJacob Faibussowitsch PetscCall(PetscObjectQuery((PetscObject)A, "Zoomviewer", (PetscObject *)&viewer)); 19519566063dSJacob Faibussowitsch PetscCall(PetscViewerGetFormat(viewer, &format)); 19529566063dSJacob Faibussowitsch PetscCall(PetscDrawGetCoordinates(draw, &xl, &yl, &xr, &yr)); 195377ed5343SBarry Smith 19543270192aSSatish Balay /* loop over matrix elements drawing boxes */ 1955b3e7f47fSJed Brown 1956b3e7f47fSJed Brown if (format != PETSC_VIEWER_DRAW_CONTOUR) { 
1957d0609cedSBarry Smith PetscDrawCollectiveBegin(draw); 1958383922c3SLisandro Dalcin /* Blue for negative, Cyan for zero and Red for positive */ 1959b0a32e0cSBarry Smith color = PETSC_DRAW_BLUE; 19603270192aSSatish Balay for (i = 0, row = 0; i < mbs; i++, row += bs) { 19613270192aSSatish Balay for (j = a->i[i]; j < a->i[i + 1]; j++) { 19629371c9d4SSatish Balay y_l = A->rmap->N - row - 1.0; 19639371c9d4SSatish Balay y_r = y_l + 1.0; 19649371c9d4SSatish Balay x_l = a->j[j] * bs; 19659371c9d4SSatish Balay x_r = x_l + 1.0; 19663270192aSSatish Balay aa = a->a + j * bs2; 19673270192aSSatish Balay for (k = 0; k < bs; k++) { 19683270192aSSatish Balay for (l = 0; l < bs; l++) { 19690e6d2581SBarry Smith if (PetscRealPart(*aa++) >= 0.) continue; 19709566063dSJacob Faibussowitsch PetscCall(PetscDrawRectangle(draw, x_l + k, y_l - l, x_r + k, y_r - l, color, color, color, color)); 19713270192aSSatish Balay } 19723270192aSSatish Balay } 19733270192aSSatish Balay } 19743270192aSSatish Balay } 1975b0a32e0cSBarry Smith color = PETSC_DRAW_CYAN; 19763270192aSSatish Balay for (i = 0, row = 0; i < mbs; i++, row += bs) { 19773270192aSSatish Balay for (j = a->i[i]; j < a->i[i + 1]; j++) { 19789371c9d4SSatish Balay y_l = A->rmap->N - row - 1.0; 19799371c9d4SSatish Balay y_r = y_l + 1.0; 19809371c9d4SSatish Balay x_l = a->j[j] * bs; 19819371c9d4SSatish Balay x_r = x_l + 1.0; 19823270192aSSatish Balay aa = a->a + j * bs2; 19833270192aSSatish Balay for (k = 0; k < bs; k++) { 19843270192aSSatish Balay for (l = 0; l < bs; l++) { 19850e6d2581SBarry Smith if (PetscRealPart(*aa++) != 0.) 
continue; 19869566063dSJacob Faibussowitsch PetscCall(PetscDrawRectangle(draw, x_l + k, y_l - l, x_r + k, y_r - l, color, color, color, color)); 19873270192aSSatish Balay } 19883270192aSSatish Balay } 19893270192aSSatish Balay } 19903270192aSSatish Balay } 1991b0a32e0cSBarry Smith color = PETSC_DRAW_RED; 19923270192aSSatish Balay for (i = 0, row = 0; i < mbs; i++, row += bs) { 19933270192aSSatish Balay for (j = a->i[i]; j < a->i[i + 1]; j++) { 19949371c9d4SSatish Balay y_l = A->rmap->N - row - 1.0; 19959371c9d4SSatish Balay y_r = y_l + 1.0; 19969371c9d4SSatish Balay x_l = a->j[j] * bs; 19979371c9d4SSatish Balay x_r = x_l + 1.0; 19983270192aSSatish Balay aa = a->a + j * bs2; 19993270192aSSatish Balay for (k = 0; k < bs; k++) { 20003270192aSSatish Balay for (l = 0; l < bs; l++) { 20010e6d2581SBarry Smith if (PetscRealPart(*aa++) <= 0.) continue; 20029566063dSJacob Faibussowitsch PetscCall(PetscDrawRectangle(draw, x_l + k, y_l - l, x_r + k, y_r - l, color, color, color, color)); 20033270192aSSatish Balay } 20043270192aSSatish Balay } 20053270192aSSatish Balay } 20063270192aSSatish Balay } 2007d0609cedSBarry Smith PetscDrawCollectiveEnd(draw); 2008b3e7f47fSJed Brown } else { 2009b3e7f47fSJed Brown /* use contour shading to indicate magnitude of values */ 2010b3e7f47fSJed Brown /* first determine max of all nonzero values */ 2011b05fc000SLisandro Dalcin PetscReal minv = 0.0, maxv = 0.0; 2012b3e7f47fSJed Brown PetscDraw popup; 2013b3e7f47fSJed Brown 2014b3e7f47fSJed Brown for (i = 0; i < a->nz * a->bs2; i++) { 2015b3e7f47fSJed Brown if (PetscAbsScalar(a->a[i]) > maxv) maxv = PetscAbsScalar(a->a[i]); 2016b3e7f47fSJed Brown } 2017383922c3SLisandro Dalcin if (minv >= maxv) maxv = minv + PETSC_SMALL; 20189566063dSJacob Faibussowitsch PetscCall(PetscDrawGetPopup(draw, &popup)); 20199566063dSJacob Faibussowitsch PetscCall(PetscDrawScalePopup(popup, 0.0, maxv)); 2020383922c3SLisandro Dalcin 2021d0609cedSBarry Smith PetscDrawCollectiveBegin(draw); 2022b3e7f47fSJed Brown for (i = 
0, row = 0; i < mbs; i++, row += bs) { 2023b3e7f47fSJed Brown for (j = a->i[i]; j < a->i[i + 1]; j++) { 20249371c9d4SSatish Balay y_l = A->rmap->N - row - 1.0; 20259371c9d4SSatish Balay y_r = y_l + 1.0; 20269371c9d4SSatish Balay x_l = a->j[j] * bs; 20279371c9d4SSatish Balay x_r = x_l + 1.0; 2028b3e7f47fSJed Brown aa = a->a + j * bs2; 2029b3e7f47fSJed Brown for (k = 0; k < bs; k++) { 2030b3e7f47fSJed Brown for (l = 0; l < bs; l++) { 2031383922c3SLisandro Dalcin MatScalar v = *aa++; 2032383922c3SLisandro Dalcin color = PetscDrawRealToColor(PetscAbsScalar(v), minv, maxv); 20339566063dSJacob Faibussowitsch PetscCall(PetscDrawRectangle(draw, x_l + k, y_l - l, x_r + k, y_r - l, color, color, color, color)); 2034b3e7f47fSJed Brown } 2035b3e7f47fSJed Brown } 2036b3e7f47fSJed Brown } 2037b3e7f47fSJed Brown } 2038d0609cedSBarry Smith PetscDrawCollectiveEnd(draw); 2039b3e7f47fSJed Brown } 20403ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 204177ed5343SBarry Smith } 20423270192aSSatish Balay 2043d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A, PetscViewer viewer) 2044d71ae5a4SJacob Faibussowitsch { 20450e6d2581SBarry Smith PetscReal xl, yl, xr, yr, w, h; 2046b0a32e0cSBarry Smith PetscDraw draw; 2047ace3abfcSBarry Smith PetscBool isnull; 20483270192aSSatish Balay 204977ed5343SBarry Smith PetscFunctionBegin; 20509566063dSJacob Faibussowitsch PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw)); 20519566063dSJacob Faibussowitsch PetscCall(PetscDrawIsNull(draw, &isnull)); 20523ba16761SJacob Faibussowitsch if (isnull) PetscFunctionReturn(PETSC_SUCCESS); 205377ed5343SBarry Smith 20549371c9d4SSatish Balay xr = A->cmap->n; 20559371c9d4SSatish Balay yr = A->rmap->N; 20569371c9d4SSatish Balay h = yr / 10.0; 20579371c9d4SSatish Balay w = xr / 10.0; 20589371c9d4SSatish Balay xr += w; 20599371c9d4SSatish Balay yr += h; 20609371c9d4SSatish Balay xl = -w; 20619371c9d4SSatish Balay yl = -h; 20629566063dSJacob Faibussowitsch 
PetscCall(PetscDrawSetCoordinates(draw, xl, yl, xr, yr)); 20639566063dSJacob Faibussowitsch PetscCall(PetscObjectCompose((PetscObject)A, "Zoomviewer", (PetscObject)viewer)); 20649566063dSJacob Faibussowitsch PetscCall(PetscDrawZoom(draw, MatView_SeqBAIJ_Draw_Zoom, A)); 20659566063dSJacob Faibussowitsch PetscCall(PetscObjectCompose((PetscObject)A, "Zoomviewer", NULL)); 20669566063dSJacob Faibussowitsch PetscCall(PetscDrawSave(draw)); 20673ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 20683270192aSSatish Balay } 20693270192aSSatish Balay 2070d71ae5a4SJacob Faibussowitsch PetscErrorCode MatView_SeqBAIJ(Mat A, PetscViewer viewer) 2071d71ae5a4SJacob Faibussowitsch { 2072ace3abfcSBarry Smith PetscBool iascii, isbinary, isdraw; 20732593348eSBarry Smith 20743a40ed3dSBarry Smith PetscFunctionBegin; 20759566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii)); 20769566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary)); 20779566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw)); 207832077d6dSBarry Smith if (iascii) { 20799566063dSJacob Faibussowitsch PetscCall(MatView_SeqBAIJ_ASCII(A, viewer)); 20800f5bd95cSBarry Smith } else if (isbinary) { 20819566063dSJacob Faibussowitsch PetscCall(MatView_SeqBAIJ_Binary(A, viewer)); 20820f5bd95cSBarry Smith } else if (isdraw) { 20839566063dSJacob Faibussowitsch PetscCall(MatView_SeqBAIJ_Draw(A, viewer)); 20845cd90555SBarry Smith } else { 2085a5e6ed63SBarry Smith Mat B; 20869566063dSJacob Faibussowitsch PetscCall(MatConvert(A, MATSEQAIJ, MAT_INITIAL_MATRIX, &B)); 20879566063dSJacob Faibussowitsch PetscCall(MatView(B, viewer)); 20889566063dSJacob Faibussowitsch PetscCall(MatDestroy(&B)); 20892593348eSBarry Smith } 20903ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 20912593348eSBarry Smith } 2092b6490206SBarry Smith 2093d71ae5a4SJacob 
Faibussowitsch PetscErrorCode MatGetValues_SeqBAIJ(Mat A, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], PetscScalar v[]) 2094d71ae5a4SJacob Faibussowitsch { 2095cd0e1443SSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 2096c1ac3661SBarry Smith PetscInt *rp, k, low, high, t, row, nrow, i, col, l, *aj = a->j; 2097c1ac3661SBarry Smith PetscInt *ai = a->i, *ailen = a->ilen; 2098d0f46423SBarry Smith PetscInt brow, bcol, ridx, cidx, bs = A->rmap->bs, bs2 = a->bs2; 209997e567efSBarry Smith MatScalar *ap, *aa = a->a; 2100cd0e1443SSatish Balay 21013a40ed3dSBarry Smith PetscFunctionBegin; 21022d61bbb3SSatish Balay for (k = 0; k < m; k++) { /* loop over rows */ 21039371c9d4SSatish Balay row = im[k]; 21049371c9d4SSatish Balay brow = row / bs; 21059371c9d4SSatish Balay if (row < 0) { 21069371c9d4SSatish Balay v += n; 21079371c9d4SSatish Balay continue; 21089371c9d4SSatish Balay } /* negative row */ 210954c59aa7SJacob Faibussowitsch PetscCheck(row < A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " too large", row); 2110d29f2997SMatthew Woehlke rp = aj ? aj + ai[brow] : NULL; /* mustn't add to NULL, that is UB */ 2111d29f2997SMatthew Woehlke ap = aa ? 
aa + bs2 * ai[brow] : NULL; /* mustn't add to NULL, that is UB */ 21122c3acbe9SBarry Smith nrow = ailen[brow]; 21132d61bbb3SSatish Balay for (l = 0; l < n; l++) { /* loop over columns */ 21149371c9d4SSatish Balay if (in[l] < 0) { 21159371c9d4SSatish Balay v++; 21169371c9d4SSatish Balay continue; 21179371c9d4SSatish Balay } /* negative column */ 211854c59aa7SJacob Faibussowitsch PetscCheck(in[l] < A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column %" PetscInt_FMT " too large", in[l]); 21192d61bbb3SSatish Balay col = in[l]; 21202d61bbb3SSatish Balay bcol = col / bs; 21212d61bbb3SSatish Balay cidx = col % bs; 21222d61bbb3SSatish Balay ridx = row % bs; 21232d61bbb3SSatish Balay high = nrow; 21242d61bbb3SSatish Balay low = 0; /* assume unsorted */ 21252d61bbb3SSatish Balay while (high - low > 5) { 2126cd0e1443SSatish Balay t = (low + high) / 2; 2127cd0e1443SSatish Balay if (rp[t] > bcol) high = t; 2128cd0e1443SSatish Balay else low = t; 2129cd0e1443SSatish Balay } 2130cd0e1443SSatish Balay for (i = low; i < high; i++) { 2131cd0e1443SSatish Balay if (rp[i] > bcol) break; 2132cd0e1443SSatish Balay if (rp[i] == bcol) { 21332d61bbb3SSatish Balay *v++ = ap[bs2 * i + bs * cidx + ridx]; 21342d61bbb3SSatish Balay goto finished; 2135cd0e1443SSatish Balay } 2136cd0e1443SSatish Balay } 213797e567efSBarry Smith *v++ = 0.0; 21382d61bbb3SSatish Balay finished:; 2139cd0e1443SSatish Balay } 2140cd0e1443SSatish Balay } 21413ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 2142cd0e1443SSatish Balay } 2143cd0e1443SSatish Balay 2144d71ae5a4SJacob Faibussowitsch PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat A, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode is) 2145d71ae5a4SJacob Faibussowitsch { 214692c4ed94SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 2147e2ee6c50SBarry Smith PetscInt *rp, k, low, high, t, ii, jj, row, nrow, i, col, l, rmax, N, lastcol = -1; 2148c1ac3661SBarry Smith PetscInt *imax = 
a->imax, *ai = a->i, *ailen = a->ilen; 2149d0f46423SBarry Smith PetscInt *aj = a->j, nonew = a->nonew, bs2 = a->bs2, bs = A->rmap->bs, stepval; 2150ace3abfcSBarry Smith PetscBool roworiented = a->roworiented; 2151dd6ea824SBarry Smith const PetscScalar *value = v; 21529d243f67SHong Zhang MatScalar *ap = NULL, *aa = a->a, *bap; 215392c4ed94SBarry Smith 21543a40ed3dSBarry Smith PetscFunctionBegin; 21550e324ae4SSatish Balay if (roworiented) { 21560e324ae4SSatish Balay stepval = (n - 1) * bs; 21570e324ae4SSatish Balay } else { 21580e324ae4SSatish Balay stepval = (m - 1) * bs; 21590e324ae4SSatish Balay } 216092c4ed94SBarry Smith for (k = 0; k < m; k++) { /* loop over added rows */ 216192c4ed94SBarry Smith row = im[k]; 21625ef9f2a5SBarry Smith if (row < 0) continue; 21636bdcaf15SBarry Smith PetscCheck(row < a->mbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Block row index too large %" PetscInt_FMT " max %" PetscInt_FMT, row, a->mbs - 1); 216492c4ed94SBarry Smith rp = aj + ai[row]; 21657dc0baabSHong Zhang if (!A->structure_only) ap = aa + bs2 * ai[row]; 216692c4ed94SBarry Smith rmax = imax[row]; 216792c4ed94SBarry Smith nrow = ailen[row]; 216892c4ed94SBarry Smith low = 0; 2169c71e6ed7SBarry Smith high = nrow; 217092c4ed94SBarry Smith for (l = 0; l < n; l++) { /* loop over added columns */ 21715ef9f2a5SBarry Smith if (in[l] < 0) continue; 21726bdcaf15SBarry Smith PetscCheck(in[l] < a->nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Block column index too large %" PetscInt_FMT " max %" PetscInt_FMT, in[l], a->nbs - 1); 217392c4ed94SBarry Smith col = in[l]; 21747dc0baabSHong Zhang if (!A->structure_only) { 217592c4ed94SBarry Smith if (roworiented) { 217653ef36baSBarry Smith value = v + (k * (stepval + bs) + l) * bs; 21770e324ae4SSatish Balay } else { 217853ef36baSBarry Smith value = v + (l * (stepval + bs) + k) * bs; 217992c4ed94SBarry Smith } 21807dc0baabSHong Zhang } 218126fbe8dcSKarl Rupp if (col <= lastcol) low = 0; 218226fbe8dcSKarl Rupp else high = nrow; 
2183e2ee6c50SBarry Smith lastcol = col; 218492c4ed94SBarry Smith while (high - low > 7) { 218592c4ed94SBarry Smith t = (low + high) / 2; 218692c4ed94SBarry Smith if (rp[t] > col) high = t; 218792c4ed94SBarry Smith else low = t; 218892c4ed94SBarry Smith } 218992c4ed94SBarry Smith for (i = low; i < high; i++) { 219092c4ed94SBarry Smith if (rp[i] > col) break; 219192c4ed94SBarry Smith if (rp[i] == col) { 21927dc0baabSHong Zhang if (A->structure_only) goto noinsert2; 21938a84c255SSatish Balay bap = ap + bs2 * i; 21940e324ae4SSatish Balay if (roworiented) { 21958a84c255SSatish Balay if (is == ADD_VALUES) { 2196dd9472c6SBarry Smith for (ii = 0; ii < bs; ii++, value += stepval) { 2197ad540459SPierre Jolivet for (jj = ii; jj < bs2; jj += bs) bap[jj] += *value++; 2198dd9472c6SBarry Smith } 21990e324ae4SSatish Balay } else { 2200dd9472c6SBarry Smith for (ii = 0; ii < bs; ii++, value += stepval) { 2201ad540459SPierre Jolivet for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++; 2202dd9472c6SBarry Smith } 2203dd9472c6SBarry Smith } 22040e324ae4SSatish Balay } else { 22050e324ae4SSatish Balay if (is == ADD_VALUES) { 220653ef36baSBarry Smith for (ii = 0; ii < bs; ii++, value += bs + stepval) { 2207ad540459SPierre Jolivet for (jj = 0; jj < bs; jj++) bap[jj] += value[jj]; 220853ef36baSBarry Smith bap += bs; 2209dd9472c6SBarry Smith } 22100e324ae4SSatish Balay } else { 221153ef36baSBarry Smith for (ii = 0; ii < bs; ii++, value += bs + stepval) { 2212ad540459SPierre Jolivet for (jj = 0; jj < bs; jj++) bap[jj] = value[jj]; 221353ef36baSBarry Smith bap += bs; 22148a84c255SSatish Balay } 2215dd9472c6SBarry Smith } 2216dd9472c6SBarry Smith } 2217f1241b54SBarry Smith goto noinsert2; 221892c4ed94SBarry Smith } 221992c4ed94SBarry Smith } 222089280ab3SLois Curfman McInnes if (nonew == 1) goto noinsert2; 22215f80ce2aSJacob Faibussowitsch PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new blocked index new nonzero block (%" PetscInt_FMT ", %" PetscInt_FMT ") 
in the matrix", row, col); 22227dc0baabSHong Zhang if (A->structure_only) { 22237dc0baabSHong Zhang MatSeqXAIJReallocateAIJ_structure_only(A, a->mbs, bs2, nrow, row, col, rmax, ai, aj, rp, imax, nonew, MatScalar); 22247dc0baabSHong Zhang } else { 2225fef13f97SBarry Smith MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, row, col, rmax, aa, ai, aj, rp, ap, imax, nonew, MatScalar); 22267dc0baabSHong Zhang } 22279371c9d4SSatish Balay N = nrow++ - 1; 22289371c9d4SSatish Balay high++; 222992c4ed94SBarry Smith /* shift up all the later entries in this row */ 22309566063dSJacob Faibussowitsch PetscCall(PetscArraymove(rp + i + 1, rp + i, N - i + 1)); 223192c4ed94SBarry Smith rp[i] = col; 22327dc0baabSHong Zhang if (!A->structure_only) { 22339566063dSJacob Faibussowitsch PetscCall(PetscArraymove(ap + bs2 * (i + 1), ap + bs2 * i, bs2 * (N - i + 1))); 22348a84c255SSatish Balay bap = ap + bs2 * i; 22350e324ae4SSatish Balay if (roworiented) { 2236dd9472c6SBarry Smith for (ii = 0; ii < bs; ii++, value += stepval) { 2237ad540459SPierre Jolivet for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++; 2238dd9472c6SBarry Smith } 22390e324ae4SSatish Balay } else { 2240dd9472c6SBarry Smith for (ii = 0; ii < bs; ii++, value += stepval) { 2241ad540459SPierre Jolivet for (jj = 0; jj < bs; jj++) *bap++ = *value++; 2242dd9472c6SBarry Smith } 2243dd9472c6SBarry Smith } 22447dc0baabSHong Zhang } 2245f1241b54SBarry Smith noinsert2:; 224692c4ed94SBarry Smith low = i; 224792c4ed94SBarry Smith } 224892c4ed94SBarry Smith ailen[row] = nrow; 224992c4ed94SBarry Smith } 22503ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 225192c4ed94SBarry Smith } 225226e093fcSHong Zhang 2253d71ae5a4SJacob Faibussowitsch PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A, MatAssemblyType mode) 2254d71ae5a4SJacob Faibussowitsch { 2255584200bdSSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 2256580bdb30SBarry Smith PetscInt fshift = 0, i, *ai = a->i, *aj = a->j, *imax = a->imax; 2257d0f46423SBarry Smith 
PetscInt m = A->rmap->N, *ip, N, *ailen = a->ilen; 2258c1ac3661SBarry Smith PetscInt mbs = a->mbs, bs2 = a->bs2, rmax = 0; 22593f1db9ecSBarry Smith MatScalar *aa = a->a, *ap; 22603447b6efSHong Zhang PetscReal ratio = 0.6; 2261584200bdSSatish Balay 22623a40ed3dSBarry Smith PetscFunctionBegin; 22633ba16761SJacob Faibussowitsch if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(PETSC_SUCCESS); 2264584200bdSSatish Balay 226543ee02c3SBarry Smith if (m) rmax = ailen[0]; 2266584200bdSSatish Balay for (i = 1; i < mbs; i++) { 2267584200bdSSatish Balay /* move each row back by the amount of empty slots (fshift) before it*/ 2268584200bdSSatish Balay fshift += imax[i - 1] - ailen[i - 1]; 2269d402145bSBarry Smith rmax = PetscMax(rmax, ailen[i]); 2270584200bdSSatish Balay if (fshift) { 2271580bdb30SBarry Smith ip = aj + ai[i]; 2272580bdb30SBarry Smith ap = aa + bs2 * ai[i]; 2273584200bdSSatish Balay N = ailen[i]; 22749566063dSJacob Faibussowitsch PetscCall(PetscArraymove(ip - fshift, ip, N)); 227548a46eb9SPierre Jolivet if (!A->structure_only) PetscCall(PetscArraymove(ap - bs2 * fshift, ap, bs2 * N)); 2276672ba085SHong Zhang } 2277584200bdSSatish Balay ai[i] = ai[i - 1] + ailen[i - 1]; 2278584200bdSSatish Balay } 2279584200bdSSatish Balay if (mbs) { 2280584200bdSSatish Balay fshift += imax[mbs - 1] - ailen[mbs - 1]; 2281584200bdSSatish Balay ai[mbs] = ai[mbs - 1] + ailen[mbs - 1]; 2282584200bdSSatish Balay } 22837c565772SBarry Smith 2284584200bdSSatish Balay /* reset ilen and imax for each row */ 22857c565772SBarry Smith a->nonzerorowcnt = 0; 2286672ba085SHong Zhang if (A->structure_only) { 22879566063dSJacob Faibussowitsch PetscCall(PetscFree2(a->imax, a->ilen)); 2288672ba085SHong Zhang } else { /* !A->structure_only */ 2289584200bdSSatish Balay for (i = 0; i < mbs; i++) { 2290584200bdSSatish Balay ailen[i] = imax[i] = ai[i + 1] - ai[i]; 22917c565772SBarry Smith a->nonzerorowcnt += ((ai[i + 1] - ai[i]) > 0); 2292584200bdSSatish Balay } 2293672ba085SHong Zhang } 
2294a7c10996SSatish Balay a->nz = ai[mbs]; 2295584200bdSSatish Balay 2296584200bdSSatish Balay /* diagonals may have moved, so kill the diagonal pointers */ 2297b01c7715SBarry Smith a->idiagvalid = PETSC_FALSE; 2298ff6a9541SJacob Faibussowitsch if (fshift && a->diag) PetscCall(PetscFree(a->diag)); 22995f80ce2aSJacob Faibussowitsch if (fshift) PetscCheck(a->nounused != -1, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unused space detected in matrix: %" PetscInt_FMT " X %" PetscInt_FMT " block size %" PetscInt_FMT ", %" PetscInt_FMT " unneeded", m, A->cmap->n, A->rmap->bs, fshift * bs2); 23009566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "Matrix size: %" PetscInt_FMT " X %" PetscInt_FMT ", block size %" PetscInt_FMT "; storage space: %" PetscInt_FMT " unneeded, %" PetscInt_FMT " used\n", m, A->cmap->n, A->rmap->bs, fshift * bs2, a->nz * bs2)); 23019566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "Number of mallocs during MatSetValues is %" PetscInt_FMT "\n", a->reallocs)); 23029566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "Most nonzeros blocks in any row is %" PetscInt_FMT "\n", rmax)); 230326fbe8dcSKarl Rupp 23048e58a170SBarry Smith A->info.mallocs += a->reallocs; 2305e2f3b5e9SSatish Balay a->reallocs = 0; 23060e6d2581SBarry Smith A->info.nz_unneeded = (PetscReal)fshift * bs2; 2307647a6520SHong Zhang a->rmax = rmax; 2308cf4441caSHong Zhang 230948a46eb9SPierre Jolivet if (!A->structure_only) PetscCall(MatCheckCompressedRow(A, a->nonzerorowcnt, &a->compressedrow, a->i, mbs, ratio)); 23103ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 2311584200bdSSatish Balay } 2312584200bdSSatish Balay 2313bea157c4SSatish Balay /* 2314bea157c4SSatish Balay This function returns an array of flags which indicate the locations of contiguous 2315bea157c4SSatish Balay blocks that should be zeroed. for eg: if bs = 3 and is = [0,1,2,3,5,6,7,8,9] 2316a5b23f4aSJose E. 
Roman then the resulting sizes = [3,1,1,3,1] corresponding to sets [(0,1,2),(3),(5),(6,7,8),(9)] 2317bea157c4SSatish Balay Assume: sizes should be long enough to hold all the values. 2318bea157c4SSatish Balay */ 2319d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[], PetscInt n, PetscInt bs, PetscInt sizes[], PetscInt *bs_max) 2320d71ae5a4SJacob Faibussowitsch { 2321ff6a9541SJacob Faibussowitsch PetscInt j = 0; 23223a40ed3dSBarry Smith 2323433994e6SBarry Smith PetscFunctionBegin; 2324ff6a9541SJacob Faibussowitsch for (PetscInt i = 0; i < n; j++) { 2325ff6a9541SJacob Faibussowitsch PetscInt row = idx[i]; 2326a5b23f4aSJose E. Roman if (row % bs != 0) { /* Not the beginning of a block */ 2327bea157c4SSatish Balay sizes[j] = 1; 2328bea157c4SSatish Balay i++; 2329e4fda26cSSatish Balay } else if (i + bs > n) { /* complete block doesn't exist (at idx end) */ 2330bea157c4SSatish Balay sizes[j] = 1; /* Also makes sure at least 'bs' values exist for next else */ 2331bea157c4SSatish Balay i++; 23326aad120cSJose E. 
Roman } else { /* Beginning of the block, so check if the complete block exists */ 2333ff6a9541SJacob Faibussowitsch PetscBool flg = PETSC_TRUE; 2334ff6a9541SJacob Faibussowitsch for (PetscInt k = 1; k < bs; k++) { 2335bea157c4SSatish Balay if (row + k != idx[i + k]) { /* break in the block */ 2336bea157c4SSatish Balay flg = PETSC_FALSE; 2337bea157c4SSatish Balay break; 2338d9b7c43dSSatish Balay } 2339bea157c4SSatish Balay } 2340abc0a331SBarry Smith if (flg) { /* No break in the bs */ 2341bea157c4SSatish Balay sizes[j] = bs; 2342bea157c4SSatish Balay i += bs; 2343bea157c4SSatish Balay } else { 2344bea157c4SSatish Balay sizes[j] = 1; 2345bea157c4SSatish Balay i++; 2346bea157c4SSatish Balay } 2347bea157c4SSatish Balay } 2348bea157c4SSatish Balay } 2349bea157c4SSatish Balay *bs_max = j; 23503ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 2351d9b7c43dSSatish Balay } 2352d9b7c43dSSatish Balay 2353d71ae5a4SJacob Faibussowitsch PetscErrorCode MatZeroRows_SeqBAIJ(Mat A, PetscInt is_n, const PetscInt is_idx[], PetscScalar diag, Vec x, Vec b) 2354d71ae5a4SJacob Faibussowitsch { 2355d9b7c43dSSatish Balay Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)A->data; 2356f4df32b1SMatthew Knepley PetscInt i, j, k, count, *rows; 2357d0f46423SBarry Smith PetscInt bs = A->rmap->bs, bs2 = baij->bs2, *sizes, row, bs_max; 235887828ca2SBarry Smith PetscScalar zero = 0.0; 23593f1db9ecSBarry Smith MatScalar *aa; 236097b48c8fSBarry Smith const PetscScalar *xx; 236197b48c8fSBarry Smith PetscScalar *bb; 2362d9b7c43dSSatish Balay 23633a40ed3dSBarry Smith PetscFunctionBegin; 236497b48c8fSBarry Smith /* fix right hand side if needed */ 236597b48c8fSBarry Smith if (x && b) { 23669566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(x, &xx)); 23679566063dSJacob Faibussowitsch PetscCall(VecGetArray(b, &bb)); 2368ad540459SPierre Jolivet for (i = 0; i < is_n; i++) bb[is_idx[i]] = diag * xx[is_idx[i]]; 23699566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(x, &xx)); 23709566063dSJacob 
Faibussowitsch PetscCall(VecRestoreArray(b, &bb)); 237197b48c8fSBarry Smith } 237297b48c8fSBarry Smith 2373d9b7c43dSSatish Balay /* Make a copy of the IS and sort it */ 2374bea157c4SSatish Balay /* allocate memory for rows,sizes */ 23759566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(is_n, &rows, 2 * is_n, &sizes)); 2376bea157c4SSatish Balay 2377563b5814SBarry Smith /* copy IS values to rows, and sort them */ 237826fbe8dcSKarl Rupp for (i = 0; i < is_n; i++) rows[i] = is_idx[i]; 23799566063dSJacob Faibussowitsch PetscCall(PetscSortInt(is_n, rows)); 238097b48c8fSBarry Smith 2381a9817697SBarry Smith if (baij->keepnonzeropattern) { 238226fbe8dcSKarl Rupp for (i = 0; i < is_n; i++) sizes[i] = 1; 2383dffd3267SBarry Smith bs_max = is_n; 2384dffd3267SBarry Smith } else { 23859566063dSJacob Faibussowitsch PetscCall(MatZeroRows_SeqBAIJ_Check_Blocks(rows, is_n, bs, sizes, &bs_max)); 2386e56f5c9eSBarry Smith A->nonzerostate++; 2387dffd3267SBarry Smith } 2388bea157c4SSatish Balay 2389bea157c4SSatish Balay for (i = 0, j = 0; i < bs_max; j += sizes[i], i++) { 2390bea157c4SSatish Balay row = rows[j]; 23915f80ce2aSJacob Faibussowitsch PetscCheck(row >= 0 && row <= A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "row %" PetscInt_FMT " out of range", row); 2392bea157c4SSatish Balay count = (baij->i[row / bs + 1] - baij->i[row / bs]) * bs; 2393b31fbe3bSSatish Balay aa = ((MatScalar *)(baij->a)) + baij->i[row / bs] * bs2 + (row % bs); 2394a9817697SBarry Smith if (sizes[i] == bs && !baij->keepnonzeropattern) { 2395d4a378daSJed Brown if (diag != (PetscScalar)0.0) { 2396bea157c4SSatish Balay if (baij->ilen[row / bs] > 0) { 2397bea157c4SSatish Balay baij->ilen[row / bs] = 1; 2398bea157c4SSatish Balay baij->j[baij->i[row / bs]] = row / bs; 239926fbe8dcSKarl Rupp 24009566063dSJacob Faibussowitsch PetscCall(PetscArrayzero(aa, count * bs)); 2401a07cd24cSSatish Balay } 2402563b5814SBarry Smith /* Now insert all the diagonal values for this bs */ 240348a46eb9SPierre Jolivet for (k = 
0; k < bs; k++) PetscCall((*A->ops->setvalues)(A, 1, rows + j + k, 1, rows + j + k, &diag, INSERT_VALUES)); 2404f4df32b1SMatthew Knepley } else { /* (diag == 0.0) */ 2405bea157c4SSatish Balay baij->ilen[row / bs] = 0; 2406f4df32b1SMatthew Knepley } /* end (diag == 0.0) */ 2407bea157c4SSatish Balay } else { /* (sizes[i] != bs) */ 24086bdcaf15SBarry Smith PetscAssert(sizes[i] == 1, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Internal Error. Value should be 1"); 2409bea157c4SSatish Balay for (k = 0; k < count; k++) { 2410d9b7c43dSSatish Balay aa[0] = zero; 2411d9b7c43dSSatish Balay aa += bs; 2412d9b7c43dSSatish Balay } 241348a46eb9SPierre Jolivet if (diag != (PetscScalar)0.0) PetscCall((*A->ops->setvalues)(A, 1, rows + j, 1, rows + j, &diag, INSERT_VALUES)); 2414d9b7c43dSSatish Balay } 2415bea157c4SSatish Balay } 2416bea157c4SSatish Balay 24179566063dSJacob Faibussowitsch PetscCall(PetscFree2(rows, sizes)); 24189566063dSJacob Faibussowitsch PetscCall(MatAssemblyEnd_SeqBAIJ(A, MAT_FINAL_ASSEMBLY)); 24193ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 2420d9b7c43dSSatish Balay } 24211c351548SSatish Balay 2422ff6a9541SJacob Faibussowitsch static PetscErrorCode MatZeroRowsColumns_SeqBAIJ(Mat A, PetscInt is_n, const PetscInt is_idx[], PetscScalar diag, Vec x, Vec b) 2423d71ae5a4SJacob Faibussowitsch { 242497b48c8fSBarry Smith Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)A->data; 242597b48c8fSBarry Smith PetscInt i, j, k, count; 242697b48c8fSBarry Smith PetscInt bs = A->rmap->bs, bs2 = baij->bs2, row, col; 242797b48c8fSBarry Smith PetscScalar zero = 0.0; 242897b48c8fSBarry Smith MatScalar *aa; 242997b48c8fSBarry Smith const PetscScalar *xx; 243097b48c8fSBarry Smith PetscScalar *bb; 243156777dd2SBarry Smith PetscBool *zeroed, vecs = PETSC_FALSE; 243297b48c8fSBarry Smith 243397b48c8fSBarry Smith PetscFunctionBegin; 243497b48c8fSBarry Smith /* fix right hand side if needed */ 243597b48c8fSBarry Smith if (x && b) { 24369566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(x, 
&xx)); 24379566063dSJacob Faibussowitsch PetscCall(VecGetArray(b, &bb)); 243856777dd2SBarry Smith vecs = PETSC_TRUE; 243997b48c8fSBarry Smith } 244097b48c8fSBarry Smith 244197b48c8fSBarry Smith /* zero the columns */ 24429566063dSJacob Faibussowitsch PetscCall(PetscCalloc1(A->rmap->n, &zeroed)); 244397b48c8fSBarry Smith for (i = 0; i < is_n; i++) { 24445f80ce2aSJacob Faibussowitsch PetscCheck(is_idx[i] >= 0 && is_idx[i] < A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "row %" PetscInt_FMT " out of range", is_idx[i]); 244597b48c8fSBarry Smith zeroed[is_idx[i]] = PETSC_TRUE; 244697b48c8fSBarry Smith } 244797b48c8fSBarry Smith for (i = 0; i < A->rmap->N; i++) { 244897b48c8fSBarry Smith if (!zeroed[i]) { 244997b48c8fSBarry Smith row = i / bs; 245097b48c8fSBarry Smith for (j = baij->i[row]; j < baij->i[row + 1]; j++) { 245197b48c8fSBarry Smith for (k = 0; k < bs; k++) { 245297b48c8fSBarry Smith col = bs * baij->j[j] + k; 245397b48c8fSBarry Smith if (zeroed[col]) { 245497b48c8fSBarry Smith aa = ((MatScalar *)(baij->a)) + j * bs2 + (i % bs) + bs * k; 245556777dd2SBarry Smith if (vecs) bb[i] -= aa[0] * xx[col]; 245697b48c8fSBarry Smith aa[0] = 0.0; 245797b48c8fSBarry Smith } 245897b48c8fSBarry Smith } 245997b48c8fSBarry Smith } 246056777dd2SBarry Smith } else if (vecs) bb[i] = diag * xx[i]; 246197b48c8fSBarry Smith } 24629566063dSJacob Faibussowitsch PetscCall(PetscFree(zeroed)); 246356777dd2SBarry Smith if (vecs) { 24649566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(x, &xx)); 24659566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(b, &bb)); 246656777dd2SBarry Smith } 246797b48c8fSBarry Smith 246897b48c8fSBarry Smith /* zero the rows */ 246997b48c8fSBarry Smith for (i = 0; i < is_n; i++) { 247097b48c8fSBarry Smith row = is_idx[i]; 247197b48c8fSBarry Smith count = (baij->i[row / bs + 1] - baij->i[row / bs]) * bs; 247297b48c8fSBarry Smith aa = ((MatScalar *)(baij->a)) + baij->i[row / bs] * bs2 + (row % bs); 247397b48c8fSBarry Smith for (k = 0; k < 
count; k++) { 247497b48c8fSBarry Smith aa[0] = zero; 247597b48c8fSBarry Smith aa += bs; 247697b48c8fSBarry Smith } 2477dbbe0bcdSBarry Smith if (diag != (PetscScalar)0.0) PetscUseTypeMethod(A, setvalues, 1, &row, 1, &row, &diag, INSERT_VALUES); 247897b48c8fSBarry Smith } 24799566063dSJacob Faibussowitsch PetscCall(MatAssemblyEnd_SeqBAIJ(A, MAT_FINAL_ASSEMBLY)); 24803ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 248197b48c8fSBarry Smith } 248297b48c8fSBarry Smith 2483d71ae5a4SJacob Faibussowitsch PetscErrorCode MatSetValues_SeqBAIJ(Mat A, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode is) 2484d71ae5a4SJacob Faibussowitsch { 24852d61bbb3SSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 2486e2ee6c50SBarry Smith PetscInt *rp, k, low, high, t, ii, row, nrow, i, col, l, rmax, N, lastcol = -1; 2487c1ac3661SBarry Smith PetscInt *imax = a->imax, *ai = a->i, *ailen = a->ilen; 2488d0f46423SBarry Smith PetscInt *aj = a->j, nonew = a->nonew, bs = A->rmap->bs, brow, bcol; 2489c1ac3661SBarry Smith PetscInt ridx, cidx, bs2 = a->bs2; 2490ace3abfcSBarry Smith PetscBool roworiented = a->roworiented; 2491d8cdefa3SHong Zhang MatScalar *ap = NULL, value = 0.0, *aa = a->a, *bap; 24922d61bbb3SSatish Balay 24932d61bbb3SSatish Balay PetscFunctionBegin; 24942d61bbb3SSatish Balay for (k = 0; k < m; k++) { /* loop over added rows */ 2495085a36d4SBarry Smith row = im[k]; 2496085a36d4SBarry Smith brow = row / bs; 24975ef9f2a5SBarry Smith if (row < 0) continue; 24986bdcaf15SBarry Smith PetscCheck(row < A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, row, A->rmap->N - 1); 24992d61bbb3SSatish Balay rp = aj + ai[brow]; 2500672ba085SHong Zhang if (!A->structure_only) ap = aa + bs2 * ai[brow]; 25012d61bbb3SSatish Balay rmax = imax[brow]; 25022d61bbb3SSatish Balay nrow = ailen[brow]; 25032d61bbb3SSatish Balay low = 0; 2504c71e6ed7SBarry Smith high = nrow; 
25052d61bbb3SSatish Balay for (l = 0; l < n; l++) { /* loop over added columns */ 25065ef9f2a5SBarry Smith if (in[l] < 0) continue; 25076bdcaf15SBarry Smith PetscCheck(in[l] < A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[l], A->cmap->n - 1); 25089371c9d4SSatish Balay col = in[l]; 25099371c9d4SSatish Balay bcol = col / bs; 25109371c9d4SSatish Balay ridx = row % bs; 25119371c9d4SSatish Balay cidx = col % bs; 2512672ba085SHong Zhang if (!A->structure_only) { 25132d61bbb3SSatish Balay if (roworiented) { 25145ef9f2a5SBarry Smith value = v[l + k * n]; 25152d61bbb3SSatish Balay } else { 25162d61bbb3SSatish Balay value = v[k + l * m]; 25172d61bbb3SSatish Balay } 2518672ba085SHong Zhang } 25199371c9d4SSatish Balay if (col <= lastcol) low = 0; 25209371c9d4SSatish Balay else high = nrow; 2521e2ee6c50SBarry Smith lastcol = col; 25222d61bbb3SSatish Balay while (high - low > 7) { 25232d61bbb3SSatish Balay t = (low + high) / 2; 25242d61bbb3SSatish Balay if (rp[t] > bcol) high = t; 25252d61bbb3SSatish Balay else low = t; 25262d61bbb3SSatish Balay } 25272d61bbb3SSatish Balay for (i = low; i < high; i++) { 25282d61bbb3SSatish Balay if (rp[i] > bcol) break; 25292d61bbb3SSatish Balay if (rp[i] == bcol) { 25302d61bbb3SSatish Balay bap = ap + bs2 * i + bs * cidx + ridx; 2531672ba085SHong Zhang if (!A->structure_only) { 25322d61bbb3SSatish Balay if (is == ADD_VALUES) *bap += value; 25332d61bbb3SSatish Balay else *bap = value; 2534672ba085SHong Zhang } 25352d61bbb3SSatish Balay goto noinsert1; 25362d61bbb3SSatish Balay } 25372d61bbb3SSatish Balay } 25382d61bbb3SSatish Balay if (nonew == 1) goto noinsert1; 25395f80ce2aSJacob Faibussowitsch PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero (%" PetscInt_FMT ", %" PetscInt_FMT ") in the matrix", row, col); 2540672ba085SHong Zhang if (A->structure_only) { 2541672ba085SHong Zhang MatSeqXAIJReallocateAIJ_structure_only(A, a->mbs, 
bs2, nrow, brow, bcol, rmax, ai, aj, rp, imax, nonew, MatScalar); 2542672ba085SHong Zhang } else { 2543fef13f97SBarry Smith MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, brow, bcol, rmax, aa, ai, aj, rp, ap, imax, nonew, MatScalar); 2544672ba085SHong Zhang } 25459371c9d4SSatish Balay N = nrow++ - 1; 25469371c9d4SSatish Balay high++; 25472d61bbb3SSatish Balay /* shift up all the later entries in this row */ 25489566063dSJacob Faibussowitsch PetscCall(PetscArraymove(rp + i + 1, rp + i, N - i + 1)); 25492d61bbb3SSatish Balay rp[i] = bcol; 2550580bdb30SBarry Smith if (!A->structure_only) { 25519566063dSJacob Faibussowitsch PetscCall(PetscArraymove(ap + bs2 * (i + 1), ap + bs2 * i, bs2 * (N - i + 1))); 25529566063dSJacob Faibussowitsch PetscCall(PetscArrayzero(ap + bs2 * i, bs2)); 2553580bdb30SBarry Smith ap[bs2 * i + bs * cidx + ridx] = value; 2554580bdb30SBarry Smith } 2555085a36d4SBarry Smith a->nz++; 2556e56f5c9eSBarry Smith A->nonzerostate++; 25572d61bbb3SSatish Balay noinsert1:; 25582d61bbb3SSatish Balay low = i; 25592d61bbb3SSatish Balay } 25602d61bbb3SSatish Balay ailen[brow] = nrow; 25612d61bbb3SSatish Balay } 25623ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 25632d61bbb3SSatish Balay } 25642d61bbb3SSatish Balay 2565ff6a9541SJacob Faibussowitsch static PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA, IS row, IS col, const MatFactorInfo *info) 2566d71ae5a4SJacob Faibussowitsch { 25672d61bbb3SSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)inA->data; 25682d61bbb3SSatish Balay Mat outA; 2569ace3abfcSBarry Smith PetscBool row_identity, col_identity; 25702d61bbb3SSatish Balay 25712d61bbb3SSatish Balay PetscFunctionBegin; 25725f80ce2aSJacob Faibussowitsch PetscCheck(info->levels == 0, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only levels = 0 supported for in-place ILU"); 25739566063dSJacob Faibussowitsch PetscCall(ISIdentity(row, &row_identity)); 25749566063dSJacob Faibussowitsch PetscCall(ISIdentity(col, &col_identity)); 25755f80ce2aSJacob Faibussowitsch 
PetscCheck(row_identity && col_identity, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Row and column permutations must be identity for in-place ILU"); 25762d61bbb3SSatish Balay 25772d61bbb3SSatish Balay outA = inA; 2578d5f3da31SBarry Smith inA->factortype = MAT_FACTOR_LU; 25799566063dSJacob Faibussowitsch PetscCall(PetscFree(inA->solvertype)); 25809566063dSJacob Faibussowitsch PetscCall(PetscStrallocpy(MATSOLVERPETSC, &inA->solvertype)); 25812d61bbb3SSatish Balay 25829566063dSJacob Faibussowitsch PetscCall(MatMarkDiagonal_SeqBAIJ(inA)); 2583cf242676SKris Buschelman 25849566063dSJacob Faibussowitsch PetscCall(PetscObjectReference((PetscObject)row)); 25859566063dSJacob Faibussowitsch PetscCall(ISDestroy(&a->row)); 2586c3122656SLisandro Dalcin a->row = row; 25879566063dSJacob Faibussowitsch PetscCall(PetscObjectReference((PetscObject)col)); 25889566063dSJacob Faibussowitsch PetscCall(ISDestroy(&a->col)); 2589c3122656SLisandro Dalcin a->col = col; 2590c38d4ed2SBarry Smith 2591c38d4ed2SBarry Smith /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */ 25929566063dSJacob Faibussowitsch PetscCall(ISDestroy(&a->icol)); 25939566063dSJacob Faibussowitsch PetscCall(ISInvertPermutation(col, PETSC_DECIDE, &a->icol)); 2594c38d4ed2SBarry Smith 25959566063dSJacob Faibussowitsch PetscCall(MatSeqBAIJSetNumericFactorization_inplace(inA, (PetscBool)(row_identity && col_identity))); 2596aa624791SPierre Jolivet if (!a->solve_work) PetscCall(PetscMalloc1(inA->rmap->N + inA->rmap->bs, &a->solve_work)); 25979566063dSJacob Faibussowitsch PetscCall(MatLUFactorNumeric(outA, inA, info)); 25983ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 25992d61bbb3SSatish Balay } 2600d9b7c43dSSatish Balay 2601ff6a9541SJacob Faibussowitsch static PetscErrorCode MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat, const PetscInt *indices) 2602d71ae5a4SJacob Faibussowitsch { 260327a8da17SBarry Smith Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data; 260427a8da17SBarry Smith 
260527a8da17SBarry Smith PetscFunctionBegin; 2606ff6a9541SJacob Faibussowitsch baij->nz = baij->maxnz; 2607ff6a9541SJacob Faibussowitsch PetscCall(PetscArraycpy(baij->j, indices, baij->nz)); 2608ff6a9541SJacob Faibussowitsch PetscCall(PetscArraycpy(baij->ilen, baij->imax, baij->mbs)); 26093ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 261027a8da17SBarry Smith } 261127a8da17SBarry Smith 261227a8da17SBarry Smith /*@ 261311a5261eSBarry Smith MatSeqBAIJSetColumnIndices - Set the column indices for all the rows in the matrix. 261427a8da17SBarry Smith 261527a8da17SBarry Smith Input Parameters: 261611a5261eSBarry Smith + mat - the `MATSEQBAIJ` matrix 261727a8da17SBarry Smith - indices - the column indices 261827a8da17SBarry Smith 261915091d37SBarry Smith Level: advanced 262015091d37SBarry Smith 262127a8da17SBarry Smith Notes: 262227a8da17SBarry Smith This can be called if you have precomputed the nonzero structure of the 262327a8da17SBarry Smith matrix and want to provide it to the matrix object to improve the performance 262411a5261eSBarry Smith of the `MatSetValues()` operation. 262527a8da17SBarry Smith 262627a8da17SBarry Smith You MUST have set the correct numbers of nonzeros per row in the call to 262711a5261eSBarry Smith `MatCreateSeqBAIJ()`, and the columns indices MUST be sorted. 
262827a8da17SBarry Smith 262911a5261eSBarry Smith MUST be called before any calls to `MatSetValues()` 263027a8da17SBarry Smith 26311cc06b55SBarry Smith .seealso: [](ch_matrices), `Mat`, `MATSEQBAIJ`, `MatSetValues()` 263227a8da17SBarry Smith @*/ 2633d71ae5a4SJacob Faibussowitsch PetscErrorCode MatSeqBAIJSetColumnIndices(Mat mat, PetscInt *indices) 2634d71ae5a4SJacob Faibussowitsch { 263527a8da17SBarry Smith PetscFunctionBegin; 26360700a824SBarry Smith PetscValidHeaderSpecific(mat, MAT_CLASSID, 1); 2637dadcf809SJacob Faibussowitsch PetscValidIntPointer(indices, 2); 2638cac4c232SBarry Smith PetscUseMethod(mat, "MatSeqBAIJSetColumnIndices_C", (Mat, PetscInt *), (mat, indices)); 26393ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 264027a8da17SBarry Smith } 264127a8da17SBarry Smith 2642d71ae5a4SJacob Faibussowitsch PetscErrorCode MatGetRowMaxAbs_SeqBAIJ(Mat A, Vec v, PetscInt idx[]) 2643d71ae5a4SJacob Faibussowitsch { 2644273d9f13SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 2645c1ac3661SBarry Smith PetscInt i, j, n, row, bs, *ai, *aj, mbs; 2646273d9f13SBarry Smith PetscReal atmp; 264787828ca2SBarry Smith PetscScalar *x, zero = 0.0; 2648273d9f13SBarry Smith MatScalar *aa; 2649c1ac3661SBarry Smith PetscInt ncols, brow, krow, kcol; 2650273d9f13SBarry Smith 2651273d9f13SBarry Smith PetscFunctionBegin; 26525f80ce2aSJacob Faibussowitsch /* why is this not a macro???????????????????????????????????????????????????????????????? 
*/ 26535f80ce2aSJacob Faibussowitsch PetscCheck(!A->factortype, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Not for factored matrix"); 2654d0f46423SBarry Smith bs = A->rmap->bs; 2655273d9f13SBarry Smith aa = a->a; 2656273d9f13SBarry Smith ai = a->i; 2657273d9f13SBarry Smith aj = a->j; 2658273d9f13SBarry Smith mbs = a->mbs; 2659273d9f13SBarry Smith 26609566063dSJacob Faibussowitsch PetscCall(VecSet(v, zero)); 26619566063dSJacob Faibussowitsch PetscCall(VecGetArray(v, &x)); 26629566063dSJacob Faibussowitsch PetscCall(VecGetLocalSize(v, &n)); 26635f80ce2aSJacob Faibussowitsch PetscCheck(n == A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Nonconforming matrix and vector"); 2664273d9f13SBarry Smith for (i = 0; i < mbs; i++) { 26659371c9d4SSatish Balay ncols = ai[1] - ai[0]; 26669371c9d4SSatish Balay ai++; 2667273d9f13SBarry Smith brow = bs * i; 2668273d9f13SBarry Smith for (j = 0; j < ncols; j++) { 2669273d9f13SBarry Smith for (kcol = 0; kcol < bs; kcol++) { 2670273d9f13SBarry Smith for (krow = 0; krow < bs; krow++) { 26719371c9d4SSatish Balay atmp = PetscAbsScalar(*aa); 26729371c9d4SSatish Balay aa++; 2673273d9f13SBarry Smith row = brow + krow; /* row index */ 26749371c9d4SSatish Balay if (PetscAbsScalar(x[row]) < atmp) { 26759371c9d4SSatish Balay x[row] = atmp; 26769371c9d4SSatish Balay if (idx) idx[row] = bs * (*aj) + kcol; 26779371c9d4SSatish Balay } 2678273d9f13SBarry Smith } 2679273d9f13SBarry Smith } 2680273d9f13SBarry Smith aj++; 2681273d9f13SBarry Smith } 2682273d9f13SBarry Smith } 26839566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(v, &x)); 26843ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 2685273d9f13SBarry Smith } 2686273d9f13SBarry Smith 2687d71ae5a4SJacob Faibussowitsch PetscErrorCode MatCopy_SeqBAIJ(Mat A, Mat B, MatStructure str) 2688d71ae5a4SJacob Faibussowitsch { 26893c896bc6SHong Zhang PetscFunctionBegin; 26903c896bc6SHong Zhang /* If the two matrices have the same copy implementation, use fast copy. 
*/ 26913c896bc6SHong Zhang if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) { 26923c896bc6SHong Zhang Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 26933c896bc6SHong Zhang Mat_SeqBAIJ *b = (Mat_SeqBAIJ *)B->data; 2694d88c0aacSHong Zhang PetscInt ambs = a->mbs, bmbs = b->mbs, abs = A->rmap->bs, bbs = B->rmap->bs, bs2 = abs * abs; 26953c896bc6SHong Zhang 26965f80ce2aSJacob Faibussowitsch PetscCheck(a->i[ambs] == b->i[bmbs], PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Number of nonzero blocks in matrices A %" PetscInt_FMT " and B %" PetscInt_FMT " are different", a->i[ambs], b->i[bmbs]); 26975f80ce2aSJacob Faibussowitsch PetscCheck(abs == bbs, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Block size A %" PetscInt_FMT " and B %" PetscInt_FMT " are different", abs, bbs); 26989566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(b->a, a->a, bs2 * a->i[ambs])); 26999566063dSJacob Faibussowitsch PetscCall(PetscObjectStateIncrease((PetscObject)B)); 27003c896bc6SHong Zhang } else { 27019566063dSJacob Faibussowitsch PetscCall(MatCopy_Basic(A, B, str)); 27023c896bc6SHong Zhang } 27033ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 27043c896bc6SHong Zhang } 27053c896bc6SHong Zhang 2706d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatSeqBAIJGetArray_SeqBAIJ(Mat A, PetscScalar *array[]) 2707d71ae5a4SJacob Faibussowitsch { 2708f2a5309cSSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 27096e111a19SKarl Rupp 2710f2a5309cSSatish Balay PetscFunctionBegin; 2711f2a5309cSSatish Balay *array = a->a; 27123ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 2713f2a5309cSSatish Balay } 2714f2a5309cSSatish Balay 2715d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatSeqBAIJRestoreArray_SeqBAIJ(Mat A, PetscScalar *array[]) 2716d71ae5a4SJacob Faibussowitsch { 2717f2a5309cSSatish Balay PetscFunctionBegin; 2718cda14afcSprj- *array = NULL; 27193ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 2720f2a5309cSSatish Balay } 
2721f2a5309cSSatish Balay 2722d71ae5a4SJacob Faibussowitsch PetscErrorCode MatAXPYGetPreallocation_SeqBAIJ(Mat Y, Mat X, PetscInt *nnz) 2723d71ae5a4SJacob Faibussowitsch { 2724b264fe52SHong Zhang PetscInt bs = Y->rmap->bs, mbs = Y->rmap->N / bs; 272552768537SHong Zhang Mat_SeqBAIJ *x = (Mat_SeqBAIJ *)X->data; 272652768537SHong Zhang Mat_SeqBAIJ *y = (Mat_SeqBAIJ *)Y->data; 272752768537SHong Zhang 272852768537SHong Zhang PetscFunctionBegin; 272952768537SHong Zhang /* Set the number of nonzeros in the new matrix */ 27309566063dSJacob Faibussowitsch PetscCall(MatAXPYGetPreallocation_SeqX_private(mbs, x->i, x->j, y->i, y->j, nnz)); 27313ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 273252768537SHong Zhang } 273352768537SHong Zhang 2734d71ae5a4SJacob Faibussowitsch PetscErrorCode MatAXPY_SeqBAIJ(Mat Y, PetscScalar a, Mat X, MatStructure str) 2735d71ae5a4SJacob Faibussowitsch { 273642ee4b1aSHong Zhang Mat_SeqBAIJ *x = (Mat_SeqBAIJ *)X->data, *y = (Mat_SeqBAIJ *)Y->data; 273731ce2d13SHong Zhang PetscInt bs = Y->rmap->bs, bs2 = bs * bs; 2738e838b9e7SJed Brown PetscBLASInt one = 1; 273942ee4b1aSHong Zhang 274042ee4b1aSHong Zhang PetscFunctionBegin; 2741134adf20SPierre Jolivet if (str == UNKNOWN_NONZERO_PATTERN || (PetscDefined(USE_DEBUG) && str == SAME_NONZERO_PATTERN)) { 2742134adf20SPierre Jolivet PetscBool e = x->nz == y->nz && x->mbs == y->mbs && bs == X->rmap->bs ? 
PETSC_TRUE : PETSC_FALSE; 2743134adf20SPierre Jolivet if (e) { 27449566063dSJacob Faibussowitsch PetscCall(PetscArraycmp(x->i, y->i, x->mbs + 1, &e)); 2745134adf20SPierre Jolivet if (e) { 27469566063dSJacob Faibussowitsch PetscCall(PetscArraycmp(x->j, y->j, x->i[x->mbs], &e)); 2747134adf20SPierre Jolivet if (e) str = SAME_NONZERO_PATTERN; 2748134adf20SPierre Jolivet } 2749134adf20SPierre Jolivet } 275054c59aa7SJacob Faibussowitsch if (!e) PetscCheck(str != SAME_NONZERO_PATTERN, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "MatStructure is not SAME_NONZERO_PATTERN"); 2751134adf20SPierre Jolivet } 275242ee4b1aSHong Zhang if (str == SAME_NONZERO_PATTERN) { 2753f4df32b1SMatthew Knepley PetscScalar alpha = a; 2754c5df96a5SBarry Smith PetscBLASInt bnz; 27559566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast(x->nz * bs2, &bnz)); 2756792fecdfSBarry Smith PetscCallBLAS("BLASaxpy", BLASaxpy_(&bnz, &alpha, x->a, &one, y->a, &one)); 27579566063dSJacob Faibussowitsch PetscCall(PetscObjectStateIncrease((PetscObject)Y)); 2758ab784542SHong Zhang } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 27599566063dSJacob Faibussowitsch PetscCall(MatAXPY_Basic(Y, a, X, str)); 276042ee4b1aSHong Zhang } else { 276152768537SHong Zhang Mat B; 276252768537SHong Zhang PetscInt *nnz; 276354c59aa7SJacob Faibussowitsch PetscCheck(bs == X->rmap->bs, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrices must have same block size"); 27649566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(Y->rmap->N, &nnz)); 27659566063dSJacob Faibussowitsch PetscCall(MatCreate(PetscObjectComm((PetscObject)Y), &B)); 27669566063dSJacob Faibussowitsch PetscCall(PetscObjectSetName((PetscObject)B, ((PetscObject)Y)->name)); 27679566063dSJacob Faibussowitsch PetscCall(MatSetSizes(B, Y->rmap->n, Y->cmap->n, Y->rmap->N, Y->cmap->N)); 27689566063dSJacob Faibussowitsch PetscCall(MatSetBlockSizesFromMats(B, Y, Y)); 27699566063dSJacob Faibussowitsch PetscCall(MatSetType(B, 
(MatType)((PetscObject)Y)->type_name)); 27709566063dSJacob Faibussowitsch PetscCall(MatAXPYGetPreallocation_SeqBAIJ(Y, X, nnz)); 27719566063dSJacob Faibussowitsch PetscCall(MatSeqBAIJSetPreallocation(B, bs, 0, nnz)); 27729566063dSJacob Faibussowitsch PetscCall(MatAXPY_BasicWithPreallocation(B, Y, a, X, str)); 27739566063dSJacob Faibussowitsch PetscCall(MatHeaderMerge(Y, &B)); 27749566063dSJacob Faibussowitsch PetscCall(PetscFree(nnz)); 277542ee4b1aSHong Zhang } 27763ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 277742ee4b1aSHong Zhang } 277842ee4b1aSHong Zhang 2779d71ae5a4SJacob Faibussowitsch PETSC_INTERN PetscErrorCode MatConjugate_SeqBAIJ(Mat A) 2780d71ae5a4SJacob Faibussowitsch { 2781ff6a9541SJacob Faibussowitsch #if PetscDefined(USE_COMPLEX) 27822726fb6dSPierre Jolivet Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 27832726fb6dSPierre Jolivet PetscInt i, nz = a->bs2 * a->i[a->mbs]; 27842726fb6dSPierre Jolivet MatScalar *aa = a->a; 27852726fb6dSPierre Jolivet 27862726fb6dSPierre Jolivet PetscFunctionBegin; 27872726fb6dSPierre Jolivet for (i = 0; i < nz; i++) aa[i] = PetscConj(aa[i]); 27883ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 2789ff6a9541SJacob Faibussowitsch #else 2790ff6a9541SJacob Faibussowitsch (void)A; 2791ff6a9541SJacob Faibussowitsch return PETSC_SUCCESS; 2792ff6a9541SJacob Faibussowitsch #endif 27932726fb6dSPierre Jolivet } 27942726fb6dSPierre Jolivet 2795ff6a9541SJacob Faibussowitsch static PetscErrorCode MatRealPart_SeqBAIJ(Mat A) 2796d71ae5a4SJacob Faibussowitsch { 2797ff6a9541SJacob Faibussowitsch #if PetscDefined(USE_COMPLEX) 279899cafbc1SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 279999cafbc1SBarry Smith PetscInt i, nz = a->bs2 * a->i[a->mbs]; 2800dd6ea824SBarry Smith MatScalar *aa = a->a; 280199cafbc1SBarry Smith 280299cafbc1SBarry Smith PetscFunctionBegin; 280399cafbc1SBarry Smith for (i = 0; i < nz; i++) aa[i] = PetscRealPart(aa[i]); 28043ba16761SJacob Faibussowitsch 
PetscFunctionReturn(PETSC_SUCCESS); 2805ff6a9541SJacob Faibussowitsch #else 2806ff6a9541SJacob Faibussowitsch (void)A; 2807ff6a9541SJacob Faibussowitsch return PETSC_SUCCESS; 2808ff6a9541SJacob Faibussowitsch #endif 280999cafbc1SBarry Smith } 281099cafbc1SBarry Smith 2811ff6a9541SJacob Faibussowitsch static PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A) 2812d71ae5a4SJacob Faibussowitsch { 2813ff6a9541SJacob Faibussowitsch #if PetscDefined(USE_COMPLEX) 281499cafbc1SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 281599cafbc1SBarry Smith PetscInt i, nz = a->bs2 * a->i[a->mbs]; 2816dd6ea824SBarry Smith MatScalar *aa = a->a; 281799cafbc1SBarry Smith 281899cafbc1SBarry Smith PetscFunctionBegin; 281999cafbc1SBarry Smith for (i = 0; i < nz; i++) aa[i] = PetscImaginaryPart(aa[i]); 28203ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 2821ff6a9541SJacob Faibussowitsch #else 2822ff6a9541SJacob Faibussowitsch (void)A; 2823ff6a9541SJacob Faibussowitsch return PETSC_SUCCESS; 2824ff6a9541SJacob Faibussowitsch #endif 282599cafbc1SBarry Smith } 282699cafbc1SBarry Smith 28273acb8795SBarry Smith /* 28282479783cSJose E. 
Roman Code almost identical to MatGetColumnIJ_SeqAIJ() should share common code 28293acb8795SBarry Smith */ 2830ff6a9541SJacob Faibussowitsch static PetscErrorCode MatGetColumnIJ_SeqBAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *nn, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done) 2831d71ae5a4SJacob Faibussowitsch { 28323acb8795SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 28333acb8795SBarry Smith PetscInt bs = A->rmap->bs, i, *collengths, *cia, *cja, n = A->cmap->n / bs, m = A->rmap->n / bs; 28343acb8795SBarry Smith PetscInt nz = a->i[m], row, *jj, mr, col; 28353acb8795SBarry Smith 28363acb8795SBarry Smith PetscFunctionBegin; 28373acb8795SBarry Smith *nn = n; 28383ba16761SJacob Faibussowitsch if (!ia) PetscFunctionReturn(PETSC_SUCCESS); 28395f80ce2aSJacob Faibussowitsch PetscCheck(!symmetric, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not for BAIJ matrices"); 28409566063dSJacob Faibussowitsch PetscCall(PetscCalloc1(n, &collengths)); 28419566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(n + 1, &cia)); 28429566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(nz, &cja)); 28433acb8795SBarry Smith jj = a->j; 2844ad540459SPierre Jolivet for (i = 0; i < nz; i++) collengths[jj[i]]++; 28453acb8795SBarry Smith cia[0] = oshift; 2846ad540459SPierre Jolivet for (i = 0; i < n; i++) cia[i + 1] = cia[i] + collengths[i]; 28479566063dSJacob Faibussowitsch PetscCall(PetscArrayzero(collengths, n)); 28483acb8795SBarry Smith jj = a->j; 28493acb8795SBarry Smith for (row = 0; row < m; row++) { 28503acb8795SBarry Smith mr = a->i[row + 1] - a->i[row]; 28513acb8795SBarry Smith for (i = 0; i < mr; i++) { 28523acb8795SBarry Smith col = *jj++; 285326fbe8dcSKarl Rupp 28543acb8795SBarry Smith cja[cia[col] + collengths[col]++ - oshift] = row + oshift; 28553acb8795SBarry Smith } 28563acb8795SBarry Smith } 28579566063dSJacob Faibussowitsch PetscCall(PetscFree(collengths)); 28589371c9d4SSatish Balay *ia = cia; 28599371c9d4SSatish Balay *ja = cja; 
28603ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 28613acb8795SBarry Smith } 28623acb8795SBarry Smith 2863ff6a9541SJacob Faibussowitsch static PetscErrorCode MatRestoreColumnIJ_SeqBAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *n, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done) 2864d71ae5a4SJacob Faibussowitsch { 28653acb8795SBarry Smith PetscFunctionBegin; 28663ba16761SJacob Faibussowitsch if (!ia) PetscFunctionReturn(PETSC_SUCCESS); 28679566063dSJacob Faibussowitsch PetscCall(PetscFree(*ia)); 28689566063dSJacob Faibussowitsch PetscCall(PetscFree(*ja)); 28693ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 28703acb8795SBarry Smith } 28713acb8795SBarry Smith 2872525d23c0SHong Zhang /* 2873525d23c0SHong Zhang MatGetColumnIJ_SeqBAIJ_Color() and MatRestoreColumnIJ_SeqBAIJ_Color() are customized from 2874525d23c0SHong Zhang MatGetColumnIJ_SeqBAIJ() and MatRestoreColumnIJ_SeqBAIJ() by adding an output 2875040ebd07SHong Zhang spidx[], index of a->a, to be used in MatTransposeColoringCreate() and MatFDColoringCreate() 2876525d23c0SHong Zhang */ 2877d71ae5a4SJacob Faibussowitsch PetscErrorCode MatGetColumnIJ_SeqBAIJ_Color(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *nn, const PetscInt *ia[], const PetscInt *ja[], PetscInt *spidx[], PetscBool *done) 2878d71ae5a4SJacob Faibussowitsch { 2879525d23c0SHong Zhang Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 2880c0349474SHong Zhang PetscInt i, *collengths, *cia, *cja, n = a->nbs, m = a->mbs; 2881525d23c0SHong Zhang PetscInt nz = a->i[m], row, *jj, mr, col; 2882525d23c0SHong Zhang PetscInt *cspidx; 2883f6d58c54SBarry Smith 2884f6d58c54SBarry Smith PetscFunctionBegin; 2885525d23c0SHong Zhang *nn = n; 28863ba16761SJacob Faibussowitsch if (!ia) PetscFunctionReturn(PETSC_SUCCESS); 2887f6d58c54SBarry Smith 28889566063dSJacob Faibussowitsch PetscCall(PetscCalloc1(n, &collengths)); 28899566063dSJacob Faibussowitsch 
PetscCall(PetscMalloc1(n + 1, &cia)); 28909566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(nz, &cja)); 28919566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(nz, &cspidx)); 2892525d23c0SHong Zhang jj = a->j; 2893ad540459SPierre Jolivet for (i = 0; i < nz; i++) collengths[jj[i]]++; 2894525d23c0SHong Zhang cia[0] = oshift; 2895ad540459SPierre Jolivet for (i = 0; i < n; i++) cia[i + 1] = cia[i] + collengths[i]; 28969566063dSJacob Faibussowitsch PetscCall(PetscArrayzero(collengths, n)); 2897525d23c0SHong Zhang jj = a->j; 2898525d23c0SHong Zhang for (row = 0; row < m; row++) { 2899525d23c0SHong Zhang mr = a->i[row + 1] - a->i[row]; 2900525d23c0SHong Zhang for (i = 0; i < mr; i++) { 2901525d23c0SHong Zhang col = *jj++; 2902525d23c0SHong Zhang cspidx[cia[col] + collengths[col] - oshift] = a->i[row] + i; /* index of a->j */ 2903525d23c0SHong Zhang cja[cia[col] + collengths[col]++ - oshift] = row + oshift; 2904525d23c0SHong Zhang } 2905525d23c0SHong Zhang } 29069566063dSJacob Faibussowitsch PetscCall(PetscFree(collengths)); 2907071fcb05SBarry Smith *ia = cia; 2908071fcb05SBarry Smith *ja = cja; 2909525d23c0SHong Zhang *spidx = cspidx; 29103ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 2911f6d58c54SBarry Smith } 2912f6d58c54SBarry Smith 2913d71ae5a4SJacob Faibussowitsch PetscErrorCode MatRestoreColumnIJ_SeqBAIJ_Color(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *n, const PetscInt *ia[], const PetscInt *ja[], PetscInt *spidx[], PetscBool *done) 2914d71ae5a4SJacob Faibussowitsch { 2915525d23c0SHong Zhang PetscFunctionBegin; 29169566063dSJacob Faibussowitsch PetscCall(MatRestoreColumnIJ_SeqBAIJ(A, oshift, symmetric, inodecompressed, n, ia, ja, done)); 29179566063dSJacob Faibussowitsch PetscCall(PetscFree(*spidx)); 29183ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 2919f6d58c54SBarry Smith } 292099cafbc1SBarry Smith 2921d71ae5a4SJacob Faibussowitsch PetscErrorCode MatShift_SeqBAIJ(Mat Y, PetscScalar 
a) 2922d71ae5a4SJacob Faibussowitsch { 29237d68702bSBarry Smith Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)Y->data; 29247d68702bSBarry Smith 29257d68702bSBarry Smith PetscFunctionBegin; 292648a46eb9SPierre Jolivet if (!Y->preallocated || !aij->nz) PetscCall(MatSeqBAIJSetPreallocation(Y, Y->rmap->bs, 1, NULL)); 29279566063dSJacob Faibussowitsch PetscCall(MatShift_Basic(Y, a)); 29283ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 29297d68702bSBarry Smith } 29307d68702bSBarry Smith 2931dec0b466SHong Zhang static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ, 2932cc2dc46cSBarry Smith MatGetRow_SeqBAIJ, 2933cc2dc46cSBarry Smith MatRestoreRow_SeqBAIJ, 2934cc2dc46cSBarry Smith MatMult_SeqBAIJ_N, 293597304618SKris Buschelman /* 4*/ MatMultAdd_SeqBAIJ_N, 29367c922b88SBarry Smith MatMultTranspose_SeqBAIJ, 29377c922b88SBarry Smith MatMultTransposeAdd_SeqBAIJ, 2938f4259b30SLisandro Dalcin NULL, 2939f4259b30SLisandro Dalcin NULL, 2940f4259b30SLisandro Dalcin NULL, 2941f4259b30SLisandro Dalcin /* 10*/ NULL, 2942cc2dc46cSBarry Smith MatLUFactor_SeqBAIJ, 2943f4259b30SLisandro Dalcin NULL, 2944f4259b30SLisandro Dalcin NULL, 2945f2501298SSatish Balay MatTranspose_SeqBAIJ, 294697304618SKris Buschelman /* 15*/ MatGetInfo_SeqBAIJ, 2947cc2dc46cSBarry Smith MatEqual_SeqBAIJ, 2948cc2dc46cSBarry Smith MatGetDiagonal_SeqBAIJ, 2949cc2dc46cSBarry Smith MatDiagonalScale_SeqBAIJ, 2950cc2dc46cSBarry Smith MatNorm_SeqBAIJ, 2951f4259b30SLisandro Dalcin /* 20*/ NULL, 2952cc2dc46cSBarry Smith MatAssemblyEnd_SeqBAIJ, 2953cc2dc46cSBarry Smith MatSetOption_SeqBAIJ, 2954cc2dc46cSBarry Smith MatZeroEntries_SeqBAIJ, 2955d519adbfSMatthew Knepley /* 24*/ MatZeroRows_SeqBAIJ, 2956f4259b30SLisandro Dalcin NULL, 2957f4259b30SLisandro Dalcin NULL, 2958f4259b30SLisandro Dalcin NULL, 2959f4259b30SLisandro Dalcin NULL, 296026cec326SBarry Smith /* 29*/ MatSetUp_Seq_Hash, 2961f4259b30SLisandro Dalcin NULL, 2962f4259b30SLisandro Dalcin NULL, 2963f4259b30SLisandro Dalcin NULL, 2964f4259b30SLisandro 
Dalcin NULL, 2965d519adbfSMatthew Knepley /* 34*/ MatDuplicate_SeqBAIJ, 2966f4259b30SLisandro Dalcin NULL, 2967f4259b30SLisandro Dalcin NULL, 2968cc2dc46cSBarry Smith MatILUFactor_SeqBAIJ, 2969f4259b30SLisandro Dalcin NULL, 2970d519adbfSMatthew Knepley /* 39*/ MatAXPY_SeqBAIJ, 29717dae84e0SHong Zhang MatCreateSubMatrices_SeqBAIJ, 2972cc2dc46cSBarry Smith MatIncreaseOverlap_SeqBAIJ, 2973cc2dc46cSBarry Smith MatGetValues_SeqBAIJ, 29743c896bc6SHong Zhang MatCopy_SeqBAIJ, 2975f4259b30SLisandro Dalcin /* 44*/ NULL, 2976cc2dc46cSBarry Smith MatScale_SeqBAIJ, 29777d68702bSBarry Smith MatShift_SeqBAIJ, 2978f4259b30SLisandro Dalcin NULL, 297997b48c8fSBarry Smith MatZeroRowsColumns_SeqBAIJ, 2980f4259b30SLisandro Dalcin /* 49*/ NULL, 29813b2fbd54SBarry Smith MatGetRowIJ_SeqBAIJ, 298292c4ed94SBarry Smith MatRestoreRowIJ_SeqBAIJ, 29833acb8795SBarry Smith MatGetColumnIJ_SeqBAIJ, 29843acb8795SBarry Smith MatRestoreColumnIJ_SeqBAIJ, 298593dfae19SHong Zhang /* 54*/ MatFDColoringCreate_SeqXAIJ, 2986f4259b30SLisandro Dalcin NULL, 2987f4259b30SLisandro Dalcin NULL, 2988090001bdSToby Isaac NULL, 2989d3825aa8SBarry Smith MatSetValuesBlocked_SeqBAIJ, 29907dae84e0SHong Zhang /* 59*/ MatCreateSubMatrix_SeqBAIJ, 2991b9b97703SBarry Smith MatDestroy_SeqBAIJ, 2992b9b97703SBarry Smith MatView_SeqBAIJ, 2993f4259b30SLisandro Dalcin NULL, 2994f4259b30SLisandro Dalcin NULL, 2995f4259b30SLisandro Dalcin /* 64*/ NULL, 2996f4259b30SLisandro Dalcin NULL, 2997f4259b30SLisandro Dalcin NULL, 2998f4259b30SLisandro Dalcin NULL, 2999f4259b30SLisandro Dalcin NULL, 3000d519adbfSMatthew Knepley /* 69*/ MatGetRowMaxAbs_SeqBAIJ, 3001f4259b30SLisandro Dalcin NULL, 3002c87e5d42SMatthew Knepley MatConvert_Basic, 3003f4259b30SLisandro Dalcin NULL, 3004f4259b30SLisandro Dalcin NULL, 3005f4259b30SLisandro Dalcin /* 74*/ NULL, 3006f6d58c54SBarry Smith MatFDColoringApply_BAIJ, 3007f4259b30SLisandro Dalcin NULL, 3008f4259b30SLisandro Dalcin NULL, 3009f4259b30SLisandro Dalcin NULL, 3010f4259b30SLisandro Dalcin /* 79*/ 
NULL, 3011f4259b30SLisandro Dalcin NULL, 3012f4259b30SLisandro Dalcin NULL, 3013f4259b30SLisandro Dalcin NULL, 30145bba2384SShri Abhyankar MatLoad_SeqBAIJ, 3015f4259b30SLisandro Dalcin /* 84*/ NULL, 3016f4259b30SLisandro Dalcin NULL, 3017f4259b30SLisandro Dalcin NULL, 3018f4259b30SLisandro Dalcin NULL, 3019f4259b30SLisandro Dalcin NULL, 3020f4259b30SLisandro Dalcin /* 89*/ NULL, 3021f4259b30SLisandro Dalcin NULL, 3022f4259b30SLisandro Dalcin NULL, 3023f4259b30SLisandro Dalcin NULL, 3024f4259b30SLisandro Dalcin NULL, 3025f4259b30SLisandro Dalcin /* 94*/ NULL, 3026f4259b30SLisandro Dalcin NULL, 3027f4259b30SLisandro Dalcin NULL, 3028f4259b30SLisandro Dalcin NULL, 3029f4259b30SLisandro Dalcin NULL, 3030f4259b30SLisandro Dalcin /* 99*/ NULL, 3031f4259b30SLisandro Dalcin NULL, 3032f4259b30SLisandro Dalcin NULL, 30332726fb6dSPierre Jolivet MatConjugate_SeqBAIJ, 3034f4259b30SLisandro Dalcin NULL, 3035f4259b30SLisandro Dalcin /*104*/ NULL, 303699cafbc1SBarry Smith MatRealPart_SeqBAIJ, 30372af78befSBarry Smith MatImaginaryPart_SeqBAIJ, 3038f4259b30SLisandro Dalcin NULL, 3039f4259b30SLisandro Dalcin NULL, 3040f4259b30SLisandro Dalcin /*109*/ NULL, 3041f4259b30SLisandro Dalcin NULL, 3042f4259b30SLisandro Dalcin NULL, 3043f4259b30SLisandro Dalcin NULL, 3044547795f9SHong Zhang MatMissingDiagonal_SeqBAIJ, 3045f4259b30SLisandro Dalcin /*114*/ NULL, 3046f4259b30SLisandro Dalcin NULL, 3047f4259b30SLisandro Dalcin NULL, 3048f4259b30SLisandro Dalcin NULL, 3049f4259b30SLisandro Dalcin NULL, 3050f4259b30SLisandro Dalcin /*119*/ NULL, 3051f4259b30SLisandro Dalcin NULL, 3052547795f9SHong Zhang MatMultHermitianTranspose_SeqBAIJ, 3053d6037b41SHong Zhang MatMultHermitianTransposeAdd_SeqBAIJ, 3054f4259b30SLisandro Dalcin NULL, 3055f4259b30SLisandro Dalcin /*124*/ NULL, 3056857cbf51SRichard Tran Mills MatGetColumnReductions_SeqBAIJ, 30573964eb88SJed Brown MatInvertBlockDiagonal_SeqBAIJ, 3058f4259b30SLisandro Dalcin NULL, 3059f4259b30SLisandro Dalcin NULL, 3060f4259b30SLisandro Dalcin /*129*/ 
NULL, 3061f4259b30SLisandro Dalcin NULL, 3062f4259b30SLisandro Dalcin NULL, 3063f4259b30SLisandro Dalcin NULL, 3064f4259b30SLisandro Dalcin NULL, 3065f4259b30SLisandro Dalcin /*134*/ NULL, 3066f4259b30SLisandro Dalcin NULL, 3067f4259b30SLisandro Dalcin NULL, 3068f4259b30SLisandro Dalcin NULL, 3069f4259b30SLisandro Dalcin NULL, 307046533700Sstefano_zampini /*139*/ MatSetBlockSizes_Default, 3071f4259b30SLisandro Dalcin NULL, 3072f4259b30SLisandro Dalcin NULL, 3073bdf6f3fcSHong Zhang MatFDColoringSetUp_SeqXAIJ, 3074f4259b30SLisandro Dalcin NULL, 307586e85357SHong Zhang /*144*/ MatCreateMPIMatConcatenateSeqMat_SeqBAIJ, 3076d70f29a3SPierre Jolivet MatDestroySubMatrices_SeqBAIJ, 3077d70f29a3SPierre Jolivet NULL, 307899a7f59eSMark Adams NULL, 307999a7f59eSMark Adams NULL, 30807fb60732SBarry Smith NULL, 30817fb60732SBarry Smith /*150*/ NULL, 3082dec0b466SHong Zhang NULL}; 30832593348eSBarry Smith 3084ff6a9541SJacob Faibussowitsch static PetscErrorCode MatStoreValues_SeqBAIJ(Mat mat) 3085d71ae5a4SJacob Faibussowitsch { 30863e90b805SBarry Smith Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data; 30878ece6314SShri Abhyankar PetscInt nz = aij->i[aij->mbs] * aij->bs2; 30883e90b805SBarry Smith 30893e90b805SBarry Smith PetscFunctionBegin; 30905f80ce2aSJacob Faibussowitsch PetscCheck(aij->nonew == 1, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first"); 30913e90b805SBarry Smith 30923e90b805SBarry Smith /* allocate space for values if not already there */ 3093ff6a9541SJacob Faibussowitsch if (!aij->saved_values) PetscCall(PetscMalloc1(nz + 1, &aij->saved_values)); 30943e90b805SBarry Smith 30953e90b805SBarry Smith /* copy values over */ 30969566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(aij->saved_values, aij->a, nz)); 30973ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 30983e90b805SBarry Smith } 30993e90b805SBarry Smith 3100ff6a9541SJacob Faibussowitsch static PetscErrorCode MatRetrieveValues_SeqBAIJ(Mat mat) 
3101d71ae5a4SJacob Faibussowitsch { 31023e90b805SBarry Smith Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data; 31038ece6314SShri Abhyankar PetscInt nz = aij->i[aij->mbs] * aij->bs2; 31043e90b805SBarry Smith 31053e90b805SBarry Smith PetscFunctionBegin; 31065f80ce2aSJacob Faibussowitsch PetscCheck(aij->nonew == 1, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first"); 31075f80ce2aSJacob Faibussowitsch PetscCheck(aij->saved_values, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Must call MatStoreValues(A);first"); 31083e90b805SBarry Smith 31093e90b805SBarry Smith /* copy values over */ 31109566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(aij->a, aij->saved_values, nz)); 31113ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 31123e90b805SBarry Smith } 31133e90b805SBarry Smith 3114cc2e6a90SBarry Smith PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType, MatReuse, Mat *); 3115cc2e6a90SBarry Smith PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqSBAIJ(Mat, MatType, MatReuse, Mat *); 3116273d9f13SBarry Smith 3117d71ae5a4SJacob Faibussowitsch PetscErrorCode MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B, PetscInt bs, PetscInt nz, PetscInt *nnz) 3118d71ae5a4SJacob Faibussowitsch { 3119ad79cf63SBarry Smith Mat_SeqBAIJ *b = (Mat_SeqBAIJ *)B->data; 3120535b19f3SBarry Smith PetscInt i, mbs, nbs, bs2; 31218afaa268SBarry Smith PetscBool flg = PETSC_FALSE, skipallocation = PETSC_FALSE, realalloc = PETSC_FALSE; 3122a23d5eceSKris Buschelman 3123a23d5eceSKris Buschelman PetscFunctionBegin; 3124ad79cf63SBarry Smith if (B->hash_active) { 3125ad79cf63SBarry Smith PetscInt bs; 3126ad79cf63SBarry Smith PetscCall(PetscMemcpy(&B->ops, &b->cops, sizeof(*(B->ops)))); 3127ad79cf63SBarry Smith PetscCall(PetscHMapIJVDestroy(&b->ht)); 3128ad79cf63SBarry Smith PetscCall(MatGetBlockSize(B, &bs)); 3129ad79cf63SBarry Smith if (bs > 1) PetscCall(PetscHSetIJDestroy(&b->bht)); 3130ad79cf63SBarry Smith PetscCall(PetscFree(b->dnz)); 
3131ad79cf63SBarry Smith PetscCall(PetscFree(b->bdnz)); 3132ad79cf63SBarry Smith B->hash_active = PETSC_FALSE; 3133ad79cf63SBarry Smith } 31342576faa2SJed Brown if (nz >= 0 || nnz) realalloc = PETSC_TRUE; 3135ab93d7beSBarry Smith if (nz == MAT_SKIP_ALLOCATION) { 3136ab93d7beSBarry Smith skipallocation = PETSC_TRUE; 3137ab93d7beSBarry Smith nz = 0; 3138ab93d7beSBarry Smith } 31398c07d4e3SBarry Smith 31409566063dSJacob Faibussowitsch PetscCall(MatSetBlockSize(B, PetscAbs(bs))); 31419566063dSJacob Faibussowitsch PetscCall(PetscLayoutSetUp(B->rmap)); 31429566063dSJacob Faibussowitsch PetscCall(PetscLayoutSetUp(B->cmap)); 31439566063dSJacob Faibussowitsch PetscCall(PetscLayoutGetBlockSize(B->rmap, &bs)); 3144899cda47SBarry Smith 3145899cda47SBarry Smith B->preallocated = PETSC_TRUE; 3146899cda47SBarry Smith 3147d0f46423SBarry Smith mbs = B->rmap->n / bs; 3148d0f46423SBarry Smith nbs = B->cmap->n / bs; 3149a23d5eceSKris Buschelman bs2 = bs * bs; 3150a23d5eceSKris Buschelman 31515f80ce2aSJacob Faibussowitsch PetscCheck(mbs * bs == B->rmap->n && nbs * bs == B->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Number rows %" PetscInt_FMT ", cols %" PetscInt_FMT " must be divisible by blocksize %" PetscInt_FMT, B->rmap->N, B->cmap->n, bs); 3152a23d5eceSKris Buschelman 3153a23d5eceSKris Buschelman if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 31545f80ce2aSJacob Faibussowitsch PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "nz cannot be less than 0: value %" PetscInt_FMT, nz); 3155a23d5eceSKris Buschelman if (nnz) { 3156a23d5eceSKris Buschelman for (i = 0; i < mbs; i++) { 31575f80ce2aSJacob Faibussowitsch PetscCheck(nnz[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "nnz cannot be less than 0: local row %" PetscInt_FMT " value %" PetscInt_FMT, i, nnz[i]); 31585f80ce2aSJacob Faibussowitsch PetscCheck(nnz[i] <= nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "nnz cannot be greater than block row length: local row %" PetscInt_FMT " value %" PetscInt_FMT 
" rowlength %" PetscInt_FMT, i, nnz[i], nbs); 3159a23d5eceSKris Buschelman } 3160a23d5eceSKris Buschelman } 3161a23d5eceSKris Buschelman 3162d0609cedSBarry Smith PetscOptionsBegin(PetscObjectComm((PetscObject)B), NULL, "Optimize options for SEQBAIJ matrix 2 ", "Mat"); 31639566063dSJacob Faibussowitsch PetscCall(PetscOptionsBool("-mat_no_unroll", "Do not optimize for block size (slow)", NULL, flg, &flg, NULL)); 3164d0609cedSBarry Smith PetscOptionsEnd(); 31658c07d4e3SBarry Smith 3166a23d5eceSKris Buschelman if (!flg) { 3167a23d5eceSKris Buschelman switch (bs) { 3168a23d5eceSKris Buschelman case 1: 3169a23d5eceSKris Buschelman B->ops->mult = MatMult_SeqBAIJ_1; 3170a23d5eceSKris Buschelman B->ops->multadd = MatMultAdd_SeqBAIJ_1; 3171a23d5eceSKris Buschelman break; 3172a23d5eceSKris Buschelman case 2: 3173a23d5eceSKris Buschelman B->ops->mult = MatMult_SeqBAIJ_2; 3174a23d5eceSKris Buschelman B->ops->multadd = MatMultAdd_SeqBAIJ_2; 3175a23d5eceSKris Buschelman break; 3176a23d5eceSKris Buschelman case 3: 3177a23d5eceSKris Buschelman B->ops->mult = MatMult_SeqBAIJ_3; 3178a23d5eceSKris Buschelman B->ops->multadd = MatMultAdd_SeqBAIJ_3; 3179a23d5eceSKris Buschelman break; 3180a23d5eceSKris Buschelman case 4: 3181a23d5eceSKris Buschelman B->ops->mult = MatMult_SeqBAIJ_4; 3182a23d5eceSKris Buschelman B->ops->multadd = MatMultAdd_SeqBAIJ_4; 3183a23d5eceSKris Buschelman break; 3184a23d5eceSKris Buschelman case 5: 3185a23d5eceSKris Buschelman B->ops->mult = MatMult_SeqBAIJ_5; 3186a23d5eceSKris Buschelman B->ops->multadd = MatMultAdd_SeqBAIJ_5; 3187a23d5eceSKris Buschelman break; 3188a23d5eceSKris Buschelman case 6: 3189a23d5eceSKris Buschelman B->ops->mult = MatMult_SeqBAIJ_6; 3190a23d5eceSKris Buschelman B->ops->multadd = MatMultAdd_SeqBAIJ_6; 3191a23d5eceSKris Buschelman break; 3192a23d5eceSKris Buschelman case 7: 3193a23d5eceSKris Buschelman B->ops->mult = MatMult_SeqBAIJ_7; 3194a23d5eceSKris Buschelman B->ops->multadd = MatMultAdd_SeqBAIJ_7; 3195a23d5eceSKris Buschelman 
break; 31969371c9d4SSatish Balay case 9: { 31976679dcc1SBarry Smith PetscInt version = 1; 31989566063dSJacob Faibussowitsch PetscCall(PetscOptionsGetInt(NULL, ((PetscObject)B)->prefix, "-mat_baij_mult_version", &version, NULL)); 31996679dcc1SBarry Smith switch (version) { 32005f70456aSHong Zhang #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX2__) && defined(__FMA__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES) 32016679dcc1SBarry Smith case 1: 320296e086a2SDaniel Kokron B->ops->mult = MatMult_SeqBAIJ_9_AVX2; 320396e086a2SDaniel Kokron B->ops->multadd = MatMultAdd_SeqBAIJ_9_AVX2; 32049566063dSJacob Faibussowitsch PetscCall(PetscInfo((PetscObject)B, "Using AVX2 for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", bs)); 32056679dcc1SBarry Smith break; 32066679dcc1SBarry Smith #endif 32076679dcc1SBarry Smith default: 320896e086a2SDaniel Kokron B->ops->mult = MatMult_SeqBAIJ_N; 320996e086a2SDaniel Kokron B->ops->multadd = MatMultAdd_SeqBAIJ_N; 32109566063dSJacob Faibussowitsch PetscCall(PetscInfo((PetscObject)B, "Using BLAS for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", bs)); 321196e086a2SDaniel Kokron break; 32126679dcc1SBarry Smith } 32136679dcc1SBarry Smith break; 32146679dcc1SBarry Smith } 3215ebada01fSBarry Smith case 11: 3216ebada01fSBarry Smith B->ops->mult = MatMult_SeqBAIJ_11; 3217ebada01fSBarry Smith B->ops->multadd = MatMultAdd_SeqBAIJ_11; 3218ebada01fSBarry Smith break; 32199371c9d4SSatish Balay case 12: { 32206679dcc1SBarry Smith PetscInt version = 1; 32219566063dSJacob Faibussowitsch PetscCall(PetscOptionsGetInt(NULL, ((PetscObject)B)->prefix, "-mat_baij_mult_version", &version, NULL)); 32226679dcc1SBarry Smith switch (version) { 32236679dcc1SBarry Smith case 1: 32246679dcc1SBarry Smith B->ops->mult = MatMult_SeqBAIJ_12_ver1; 32256679dcc1SBarry Smith B->ops->multadd = MatMultAdd_SeqBAIJ_12_ver1; 32269566063dSJacob Faibussowitsch PetscCall(PetscInfo((PetscObject)B, "Using version %" 
PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", version, bs)); 32278ab949d8SShri Abhyankar break; 32286679dcc1SBarry Smith case 2: 32296679dcc1SBarry Smith B->ops->mult = MatMult_SeqBAIJ_12_ver2; 32306679dcc1SBarry Smith B->ops->multadd = MatMultAdd_SeqBAIJ_12_ver2; 32319566063dSJacob Faibussowitsch PetscCall(PetscInfo((PetscObject)B, "Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", version, bs)); 32326679dcc1SBarry Smith break; 32336679dcc1SBarry Smith #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX2__) && defined(__FMA__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES) 32346679dcc1SBarry Smith case 3: 32356679dcc1SBarry Smith B->ops->mult = MatMult_SeqBAIJ_12_AVX2; 32366679dcc1SBarry Smith B->ops->multadd = MatMultAdd_SeqBAIJ_12_ver1; 32379566063dSJacob Faibussowitsch PetscCall(PetscInfo((PetscObject)B, "Using AVX2 for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", bs)); 32386679dcc1SBarry Smith break; 32396679dcc1SBarry Smith #endif 3240a23d5eceSKris Buschelman default: 3241a23d5eceSKris Buschelman B->ops->mult = MatMult_SeqBAIJ_N; 3242a23d5eceSKris Buschelman B->ops->multadd = MatMultAdd_SeqBAIJ_N; 32439566063dSJacob Faibussowitsch PetscCall(PetscInfo((PetscObject)B, "Using BLAS for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", bs)); 32446679dcc1SBarry Smith break; 32456679dcc1SBarry Smith } 32466679dcc1SBarry Smith break; 32476679dcc1SBarry Smith } 32489371c9d4SSatish Balay case 15: { 32496679dcc1SBarry Smith PetscInt version = 1; 32509566063dSJacob Faibussowitsch PetscCall(PetscOptionsGetInt(NULL, ((PetscObject)B)->prefix, "-mat_baij_mult_version", &version, NULL)); 32516679dcc1SBarry Smith switch (version) { 32526679dcc1SBarry Smith case 1: 32536679dcc1SBarry Smith B->ops->mult = MatMult_SeqBAIJ_15_ver1; 32549566063dSJacob Faibussowitsch PetscCall(PetscInfo((PetscObject)B, "Using version %" PetscInt_FMT " of MatMult for BAIJ 
for blocksize %" PetscInt_FMT "\n", version, bs)); 32556679dcc1SBarry Smith break; 32566679dcc1SBarry Smith case 2: 32576679dcc1SBarry Smith B->ops->mult = MatMult_SeqBAIJ_15_ver2; 32589566063dSJacob Faibussowitsch PetscCall(PetscInfo((PetscObject)B, "Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", version, bs)); 32596679dcc1SBarry Smith break; 32606679dcc1SBarry Smith case 3: 32616679dcc1SBarry Smith B->ops->mult = MatMult_SeqBAIJ_15_ver3; 32629566063dSJacob Faibussowitsch PetscCall(PetscInfo((PetscObject)B, "Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", version, bs)); 32636679dcc1SBarry Smith break; 32646679dcc1SBarry Smith case 4: 32656679dcc1SBarry Smith B->ops->mult = MatMult_SeqBAIJ_15_ver4; 32669566063dSJacob Faibussowitsch PetscCall(PetscInfo((PetscObject)B, "Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", version, bs)); 32676679dcc1SBarry Smith break; 32686679dcc1SBarry Smith default: 32696679dcc1SBarry Smith B->ops->mult = MatMult_SeqBAIJ_N; 32709566063dSJacob Faibussowitsch PetscCall(PetscInfo((PetscObject)B, "Using BLAS for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", bs)); 32716679dcc1SBarry Smith break; 32726679dcc1SBarry Smith } 32736679dcc1SBarry Smith B->ops->multadd = MatMultAdd_SeqBAIJ_N; 32746679dcc1SBarry Smith break; 32756679dcc1SBarry Smith } 32766679dcc1SBarry Smith default: 32776679dcc1SBarry Smith B->ops->mult = MatMult_SeqBAIJ_N; 32786679dcc1SBarry Smith B->ops->multadd = MatMultAdd_SeqBAIJ_N; 32799566063dSJacob Faibussowitsch PetscCall(PetscInfo((PetscObject)B, "Using BLAS for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", bs)); 3280a23d5eceSKris Buschelman break; 3281a23d5eceSKris Buschelman } 3282a23d5eceSKris Buschelman } 3283e48d15efSToby Isaac B->ops->sor = MatSOR_SeqBAIJ; 3284a23d5eceSKris Buschelman b->mbs = mbs; 3285a23d5eceSKris Buschelman b->nbs = nbs; 3286ab93d7beSBarry Smith if 
(!skipallocation) { 32872ee49352SLisandro Dalcin if (!b->imax) { 32889566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(mbs, &b->imax, mbs, &b->ilen)); 328926fbe8dcSKarl Rupp 32904fd072dbSBarry Smith b->free_imax_ilen = PETSC_TRUE; 32912ee49352SLisandro Dalcin } 3292ab93d7beSBarry Smith /* b->ilen will count nonzeros in each block row so far. */ 329326fbe8dcSKarl Rupp for (i = 0; i < mbs; i++) b->ilen[i] = 0; 3294a23d5eceSKris Buschelman if (!nnz) { 3295a23d5eceSKris Buschelman if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 3296c62bd62aSJed Brown else if (nz < 0) nz = 1; 32975d2a9ed1SStefano Zampini nz = PetscMin(nz, nbs); 3298a23d5eceSKris Buschelman for (i = 0; i < mbs; i++) b->imax[i] = nz; 32999566063dSJacob Faibussowitsch PetscCall(PetscIntMultError(nz, mbs, &nz)); 3300a23d5eceSKris Buschelman } else { 3301c73702f5SBarry Smith PetscInt64 nz64 = 0; 33029371c9d4SSatish Balay for (i = 0; i < mbs; i++) { 33039371c9d4SSatish Balay b->imax[i] = nnz[i]; 33049371c9d4SSatish Balay nz64 += nnz[i]; 33059371c9d4SSatish Balay } 33069566063dSJacob Faibussowitsch PetscCall(PetscIntCast(nz64, &nz)); 3307a23d5eceSKris Buschelman } 3308a23d5eceSKris Buschelman 3309a23d5eceSKris Buschelman /* allocate the matrix space */ 33109566063dSJacob Faibussowitsch PetscCall(MatSeqXAIJFreeAIJ(B, &b->a, &b->j, &b->i)); 3311672ba085SHong Zhang if (B->structure_only) { 33129566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(nz, &b->j)); 33139566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(B->rmap->N + 1, &b->i)); 3314672ba085SHong Zhang } else { 33156679dcc1SBarry Smith PetscInt nzbs2 = 0; 33169566063dSJacob Faibussowitsch PetscCall(PetscIntMultError(nz, bs2, &nzbs2)); 33179566063dSJacob Faibussowitsch PetscCall(PetscMalloc3(nzbs2, &b->a, nz, &b->j, B->rmap->N + 1, &b->i)); 33189566063dSJacob Faibussowitsch PetscCall(PetscArrayzero(b->a, nz * bs2)); 3319672ba085SHong Zhang } 33209566063dSJacob Faibussowitsch PetscCall(PetscArrayzero(b->j, nz)); 332126fbe8dcSKarl Rupp 
3322672ba085SHong Zhang if (B->structure_only) { 3323672ba085SHong Zhang b->singlemalloc = PETSC_FALSE; 3324672ba085SHong Zhang b->free_a = PETSC_FALSE; 3325672ba085SHong Zhang } else { 3326a23d5eceSKris Buschelman b->singlemalloc = PETSC_TRUE; 3327672ba085SHong Zhang b->free_a = PETSC_TRUE; 3328672ba085SHong Zhang } 3329672ba085SHong Zhang b->free_ij = PETSC_TRUE; 3330672ba085SHong Zhang 3331a23d5eceSKris Buschelman b->i[0] = 0; 3332ad540459SPierre Jolivet for (i = 1; i < mbs + 1; i++) b->i[i] = b->i[i - 1] + b->imax[i - 1]; 3333672ba085SHong Zhang 3334e811da20SHong Zhang } else { 3335e6b907acSBarry Smith b->free_a = PETSC_FALSE; 3336e6b907acSBarry Smith b->free_ij = PETSC_FALSE; 3337ab93d7beSBarry Smith } 3338a23d5eceSKris Buschelman 3339a23d5eceSKris Buschelman b->bs2 = bs2; 3340a23d5eceSKris Buschelman b->mbs = mbs; 3341a23d5eceSKris Buschelman b->nz = 0; 3342b32cb4a7SJed Brown b->maxnz = nz; 3343b32cb4a7SJed Brown B->info.nz_unneeded = (PetscReal)b->maxnz * bs2; 3344cb7b82ddSBarry Smith B->was_assembled = PETSC_FALSE; 3345cb7b82ddSBarry Smith B->assembled = PETSC_FALSE; 33469566063dSJacob Faibussowitsch if (realalloc) PetscCall(MatSetOption(B, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_TRUE)); 33473ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 3348a23d5eceSKris Buschelman } 3349a23d5eceSKris Buschelman 3350d71ae5a4SJacob Faibussowitsch PetscErrorCode MatSeqBAIJSetPreallocationCSR_SeqBAIJ(Mat B, PetscInt bs, const PetscInt ii[], const PetscInt jj[], const PetscScalar V[]) 3351d71ae5a4SJacob Faibussowitsch { 3352725b52f3SLisandro Dalcin PetscInt i, m, nz, nz_max = 0, *nnz; 3353f4259b30SLisandro Dalcin PetscScalar *values = NULL; 3354d47bf9aaSJed Brown PetscBool roworiented = ((Mat_SeqBAIJ *)B->data)->roworiented; 3355725b52f3SLisandro Dalcin 3356725b52f3SLisandro Dalcin PetscFunctionBegin; 33575f80ce2aSJacob Faibussowitsch PetscCheck(bs >= 1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Invalid block size specified, must be positive but it is %" 
PetscInt_FMT, bs); 33589566063dSJacob Faibussowitsch PetscCall(PetscLayoutSetBlockSize(B->rmap, bs)); 33599566063dSJacob Faibussowitsch PetscCall(PetscLayoutSetBlockSize(B->cmap, bs)); 33609566063dSJacob Faibussowitsch PetscCall(PetscLayoutSetUp(B->rmap)); 33619566063dSJacob Faibussowitsch PetscCall(PetscLayoutSetUp(B->cmap)); 33629566063dSJacob Faibussowitsch PetscCall(PetscLayoutGetBlockSize(B->rmap, &bs)); 3363d0f46423SBarry Smith m = B->rmap->n / bs; 3364725b52f3SLisandro Dalcin 33655f80ce2aSJacob Faibussowitsch PetscCheck(ii[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %" PetscInt_FMT, ii[0]); 33669566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(m + 1, &nnz)); 3367725b52f3SLisandro Dalcin for (i = 0; i < m; i++) { 3368cf12db73SBarry Smith nz = ii[i + 1] - ii[i]; 33695f80ce2aSJacob Faibussowitsch PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Local row %" PetscInt_FMT " has a negative number of columns %" PetscInt_FMT, i, nz); 3370725b52f3SLisandro Dalcin nz_max = PetscMax(nz_max, nz); 3371725b52f3SLisandro Dalcin nnz[i] = nz; 3372725b52f3SLisandro Dalcin } 33739566063dSJacob Faibussowitsch PetscCall(MatSeqBAIJSetPreallocation(B, bs, 0, nnz)); 33749566063dSJacob Faibussowitsch PetscCall(PetscFree(nnz)); 3375725b52f3SLisandro Dalcin 3376725b52f3SLisandro Dalcin values = (PetscScalar *)V; 337748a46eb9SPierre Jolivet if (!values) PetscCall(PetscCalloc1(bs * bs * (nz_max + 1), &values)); 3378725b52f3SLisandro Dalcin for (i = 0; i < m; i++) { 3379cf12db73SBarry Smith PetscInt ncols = ii[i + 1] - ii[i]; 3380cf12db73SBarry Smith const PetscInt *icols = jj + ii[i]; 3381bb80cfbbSStefano Zampini if (bs == 1 || !roworiented) { 3382cf12db73SBarry Smith const PetscScalar *svals = values + (V ? 
(bs * bs * ii[i]) : 0); 33839566063dSJacob Faibussowitsch PetscCall(MatSetValuesBlocked_SeqBAIJ(B, 1, &i, ncols, icols, svals, INSERT_VALUES)); 33843adadaf3SJed Brown } else { 33853adadaf3SJed Brown PetscInt j; 33863adadaf3SJed Brown for (j = 0; j < ncols; j++) { 33873adadaf3SJed Brown const PetscScalar *svals = values + (V ? (bs * bs * (ii[i] + j)) : 0); 33889566063dSJacob Faibussowitsch PetscCall(MatSetValuesBlocked_SeqBAIJ(B, 1, &i, 1, &icols[j], svals, INSERT_VALUES)); 33893adadaf3SJed Brown } 33903adadaf3SJed Brown } 3391725b52f3SLisandro Dalcin } 33929566063dSJacob Faibussowitsch if (!V) PetscCall(PetscFree(values)); 33939566063dSJacob Faibussowitsch PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 33949566063dSJacob Faibussowitsch PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 33959566063dSJacob Faibussowitsch PetscCall(MatSetOption(B, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE)); 33963ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 3397725b52f3SLisandro Dalcin } 3398725b52f3SLisandro Dalcin 3399cda14afcSprj- /*@C 340011a5261eSBarry Smith MatSeqBAIJGetArray - gives read/write access to the array where the data for a `MATSEQBAIJ` matrix is stored 3401cda14afcSprj- 3402cda14afcSprj- Not Collective 3403cda14afcSprj- 3404cda14afcSprj- Input Parameter: 340511a5261eSBarry Smith . mat - a `MATSEQBAIJ` matrix 3406cda14afcSprj- 3407cda14afcSprj- Output Parameter: 3408cda14afcSprj- . 
array - pointer to the data 3409cda14afcSprj- 3410cda14afcSprj- Level: intermediate 3411cda14afcSprj- 34121cc06b55SBarry Smith .seealso: [](ch_matrices), `Mat`, `MATSEQBAIJ`, `MatSeqBAIJRestoreArray()`, `MatSeqAIJGetArray()`, `MatSeqAIJRestoreArray()` 3413cda14afcSprj- @*/ 3414d71ae5a4SJacob Faibussowitsch PetscErrorCode MatSeqBAIJGetArray(Mat A, PetscScalar **array) 3415d71ae5a4SJacob Faibussowitsch { 3416cda14afcSprj- PetscFunctionBegin; 3417cac4c232SBarry Smith PetscUseMethod(A, "MatSeqBAIJGetArray_C", (Mat, PetscScalar **), (A, array)); 34183ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 3419cda14afcSprj- } 3420cda14afcSprj- 3421cda14afcSprj- /*@C 342211a5261eSBarry Smith MatSeqBAIJRestoreArray - returns access to the array where the data for a `MATSEQBAIJ` matrix is stored obtained by `MatSeqBAIJGetArray()` 3423cda14afcSprj- 3424cda14afcSprj- Not Collective 3425cda14afcSprj- 3426cda14afcSprj- Input Parameters: 342711a5261eSBarry Smith + mat - a `MATSEQBAIJ` matrix 3428cda14afcSprj- - array - pointer to the data 3429cda14afcSprj- 3430cda14afcSprj- Level: intermediate 3431cda14afcSprj- 34321cc06b55SBarry Smith .seealso: [](ch_matrices), `Mat`, `MatSeqBAIJGetArray()`, `MatSeqAIJGetArray()`, `MatSeqAIJRestoreArray()` 3433cda14afcSprj- @*/ 3434d71ae5a4SJacob Faibussowitsch PetscErrorCode MatSeqBAIJRestoreArray(Mat A, PetscScalar **array) 3435d71ae5a4SJacob Faibussowitsch { 3436cda14afcSprj- PetscFunctionBegin; 3437cac4c232SBarry Smith PetscUseMethod(A, "MatSeqBAIJRestoreArray_C", (Mat, PetscScalar **), (A, array)); 34383ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 3439cda14afcSprj- } 3440cda14afcSprj- 34410bad9183SKris Buschelman /*MC 3442fafad747SKris Buschelman MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on 34430bad9183SKris Buschelman block sparse compressed row format. 
34440bad9183SKris Buschelman 34450bad9183SKris Buschelman Options Database Keys: 344620f4b53cSBarry Smith + -mat_type seqbaij - sets the matrix type to `MATSEQBAIJ` during a call to `MatSetFromOptions()` 34476679dcc1SBarry Smith - -mat_baij_mult_version version - indicate the version of the matrix-vector product to use (0 often indicates using BLAS) 34480bad9183SKris Buschelman 34490bad9183SKris Buschelman Level: beginner 34500cd7f59aSBarry Smith 34510cd7f59aSBarry Smith Notes: 345211a5261eSBarry Smith `MatSetOptions`(,`MAT_STRUCTURE_ONLY`,`PETSC_TRUE`) may be called for this matrix type. In this no 345311a5261eSBarry Smith space is allocated for the nonzero entries and any entries passed with `MatSetValues()` are ignored 34540bad9183SKris Buschelman 34552ef1f0ffSBarry Smith Run with `-info` to see what version of the matrix-vector product is being used 34566679dcc1SBarry Smith 34571cc06b55SBarry Smith .seealso: [](ch_matrices), `Mat`, `MatCreateSeqBAIJ()` 34580bad9183SKris Buschelman M*/ 34590bad9183SKris Buschelman 3460cc2e6a90SBarry Smith PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBSTRM(Mat, MatType, MatReuse, Mat *); 3461b24902e0SBarry Smith 3462d71ae5a4SJacob Faibussowitsch PETSC_EXTERN PetscErrorCode MatCreate_SeqBAIJ(Mat B) 3463d71ae5a4SJacob Faibussowitsch { 3464c1ac3661SBarry Smith PetscMPIInt size; 3465b6490206SBarry Smith Mat_SeqBAIJ *b; 34663b2fbd54SBarry Smith 34673a40ed3dSBarry Smith PetscFunctionBegin; 34689566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size)); 34695f80ce2aSJacob Faibussowitsch PetscCheck(size == 1, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Comm must be of size 1"); 3470b6490206SBarry Smith 34714dfa11a4SJacob Faibussowitsch PetscCall(PetscNew(&b)); 3472b0a32e0cSBarry Smith B->data = (void *)b; 34739566063dSJacob Faibussowitsch PetscCall(PetscMemcpy(B->ops, &MatOps_Values, sizeof(struct _MatOps))); 347426fbe8dcSKarl Rupp 3475f4259b30SLisandro Dalcin b->row = NULL; 3476f4259b30SLisandro 
Dalcin b->col = NULL; 3477f4259b30SLisandro Dalcin b->icol = NULL; 34782593348eSBarry Smith b->reallocs = 0; 3479f4259b30SLisandro Dalcin b->saved_values = NULL; 34802593348eSBarry Smith 3481c4992f7dSBarry Smith b->roworiented = PETSC_TRUE; 34822593348eSBarry Smith b->nonew = 0; 3483f4259b30SLisandro Dalcin b->diag = NULL; 3484f4259b30SLisandro Dalcin B->spptr = NULL; 3485b32cb4a7SJed Brown B->info.nz_unneeded = (PetscReal)b->maxnz * b->bs2; 3486a9817697SBarry Smith b->keepnonzeropattern = PETSC_FALSE; 34874e220ebcSLois Curfman McInnes 34889566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSeqBAIJGetArray_C", MatSeqBAIJGetArray_SeqBAIJ)); 34899566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSeqBAIJRestoreArray_C", MatSeqBAIJRestoreArray_SeqBAIJ)); 34909566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatStoreValues_C", MatStoreValues_SeqBAIJ)); 34919566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatRetrieveValues_C", MatRetrieveValues_SeqBAIJ)); 34929566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSeqBAIJSetColumnIndices_C", MatSeqBAIJSetColumnIndices_SeqBAIJ)); 34939566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqbaij_seqaij_C", MatConvert_SeqBAIJ_SeqAIJ)); 34949566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqbaij_seqsbaij_C", MatConvert_SeqBAIJ_SeqSBAIJ)); 34959566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSeqBAIJSetPreallocation_C", MatSeqBAIJSetPreallocation_SeqBAIJ)); 34969566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSeqBAIJSetPreallocationCSR_C", MatSeqBAIJSetPreallocationCSR_SeqBAIJ)); 34979566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatIsTranspose_C", 
MatIsTranspose_SeqBAIJ)); 34987ea3e4caSstefano_zampini #if defined(PETSC_HAVE_HYPRE) 34999566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqbaij_hypre_C", MatConvert_AIJ_HYPRE)); 35007ea3e4caSstefano_zampini #endif 35019566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqbaij_is_C", MatConvert_XAIJ_IS)); 35029566063dSJacob Faibussowitsch PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATSEQBAIJ)); 35033ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 35042593348eSBarry Smith } 35052593348eSBarry Smith 3506d71ae5a4SJacob Faibussowitsch PetscErrorCode MatDuplicateNoCreate_SeqBAIJ(Mat C, Mat A, MatDuplicateOption cpvalues, PetscBool mallocmatspace) 3507d71ae5a4SJacob Faibussowitsch { 3508b24902e0SBarry Smith Mat_SeqBAIJ *c = (Mat_SeqBAIJ *)C->data, *a = (Mat_SeqBAIJ *)A->data; 3509a96a251dSBarry Smith PetscInt i, mbs = a->mbs, nz = a->nz, bs2 = a->bs2; 3510de6a44a3SBarry Smith 35113a40ed3dSBarry Smith PetscFunctionBegin; 351231fe6a7dSBarry Smith PetscCheck(A->assembled, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONGSTATE, "Cannot duplicate unassembled matrix"); 35135f80ce2aSJacob Faibussowitsch PetscCheck(a->i[mbs] == nz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Corrupt matrix"); 35142593348eSBarry Smith 35154fd072dbSBarry Smith if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { 35164fd072dbSBarry Smith c->imax = a->imax; 35174fd072dbSBarry Smith c->ilen = a->ilen; 35184fd072dbSBarry Smith c->free_imax_ilen = PETSC_FALSE; 35194fd072dbSBarry Smith } else { 35209566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(mbs, &c->imax, mbs, &c->ilen)); 3521b6490206SBarry Smith for (i = 0; i < mbs; i++) { 35222593348eSBarry Smith c->imax[i] = a->imax[i]; 35232593348eSBarry Smith c->ilen[i] = a->ilen[i]; 35242593348eSBarry Smith } 35254fd072dbSBarry Smith c->free_imax_ilen = PETSC_TRUE; 35264fd072dbSBarry Smith } 35272593348eSBarry Smith 35282593348eSBarry Smith /* allocate 
the matrix space */ 352916a2bf60SHong Zhang if (mallocmatspace) { 35304fd072dbSBarry Smith if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { 35319566063dSJacob Faibussowitsch PetscCall(PetscCalloc1(bs2 * nz, &c->a)); 353226fbe8dcSKarl Rupp 35334fd072dbSBarry Smith c->i = a->i; 35344fd072dbSBarry Smith c->j = a->j; 3535379be0ddSLisandro Dalcin c->singlemalloc = PETSC_FALSE; 3536379be0ddSLisandro Dalcin c->free_a = PETSC_TRUE; 3537379be0ddSLisandro Dalcin c->free_ij = PETSC_FALSE; 35384fd072dbSBarry Smith c->parent = A; 35391e40a84eSLisandro Dalcin C->preallocated = PETSC_TRUE; 35401e40a84eSLisandro Dalcin C->assembled = PETSC_TRUE; 354126fbe8dcSKarl Rupp 35429566063dSJacob Faibussowitsch PetscCall(PetscObjectReference((PetscObject)A)); 35439566063dSJacob Faibussowitsch PetscCall(MatSetOption(A, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE)); 35449566063dSJacob Faibussowitsch PetscCall(MatSetOption(C, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE)); 35454fd072dbSBarry Smith } else { 35469566063dSJacob Faibussowitsch PetscCall(PetscMalloc3(bs2 * nz, &c->a, nz, &c->j, mbs + 1, &c->i)); 354726fbe8dcSKarl Rupp 3548c4992f7dSBarry Smith c->singlemalloc = PETSC_TRUE; 3549379be0ddSLisandro Dalcin c->free_a = PETSC_TRUE; 35504fd072dbSBarry Smith c->free_ij = PETSC_TRUE; 355126fbe8dcSKarl Rupp 35529566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(c->i, a->i, mbs + 1)); 3553b6490206SBarry Smith if (mbs > 0) { 35549566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(c->j, a->j, nz)); 35552e8a6d31SBarry Smith if (cpvalues == MAT_COPY_VALUES) { 35569566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(c->a, a->a, bs2 * nz)); 35572e8a6d31SBarry Smith } else { 35589566063dSJacob Faibussowitsch PetscCall(PetscArrayzero(c->a, bs2 * nz)); 35592593348eSBarry Smith } 35602593348eSBarry Smith } 35611e40a84eSLisandro Dalcin C->preallocated = PETSC_TRUE; 35621e40a84eSLisandro Dalcin C->assembled = PETSC_TRUE; 356316a2bf60SHong Zhang } 35644fd072dbSBarry Smith } 356516a2bf60SHong Zhang 
35662593348eSBarry Smith c->roworiented = a->roworiented; 35672593348eSBarry Smith c->nonew = a->nonew; 356826fbe8dcSKarl Rupp 35699566063dSJacob Faibussowitsch PetscCall(PetscLayoutReference(A->rmap, &C->rmap)); 35709566063dSJacob Faibussowitsch PetscCall(PetscLayoutReference(A->cmap, &C->cmap)); 357126fbe8dcSKarl Rupp 35725c9eb25fSBarry Smith c->bs2 = a->bs2; 35735c9eb25fSBarry Smith c->mbs = a->mbs; 35745c9eb25fSBarry Smith c->nbs = a->nbs; 35752593348eSBarry Smith 35762593348eSBarry Smith if (a->diag) { 35774fd072dbSBarry Smith if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { 35784fd072dbSBarry Smith c->diag = a->diag; 35794fd072dbSBarry Smith c->free_diag = PETSC_FALSE; 35804fd072dbSBarry Smith } else { 35819566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(mbs + 1, &c->diag)); 358226fbe8dcSKarl Rupp for (i = 0; i < mbs; i++) c->diag[i] = a->diag[i]; 35834fd072dbSBarry Smith c->free_diag = PETSC_TRUE; 35844fd072dbSBarry Smith } 3585f4259b30SLisandro Dalcin } else c->diag = NULL; 358626fbe8dcSKarl Rupp 35872593348eSBarry Smith c->nz = a->nz; 3588f2cbd3d5SJed Brown c->maxnz = a->nz; /* Since we allocate exactly the right amount */ 3589f361c04dSBarry Smith c->solve_work = NULL; 3590f361c04dSBarry Smith c->mult_work = NULL; 3591f361c04dSBarry Smith c->sor_workt = NULL; 3592f361c04dSBarry Smith c->sor_work = NULL; 359388e51ccdSHong Zhang 359488e51ccdSHong Zhang c->compressedrow.use = a->compressedrow.use; 359588e51ccdSHong Zhang c->compressedrow.nrows = a->compressedrow.nrows; 3596cd6b891eSBarry Smith if (a->compressedrow.use) { 359788e51ccdSHong Zhang i = a->compressedrow.nrows; 35989566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(i + 1, &c->compressedrow.i, i + 1, &c->compressedrow.rindex)); 35999566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(c->compressedrow.i, a->compressedrow.i, i + 1)); 36009566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(c->compressedrow.rindex, a->compressedrow.rindex, i)); 360188e51ccdSHong Zhang } else { 360288e51ccdSHong 
Zhang c->compressedrow.use = PETSC_FALSE; 36030298fd71SBarry Smith c->compressedrow.i = NULL; 36040298fd71SBarry Smith c->compressedrow.rindex = NULL; 360588e51ccdSHong Zhang } 3606e56f5c9eSBarry Smith C->nonzerostate = A->nonzerostate; 360726fbe8dcSKarl Rupp 36089566063dSJacob Faibussowitsch PetscCall(PetscFunctionListDuplicate(((PetscObject)A)->qlist, &((PetscObject)C)->qlist)); 36093ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 36102593348eSBarry Smith } 36112593348eSBarry Smith 3612d71ae5a4SJacob Faibussowitsch PetscErrorCode MatDuplicate_SeqBAIJ(Mat A, MatDuplicateOption cpvalues, Mat *B) 3613d71ae5a4SJacob Faibussowitsch { 3614b24902e0SBarry Smith PetscFunctionBegin; 36159566063dSJacob Faibussowitsch PetscCall(MatCreate(PetscObjectComm((PetscObject)A), B)); 36169566063dSJacob Faibussowitsch PetscCall(MatSetSizes(*B, A->rmap->N, A->cmap->n, A->rmap->N, A->cmap->n)); 36179566063dSJacob Faibussowitsch PetscCall(MatSetType(*B, MATSEQBAIJ)); 36189566063dSJacob Faibussowitsch PetscCall(MatDuplicateNoCreate_SeqBAIJ(*B, A, cpvalues, PETSC_TRUE)); 36193ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 3620b24902e0SBarry Smith } 3621b24902e0SBarry Smith 3622618cc2edSLisandro Dalcin /* Used for both SeqBAIJ and SeqSBAIJ matrices */ 3623d71ae5a4SJacob Faibussowitsch PetscErrorCode MatLoad_SeqBAIJ_Binary(Mat mat, PetscViewer viewer) 3624d71ae5a4SJacob Faibussowitsch { 3625b51a4376SLisandro Dalcin PetscInt header[4], M, N, nz, bs, m, n, mbs, nbs, rows, cols, sum, i, j, k; 3626b51a4376SLisandro Dalcin PetscInt *rowidxs, *colidxs; 3627b51a4376SLisandro Dalcin PetscScalar *matvals; 3628b51a4376SLisandro Dalcin 3629b51a4376SLisandro Dalcin PetscFunctionBegin; 36309566063dSJacob Faibussowitsch PetscCall(PetscViewerSetUp(viewer)); 3631b51a4376SLisandro Dalcin 3632b51a4376SLisandro Dalcin /* read matrix header */ 36339566063dSJacob Faibussowitsch PetscCall(PetscViewerBinaryRead(viewer, header, 4, NULL, PETSC_INT)); 36345f80ce2aSJacob 
Faibussowitsch PetscCheck(header[0] == MAT_FILE_CLASSID, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Not a matrix object in file"); 36359371c9d4SSatish Balay M = header[1]; 36369371c9d4SSatish Balay N = header[2]; 36379371c9d4SSatish Balay nz = header[3]; 36385f80ce2aSJacob Faibussowitsch PetscCheck(M >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix row size (%" PetscInt_FMT ") in file is negative", M); 36395f80ce2aSJacob Faibussowitsch PetscCheck(N >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix column size (%" PetscInt_FMT ") in file is negative", N); 36405f80ce2aSJacob Faibussowitsch PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix stored in special format on disk, cannot load as SeqBAIJ"); 3641b51a4376SLisandro Dalcin 3642b51a4376SLisandro Dalcin /* set block sizes from the viewer's .info file */ 36439566063dSJacob Faibussowitsch PetscCall(MatLoad_Binary_BlockSizes(mat, viewer)); 3644b51a4376SLisandro Dalcin /* set local and global sizes if not set already */ 3645b51a4376SLisandro Dalcin if (mat->rmap->n < 0) mat->rmap->n = M; 3646b51a4376SLisandro Dalcin if (mat->cmap->n < 0) mat->cmap->n = N; 3647b51a4376SLisandro Dalcin if (mat->rmap->N < 0) mat->rmap->N = M; 3648b51a4376SLisandro Dalcin if (mat->cmap->N < 0) mat->cmap->N = N; 36499566063dSJacob Faibussowitsch PetscCall(PetscLayoutSetUp(mat->rmap)); 36509566063dSJacob Faibussowitsch PetscCall(PetscLayoutSetUp(mat->cmap)); 3651b51a4376SLisandro Dalcin 3652b51a4376SLisandro Dalcin /* check if the matrix sizes are correct */ 36539566063dSJacob Faibussowitsch PetscCall(MatGetSize(mat, &rows, &cols)); 36545f80ce2aSJacob Faibussowitsch PetscCheck(M == rows && N == cols, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%" PetscInt_FMT ", %" PetscInt_FMT ") than the input matrix (%" PetscInt_FMT ", %" PetscInt_FMT ")", M, N, rows, cols); 36559566063dSJacob Faibussowitsch PetscCall(MatGetBlockSize(mat, 
&bs)); 36569566063dSJacob Faibussowitsch PetscCall(MatGetLocalSize(mat, &m, &n)); 36579371c9d4SSatish Balay mbs = m / bs; 36589371c9d4SSatish Balay nbs = n / bs; 3659b51a4376SLisandro Dalcin 3660b51a4376SLisandro Dalcin /* read in row lengths, column indices and nonzero values */ 36619566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(m + 1, &rowidxs)); 36629566063dSJacob Faibussowitsch PetscCall(PetscViewerBinaryRead(viewer, rowidxs + 1, m, NULL, PETSC_INT)); 36639371c9d4SSatish Balay rowidxs[0] = 0; 36649371c9d4SSatish Balay for (i = 0; i < m; i++) rowidxs[i + 1] += rowidxs[i]; 3665b51a4376SLisandro Dalcin sum = rowidxs[m]; 36665f80ce2aSJacob Faibussowitsch PetscCheck(sum == nz, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Inconsistent matrix data in file: nonzeros = %" PetscInt_FMT ", sum-row-lengths = %" PetscInt_FMT, nz, sum); 3667b51a4376SLisandro Dalcin 3668b51a4376SLisandro Dalcin /* read in column indices and nonzero values */ 36699566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(rowidxs[m], &colidxs, nz, &matvals)); 36709566063dSJacob Faibussowitsch PetscCall(PetscViewerBinaryRead(viewer, colidxs, rowidxs[m], NULL, PETSC_INT)); 36719566063dSJacob Faibussowitsch PetscCall(PetscViewerBinaryRead(viewer, matvals, rowidxs[m], NULL, PETSC_SCALAR)); 3672b51a4376SLisandro Dalcin 3673b51a4376SLisandro Dalcin { /* preallocate matrix storage */ 3674b51a4376SLisandro Dalcin PetscBT bt; /* helper bit set to count nonzeros */ 3675b51a4376SLisandro Dalcin PetscInt *nnz; 3676618cc2edSLisandro Dalcin PetscBool sbaij; 3677b51a4376SLisandro Dalcin 36789566063dSJacob Faibussowitsch PetscCall(PetscBTCreate(nbs, &bt)); 36799566063dSJacob Faibussowitsch PetscCall(PetscCalloc1(mbs, &nnz)); 36809566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATSEQSBAIJ, &sbaij)); 3681b51a4376SLisandro Dalcin for (i = 0; i < mbs; i++) { 36829566063dSJacob Faibussowitsch PetscCall(PetscBTMemzero(nbs, bt)); 3683618cc2edSLisandro Dalcin for (k = 0; k < bs; k++) { 
3684618cc2edSLisandro Dalcin PetscInt row = bs * i + k; 3685618cc2edSLisandro Dalcin for (j = rowidxs[row]; j < rowidxs[row + 1]; j++) { 3686618cc2edSLisandro Dalcin PetscInt col = colidxs[j]; 3687618cc2edSLisandro Dalcin if (!sbaij || col >= row) 3688618cc2edSLisandro Dalcin if (!PetscBTLookupSet(bt, col / bs)) nnz[i]++; 3689618cc2edSLisandro Dalcin } 3690618cc2edSLisandro Dalcin } 3691b51a4376SLisandro Dalcin } 36929566063dSJacob Faibussowitsch PetscCall(PetscBTDestroy(&bt)); 36939566063dSJacob Faibussowitsch PetscCall(MatSeqBAIJSetPreallocation(mat, bs, 0, nnz)); 36949566063dSJacob Faibussowitsch PetscCall(MatSeqSBAIJSetPreallocation(mat, bs, 0, nnz)); 36959566063dSJacob Faibussowitsch PetscCall(PetscFree(nnz)); 3696b51a4376SLisandro Dalcin } 3697b51a4376SLisandro Dalcin 3698b51a4376SLisandro Dalcin /* store matrix values */ 3699b51a4376SLisandro Dalcin for (i = 0; i < m; i++) { 3700b51a4376SLisandro Dalcin PetscInt row = i, s = rowidxs[i], e = rowidxs[i + 1]; 37019566063dSJacob Faibussowitsch PetscCall((*mat->ops->setvalues)(mat, 1, &row, e - s, colidxs + s, matvals + s, INSERT_VALUES)); 3702b51a4376SLisandro Dalcin } 3703b51a4376SLisandro Dalcin 37049566063dSJacob Faibussowitsch PetscCall(PetscFree(rowidxs)); 37059566063dSJacob Faibussowitsch PetscCall(PetscFree2(colidxs, matvals)); 37069566063dSJacob Faibussowitsch PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY)); 37079566063dSJacob Faibussowitsch PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY)); 37083ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 3709b51a4376SLisandro Dalcin } 3710b51a4376SLisandro Dalcin 3711d71ae5a4SJacob Faibussowitsch PetscErrorCode MatLoad_SeqBAIJ(Mat mat, PetscViewer viewer) 3712d71ae5a4SJacob Faibussowitsch { 37137f489da9SVaclav Hapla PetscBool isbinary; 3714f501eaabSShri Abhyankar 3715f501eaabSShri Abhyankar PetscFunctionBegin; 37169566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary)); 
37175f80ce2aSJacob Faibussowitsch PetscCheck(isbinary, PetscObjectComm((PetscObject)viewer), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)mat)->type_name); 37189566063dSJacob Faibussowitsch PetscCall(MatLoad_SeqBAIJ_Binary(mat, viewer)); 37193ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 3720f501eaabSShri Abhyankar } 3721f501eaabSShri Abhyankar 3722273d9f13SBarry Smith /*@C 372311a5261eSBarry Smith MatCreateSeqBAIJ - Creates a sparse matrix in `MATSEQBAIJ` (block 3724273d9f13SBarry Smith compressed row) format. For good matrix assembly performance the 372520f4b53cSBarry Smith user should preallocate the matrix storage by setting the parameter `nz` 372620f4b53cSBarry Smith (or the array `nnz`). 37272593348eSBarry Smith 3728d083f849SBarry Smith Collective 3729273d9f13SBarry Smith 3730273d9f13SBarry Smith Input Parameters: 373111a5261eSBarry Smith + comm - MPI communicator, set to `PETSC_COMM_SELF` 373211a5261eSBarry Smith . bs - size of block, the blocks are ALWAYS square. One can use `MatSetBlockSizes()` to set a different row and column blocksize but the row 373311a5261eSBarry Smith blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with `MatCreateVecs()` 3734273d9f13SBarry Smith . m - number of rows 3735273d9f13SBarry Smith . n - number of columns 373635d8aa7fSBarry Smith . nz - number of nonzero blocks per block row (same for all rows) 373735d8aa7fSBarry Smith - nnz - array containing the number of nonzero blocks in the various block rows 373820f4b53cSBarry Smith (possibly different for each block row) or `NULL` 3739273d9f13SBarry Smith 3740273d9f13SBarry Smith Output Parameter: 3741273d9f13SBarry Smith . 
A - the matrix 3742273d9f13SBarry Smith 3743273d9f13SBarry Smith Options Database Keys: 374411a5261eSBarry Smith + -mat_no_unroll - uses code that does not unroll the loops in the block calculations (much slower) 3745a2b725a8SWilliam Gropp - -mat_block_size - size of the blocks to use 3746273d9f13SBarry Smith 3747273d9f13SBarry Smith Level: intermediate 3748273d9f13SBarry Smith 3749273d9f13SBarry Smith Notes: 37502ef1f0ffSBarry Smith It is recommended that one use the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`, 37512ef1f0ffSBarry Smith MatXXXXSetPreallocation() paradigm instead of this routine directly. 37522ef1f0ffSBarry Smith [MatXXXXSetPreallocation() is, for example, `MatSeqBAIJSetPreallocation()`] 37532ef1f0ffSBarry Smith 3754d1be2dadSMatthew Knepley The number of rows and columns must be divisible by blocksize. 3755d1be2dadSMatthew Knepley 37562ef1f0ffSBarry Smith If the `nnz` parameter is given then the `nz` parameter is ignored 375749a6f317SBarry Smith 375835d8aa7fSBarry Smith A nonzero block is any block that has 1 or more nonzeros in it 375935d8aa7fSBarry Smith 37602ef1f0ffSBarry Smith The `MATSEQBAIJ` format is fully compatible with standard Fortran 3761273d9f13SBarry Smith storage. That is, the stored row and column indices can begin at 376220f4b53cSBarry Smith either one (as in Fortran) or zero. 3763273d9f13SBarry Smith 37642ef1f0ffSBarry Smith Specify the preallocated storage with either `nz` or `nnz` (not both). 37652ef1f0ffSBarry Smith Set `nz` = `PETSC_DEFAULT` and `nnz` = `NULL` for PETSc to control dynamic memory 3766651615e1SBarry Smith allocation. See [Sparse Matrices](sec_matsparse) for details. 3767273d9f13SBarry Smith 
3768273d9f13SBarry Smith 37691cc06b55SBarry Smith .seealso: [](ch_matrices), `Mat`, [Sparse Matrices](sec_matsparse), `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatCreateBAIJ()` 3770273d9f13SBarry Smith @*/ 3771d71ae5a4SJacob Faibussowitsch PetscErrorCode MatCreateSeqBAIJ(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt nz, const PetscInt nnz[], Mat *A) 3772d71ae5a4SJacob Faibussowitsch { 3773273d9f13SBarry Smith PetscFunctionBegin; 37749566063dSJacob Faibussowitsch PetscCall(MatCreate(comm, A)); 37759566063dSJacob Faibussowitsch PetscCall(MatSetSizes(*A, m, n, m, n)); 37769566063dSJacob Faibussowitsch PetscCall(MatSetType(*A, MATSEQBAIJ)); 37779566063dSJacob Faibussowitsch PetscCall(MatSeqBAIJSetPreallocation(*A, bs, nz, (PetscInt *)nnz)); 37783ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 3779273d9f13SBarry Smith } 3780273d9f13SBarry Smith 3781273d9f13SBarry Smith /*@C 3782273d9f13SBarry Smith MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros 3783273d9f13SBarry Smith per row in the matrix. For good matrix assembly performance the 378420f4b53cSBarry Smith user should preallocate the matrix storage by setting the parameter `nz` 378520f4b53cSBarry Smith (or the array `nnz`). 3786273d9f13SBarry Smith 3787d083f849SBarry Smith Collective 3788273d9f13SBarry Smith 3789273d9f13SBarry Smith Input Parameters: 37901c4f3114SJed Brown + B - the matrix 379111a5261eSBarry Smith . bs - size of block, the blocks are ALWAYS square. One can use `MatSetBlockSizes()` to set a different row and column blocksize but the row 379211a5261eSBarry Smith blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with `MatCreateVecs()` 3793273d9f13SBarry Smith . 
nz - number of block nonzeros per block row (same for all rows) 3794273d9f13SBarry Smith - nnz - array containing the number of block nonzeros in the various block rows 37952ef1f0ffSBarry Smith (possibly different for each block row) or `NULL` 3796273d9f13SBarry Smith 3797273d9f13SBarry Smith Options Database Keys: 379811a5261eSBarry Smith + -mat_no_unroll - uses code that does not unroll the loops in the block calculations (much slower) 3799a2b725a8SWilliam Gropp - -mat_block_size - size of the blocks to use 3800273d9f13SBarry Smith 3801273d9f13SBarry Smith Level: intermediate 3802273d9f13SBarry Smith 3803273d9f13SBarry Smith Notes: 38042ef1f0ffSBarry Smith If the `nnz` parameter is given then the `nz` parameter is ignored 380549a6f317SBarry Smith 380611a5261eSBarry Smith You can call `MatGetInfo()` to get information on how effective the preallocation was, 3807aa95bbe8SBarry Smith for example the fields `mallocs`, `nz_allocated`, `nz_used`, and `nz_unneeded`. 380820f4b53cSBarry Smith You can also run with the option `-info` and look for messages with the string 3809aa95bbe8SBarry Smith malloc in them to see if additional memory allocation was needed. 3810aa95bbe8SBarry Smith 38112ef1f0ffSBarry Smith The `MATSEQBAIJ` format is fully compatible with standard Fortran 3812273d9f13SBarry Smith storage. That is, the stored row and column indices can begin at 381320f4b53cSBarry Smith either one (as in Fortran) or zero. 3814273d9f13SBarry Smith 3815273d9f13SBarry Smith Specify the preallocated storage with either `nz` or `nnz` (not both). 38162ef1f0ffSBarry Smith Set `nz` = `PETSC_DEFAULT` and `nnz` = `NULL` for PETSc to control dynamic memory 3817651615e1SBarry Smith allocation. See [Sparse Matrices](sec_matsparse) for details. 
3818273d9f13SBarry Smith 38191cc06b55SBarry Smith .seealso: [](ch_matrices), `Mat`, [Sparse Matrices](sec_matsparse), `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatCreateBAIJ()`, `MatGetInfo()` 3820273d9f13SBarry Smith @*/ 3821d71ae5a4SJacob Faibussowitsch PetscErrorCode MatSeqBAIJSetPreallocation(Mat B, PetscInt bs, PetscInt nz, const PetscInt nnz[]) 3822d71ae5a4SJacob Faibussowitsch { 3823273d9f13SBarry Smith PetscFunctionBegin; 38246ba663aaSJed Brown PetscValidHeaderSpecific(B, MAT_CLASSID, 1); 38256ba663aaSJed Brown PetscValidType(B, 1); 38266ba663aaSJed Brown PetscValidLogicalCollectiveInt(B, bs, 2); 3827cac4c232SBarry Smith PetscTryMethod(B, "MatSeqBAIJSetPreallocation_C", (Mat, PetscInt, PetscInt, const PetscInt[]), (B, bs, nz, nnz)); 38283ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 3829273d9f13SBarry Smith } 3830a1d92eedSBarry Smith 3831725b52f3SLisandro Dalcin /*@C 383211a5261eSBarry Smith MatSeqBAIJSetPreallocationCSR - Creates a sparse sequential matrix in `MATSEQBAIJ` format using the given nonzero structure and (optional) numerical values 3833725b52f3SLisandro Dalcin 3834d083f849SBarry Smith Collective 3835725b52f3SLisandro Dalcin 3836725b52f3SLisandro Dalcin Input Parameters: 38371c4f3114SJed Brown + B - the matrix 383820f4b53cSBarry Smith . bs - the blocksize 38392ef1f0ffSBarry Smith . i - the indices into `j` for the start of each local row (starts with zero) 3840725b52f3SLisandro Dalcin . j - the column indices for each local row (starts with zero) these must be sorted for each row 3841725b52f3SLisandro Dalcin - v - optional values in the matrix 3842725b52f3SLisandro Dalcin 3843664954b6SBarry Smith Level: advanced 3844725b52f3SLisandro Dalcin 38453adadaf3SJed Brown Notes: 384611a5261eSBarry Smith The order of the entries in values is specified by the `MatOption` `MAT_ROW_ORIENTED`. 
For example, C programs 384711a5261eSBarry Smith may want to use the default `MAT_ROW_ORIENTED` of `PETSC_TRUE` and use an array v[nnz][bs][bs] where the second index is 38483adadaf3SJed Brown over rows within a block and the last index is over columns within a block row. Fortran programs will likely set 384911a5261eSBarry Smith `MAT_ROW_ORIENTED` of `PETSC_FALSE` and use a Fortran array v(bs,bs,nnz) in which the first index is over rows within a 38503adadaf3SJed Brown block column and the second index is over columns within a block. 38513adadaf3SJed Brown 3852664954b6SBarry Smith Though this routine has Preallocation() in the name it also sets the exact nonzero locations of the matrix entries and usually the numerical values as well 3853664954b6SBarry Smith 38541cc06b55SBarry Smith .seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatCreateSeqBAIJ()`, `MatSetValues()`, `MatSeqBAIJSetPreallocation()`, `MATSEQBAIJ` 3855725b52f3SLisandro Dalcin @*/ 3856d71ae5a4SJacob Faibussowitsch PetscErrorCode MatSeqBAIJSetPreallocationCSR(Mat B, PetscInt bs, const PetscInt i[], const PetscInt j[], const PetscScalar v[]) 3857d71ae5a4SJacob Faibussowitsch { 3858725b52f3SLisandro Dalcin PetscFunctionBegin; 38596ba663aaSJed Brown PetscValidHeaderSpecific(B, MAT_CLASSID, 1); 38606ba663aaSJed Brown PetscValidType(B, 1); 38616ba663aaSJed Brown PetscValidLogicalCollectiveInt(B, bs, 2); 3862cac4c232SBarry Smith PetscTryMethod(B, "MatSeqBAIJSetPreallocationCSR_C", (Mat, PetscInt, const PetscInt[], const PetscInt[], const PetscScalar[]), (B, bs, i, j, v)); 38633ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 3864725b52f3SLisandro Dalcin } 3865725b52f3SLisandro Dalcin 3866c75a6043SHong Zhang /*@ 386711a5261eSBarry Smith MatCreateSeqBAIJWithArrays - Creates a `MATSEQBAIJ` matrix using matrix elements provided by the user. 
3868c75a6043SHong Zhang 3869d083f849SBarry Smith Collective 3870c75a6043SHong Zhang 3871c75a6043SHong Zhang Input Parameters: 3872c75a6043SHong Zhang + comm - must be an MPI communicator of size 1 3873c75a6043SHong Zhang . bs - size of block 3874c75a6043SHong Zhang . m - number of rows 3875c75a6043SHong Zhang . n - number of columns 3876483a2f95SBarry Smith . i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of elements in that block row of the matrix 3877c75a6043SHong Zhang . j - column indices 3878c75a6043SHong Zhang - a - matrix values 3879c75a6043SHong Zhang 3880c75a6043SHong Zhang Output Parameter: 3881c75a6043SHong Zhang . mat - the matrix 3882c75a6043SHong Zhang 3883dfb205c3SBarry Smith Level: advanced 3884c75a6043SHong Zhang 3885c75a6043SHong Zhang Notes: 38862ef1f0ffSBarry Smith The `i`, `j`, and `a` arrays are not copied by this routine; the user must free these arrays 3887c75a6043SHong Zhang once the matrix is destroyed 3888c75a6043SHong Zhang 3889c75a6043SHong Zhang You cannot set new nonzero locations into this matrix; doing so will generate an error. 3890c75a6043SHong Zhang 38912ef1f0ffSBarry Smith The `i` and `j` indices are 0 based 3892c75a6043SHong Zhang 389311a5261eSBarry Smith When block size is greater than 1 the matrix values must be stored using the `MATSEQBAIJ` storage format 3894dfb205c3SBarry Smith 38953adadaf3SJed Brown The order of the entries in values is the same as the block compressed sparse row storage format; that is, it is 38963adadaf3SJed Brown the same as a three dimensional array in Fortran values(bs,bs,nnz) that contains the first column of the first 38973adadaf3SJed Brown block, followed by the second column of the first block, etc. That is, the blocks are contiguous in memory 38983adadaf3SJed Brown with column-major ordering within blocks. 
3899dfb205c3SBarry Smith 39001cc06b55SBarry Smith .seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatCreateBAIJ()`, `MatCreateSeqBAIJ()` 3901c75a6043SHong Zhang @*/ 3902d71ae5a4SJacob Faibussowitsch PetscErrorCode MatCreateSeqBAIJWithArrays(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt i[], PetscInt j[], PetscScalar a[], Mat *mat) 3903d71ae5a4SJacob Faibussowitsch { 3904c75a6043SHong Zhang Mat_SeqBAIJ *baij; 3905c75a6043SHong Zhang 3906c75a6043SHong Zhang PetscFunctionBegin; 39075f80ce2aSJacob Faibussowitsch PetscCheck(bs == 1, PETSC_COMM_SELF, PETSC_ERR_SUP, "block size %" PetscInt_FMT " > 1 is not supported yet", bs); 39085f80ce2aSJacob Faibussowitsch if (m > 0) PetscCheck(i[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0"); 3909c75a6043SHong Zhang 39109566063dSJacob Faibussowitsch PetscCall(MatCreate(comm, mat)); 39119566063dSJacob Faibussowitsch PetscCall(MatSetSizes(*mat, m, n, m, n)); 39129566063dSJacob Faibussowitsch PetscCall(MatSetType(*mat, MATSEQBAIJ)); 39139566063dSJacob Faibussowitsch PetscCall(MatSeqBAIJSetPreallocation(*mat, bs, MAT_SKIP_ALLOCATION, NULL)); 3914c75a6043SHong Zhang baij = (Mat_SeqBAIJ *)(*mat)->data; 39159566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(m, &baij->imax, m, &baij->ilen)); 3916c75a6043SHong Zhang 3917c75a6043SHong Zhang baij->i = i; 3918c75a6043SHong Zhang baij->j = j; 3919c75a6043SHong Zhang baij->a = a; 392026fbe8dcSKarl Rupp 3921c75a6043SHong Zhang baij->singlemalloc = PETSC_FALSE; 3922c75a6043SHong Zhang baij->nonew = -1; /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/ 3923e6b907acSBarry Smith baij->free_a = PETSC_FALSE; 3924e6b907acSBarry Smith baij->free_ij = PETSC_FALSE; 3925ceb5bf51SJacob Faibussowitsch baij->free_imax_ilen = PETSC_TRUE; 3926c75a6043SHong Zhang 3927ceb5bf51SJacob Faibussowitsch for (PetscInt ii = 0; ii < m; ii++) { 3928ceb5bf51SJacob Faibussowitsch const PetscInt row_len = i[ii + 1] - 
i[ii]; 3929ceb5bf51SJacob Faibussowitsch 3930ceb5bf51SJacob Faibussowitsch baij->ilen[ii] = baij->imax[ii] = row_len; 3931ceb5bf51SJacob Faibussowitsch PetscCheck(row_len >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Negative row length in i (row indices) row = %" PetscInt_FMT " length = %" PetscInt_FMT, ii, row_len); 3932c75a6043SHong Zhang } 393376bd3646SJed Brown if (PetscDefined(USE_DEBUG)) { 3934ceb5bf51SJacob Faibussowitsch for (PetscInt ii = 0; ii < baij->i[m]; ii++) { 39356bdcaf15SBarry Smith PetscCheck(j[ii] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Negative column index at location = %" PetscInt_FMT " index = %" PetscInt_FMT, ii, j[ii]); 39366bdcaf15SBarry Smith PetscCheck(j[ii] <= n - 1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column index to large at location = %" PetscInt_FMT " index = %" PetscInt_FMT, ii, j[ii]); 3937c75a6043SHong Zhang } 393876bd3646SJed Brown } 3939c75a6043SHong Zhang 39409566063dSJacob Faibussowitsch PetscCall(MatAssemblyBegin(*mat, MAT_FINAL_ASSEMBLY)); 39419566063dSJacob Faibussowitsch PetscCall(MatAssemblyEnd(*mat, MAT_FINAL_ASSEMBLY)); 39423ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 3943c75a6043SHong Zhang } 3944bdf6f3fcSHong Zhang 3945d71ae5a4SJacob Faibussowitsch PetscErrorCode MatCreateMPIMatConcatenateSeqMat_SeqBAIJ(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat) 3946d71ae5a4SJacob Faibussowitsch { 3947bdf6f3fcSHong Zhang PetscFunctionBegin; 39489566063dSJacob Faibussowitsch PetscCall(MatCreateMPIMatConcatenateSeqMat_MPIBAIJ(comm, inmat, n, scall, outmat)); 39493ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 3950bdf6f3fcSHong Zhang } 3951