1be1d678aSKris Buschelman 22593348eSBarry Smith /* 3b6490206SBarry Smith Defines the basic matrix operations for the BAIJ (compressed row) 42593348eSBarry Smith matrix storage format. 52593348eSBarry Smith */ 6c6db04a5SJed Brown #include <../src/mat/impls/baij/seq/baij.h> /*I "petscmat.h" I*/ 7c6db04a5SJed Brown #include <petscblaslapack.h> 8af0996ceSBarry Smith #include <petsc/private/kernels/blockinvert.h> 9af0996ceSBarry Smith #include <petsc/private/kernels/blockmatmult.h> 1043516a2dSKris Buschelman 117ea3e4caSstefano_zampini #if defined(PETSC_HAVE_HYPRE) 127ea3e4caSstefano_zampini PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat, MatType, MatReuse, Mat *); 137ea3e4caSstefano_zampini #endif 147ea3e4caSstefano_zampini 15b5b72c8aSIrina Sokolova #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 16fd9d3c67SJed Brown PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBAIJMKL(Mat, MatType, MatReuse, Mat *); 17b5b72c8aSIrina Sokolova #endif 18c9225affSStefano Zampini PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat, MatType, MatReuse, Mat *); 19b5b72c8aSIrina Sokolova 209371c9d4SSatish Balay PetscErrorCode MatGetColumnReductions_SeqBAIJ(Mat A, PetscInt type, PetscReal *reductions) { 219463ebdaSPierre Jolivet Mat_SeqBAIJ *a_aij = (Mat_SeqBAIJ *)A->data; 22857cbf51SRichard Tran Mills PetscInt m, n, i; 239463ebdaSPierre Jolivet PetscInt ib, jb, bs = A->rmap->bs; 249463ebdaSPierre Jolivet MatScalar *a_val = a_aij->a; 259463ebdaSPierre Jolivet 269463ebdaSPierre Jolivet PetscFunctionBegin; 279566063dSJacob Faibussowitsch PetscCall(MatGetSize(A, &m, &n)); 28857cbf51SRichard Tran Mills for (i = 0; i < n; i++) reductions[i] = 0.0; 299463ebdaSPierre Jolivet if (type == NORM_2) { 309463ebdaSPierre Jolivet for (i = a_aij->i[0]; i < a_aij->i[A->rmap->n / bs]; i++) { 319463ebdaSPierre Jolivet for (jb = 0; jb < bs; jb++) { 329463ebdaSPierre Jolivet for (ib = 0; ib < bs; ib++) { 33857cbf51SRichard Tran Mills reductions[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val * *a_val); 349463ebdaSPierre Jolivet a_val++; 359463ebdaSPierre Jolivet } 369463ebdaSPierre Jolivet } 379463ebdaSPierre Jolivet } 389463ebdaSPierre Jolivet } else if (type == NORM_1) { 399463ebdaSPierre Jolivet for (i = a_aij->i[0]; i < a_aij->i[A->rmap->n / bs]; i++) { 409463ebdaSPierre Jolivet for (jb = 0; jb < bs; jb++) { 419463ebdaSPierre Jolivet for (ib = 0; ib < bs; ib++) { 42857cbf51SRichard Tran Mills reductions[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val); 439463ebdaSPierre Jolivet a_val++; 449463ebdaSPierre Jolivet } 459463ebdaSPierre Jolivet } 469463ebdaSPierre Jolivet } 479463ebdaSPierre Jolivet } else if (type == NORM_INFINITY) { 489463ebdaSPierre Jolivet for (i = a_aij->i[0]; i < a_aij->i[A->rmap->n / bs]; i++) { 499463ebdaSPierre Jolivet for (jb = 0; jb < bs; jb++) { 509463ebdaSPierre Jolivet for (ib = 0; ib < bs; ib++) { 519463ebdaSPierre Jolivet int col = A->cmap->rstart + a_aij->j[i] * bs + jb; 52857cbf51SRichard Tran Mills reductions[col] = PetscMax(PetscAbsScalar(*a_val), reductions[col]); 539463ebdaSPierre Jolivet a_val++; 549463ebdaSPierre Jolivet } 559463ebdaSPierre Jolivet } 569463ebdaSPierre Jolivet } 57857cbf51SRichard Tran Mills } else if (type == REDUCTION_SUM_REALPART || type == REDUCTION_MEAN_REALPART) { 58857cbf51SRichard Tran Mills for (i = a_aij->i[0]; i < a_aij->i[A->rmap->n / bs]; i++) { 59857cbf51SRichard Tran Mills for (jb = 0; jb < bs; jb++) { 60857cbf51SRichard Tran Mills for (ib = 0; ib < bs; ib++) { 61857cbf51SRichard Tran Mills reductions[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscRealPart(*a_val); 62857cbf51SRichard Tran Mills a_val++; 63857cbf51SRichard Tran Mills } 64857cbf51SRichard Tran Mills } 65857cbf51SRichard Tran Mills } 66857cbf51SRichard Tran Mills } else if (type == REDUCTION_SUM_IMAGINARYPART || type == REDUCTION_MEAN_IMAGINARYPART) { 67857cbf51SRichard Tran Mills for (i = a_aij->i[0]; i < a_aij->i[A->rmap->n / bs]; i++) { 68857cbf51SRichard Tran Mills for (jb = 0; jb < bs; jb++) { 69857cbf51SRichard Tran Mills for (ib = 0; ib < bs; ib++) { 70857cbf51SRichard Tran Mills reductions[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscImaginaryPart(*a_val); 71857cbf51SRichard Tran Mills a_val++; 72857cbf51SRichard Tran Mills } 73857cbf51SRichard Tran Mills } 74857cbf51SRichard Tran Mills } 75857cbf51SRichard Tran Mills } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Unknown reduction type"); 769463ebdaSPierre Jolivet if (type == NORM_2) { 77857cbf51SRichard Tran Mills for (i = 0; i < n; i++) reductions[i] = PetscSqrtReal(reductions[i]); 78857cbf51SRichard Tran Mills } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) { 79857cbf51SRichard Tran Mills for (i = 0; i < n; i++) reductions[i] /= m; 809463ebdaSPierre Jolivet } 819463ebdaSPierre Jolivet PetscFunctionReturn(0); 829463ebdaSPierre Jolivet } 839463ebdaSPierre Jolivet 849371c9d4SSatish Balay PetscErrorCode MatInvertBlockDiagonal_SeqBAIJ(Mat A, const PetscScalar **values) { 85b01c7715SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 86de80f912SBarry Smith PetscInt *diag_offset, i, bs = A->rmap->bs, mbs = a->mbs, ipvt[5], bs2 = bs * bs, *v_pivots; 877f0c90edSBarry Smith MatScalar *v = a->a, *odiag, *diag, work[25], *v_work; 8862bba022SBarry Smith PetscReal shift = 0.0; 891a9391e3SHong Zhang PetscBool allowzeropivot, zeropivotdetected = PETSC_FALSE; 90b01c7715SBarry Smith 91b01c7715SBarry Smith PetscFunctionBegin; 92a455e926SHong Zhang allowzeropivot = PetscNot(A->erroriffailure); 93a455e926SHong Zhang 949797317bSBarry Smith if (a->idiagvalid) { 959797317bSBarry Smith if (values) *values = a->idiag; 969797317bSBarry Smith PetscFunctionReturn(0); 979797317bSBarry Smith } 989566063dSJacob Faibussowitsch PetscCall(MatMarkDiagonal_SeqBAIJ(A)); 99b01c7715SBarry Smith diag_offset = a->diag; 100b01c7715SBarry Smith if (!a->idiag) { 1019566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(bs2 * mbs, &a->idiag)); 1029566063dSJacob Faibussowitsch PetscCall(PetscLogObjectMemory((PetscObject)A, bs2 * mbs * sizeof(PetscScalar))); 103b01c7715SBarry Smith } 104b01c7715SBarry Smith diag = a->idiag; 105bbead8a2SBarry Smith if (values) *values = a->idiag; 106b01c7715SBarry Smith /* factor and invert each block */ 107521d7252SBarry Smith switch (bs) { 108ab040260SJed Brown case 1: 109ab040260SJed Brown for (i = 0; i < mbs; i++) { 110ab040260SJed Brown odiag = v + 1 * diag_offset[i]; 111ab040260SJed Brown diag[0] = odiag[0]; 112ec1892c8SHong Zhang 113ec1892c8SHong Zhang if (PetscAbsScalar(diag[0] + shift) < PETSC_MACHINE_EPSILON) { 114ec1892c8SHong Zhang if (allowzeropivot) { 1157b6c816cSBarry Smith A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 1167b6c816cSBarry Smith A->factorerror_zeropivot_value = PetscAbsScalar(diag[0]); 1177b6c816cSBarry Smith A->factorerror_zeropivot_row = i; 1189566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "Zero pivot, row %" PetscInt_FMT "\n", i)); 11998921bdaSJacob Faibussowitsch } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Zero pivot, row %" PetscInt_FMT " pivot value %g tolerance %g", i, (double)PetscAbsScalar(diag[0]), (double)PETSC_MACHINE_EPSILON); 120ec1892c8SHong Zhang } 121ec1892c8SHong Zhang 122d4a378daSJed Brown diag[0] = (PetscScalar)1.0 / (diag[0] + shift); 123ab040260SJed Brown diag += 1; 124ab040260SJed Brown } 125ab040260SJed Brown break; 126b01c7715SBarry Smith case 2: 127b01c7715SBarry Smith for (i = 0; i < mbs; i++) { 128b01c7715SBarry Smith odiag = v + 4 * diag_offset[i]; 1299371c9d4SSatish Balay diag[0] = odiag[0]; 1309371c9d4SSatish Balay diag[1] = odiag[1]; 1319371c9d4SSatish Balay diag[2] = odiag[2]; 1329371c9d4SSatish Balay diag[3] = odiag[3]; 1339566063dSJacob Faibussowitsch PetscCall(PetscKernel_A_gets_inverse_A_2(diag, shift, allowzeropivot, &zeropivotdetected)); 1347b6c816cSBarry Smith if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 135b01c7715SBarry Smith diag += 4; 136b01c7715SBarry Smith } 137b01c7715SBarry Smith break; 138b01c7715SBarry Smith case 3: 139b01c7715SBarry Smith for (i = 0; i < mbs; i++) { 140b01c7715SBarry Smith odiag = v + 9 * diag_offset[i]; 1419371c9d4SSatish Balay diag[0] = odiag[0]; 1429371c9d4SSatish Balay diag[1] = odiag[1]; 1439371c9d4SSatish Balay diag[2] = odiag[2]; 1449371c9d4SSatish Balay diag[3] = odiag[3]; 1459371c9d4SSatish Balay diag[4] = odiag[4]; 1469371c9d4SSatish Balay diag[5] = odiag[5]; 1479371c9d4SSatish Balay diag[6] = odiag[6]; 1489371c9d4SSatish Balay diag[7] = odiag[7]; 149b01c7715SBarry Smith diag[8] = odiag[8]; 1509566063dSJacob Faibussowitsch PetscCall(PetscKernel_A_gets_inverse_A_3(diag, shift, allowzeropivot, &zeropivotdetected)); 1517b6c816cSBarry Smith if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 152b01c7715SBarry Smith diag += 9; 153b01c7715SBarry Smith } 154b01c7715SBarry Smith break; 155b01c7715SBarry Smith case 4: 156b01c7715SBarry Smith for (i = 0; i < mbs; i++) { 157b01c7715SBarry Smith odiag = v + 16 * diag_offset[i]; 1589566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(diag, odiag, 16)); 1599566063dSJacob Faibussowitsch PetscCall(PetscKernel_A_gets_inverse_A_4(diag, shift, allowzeropivot, &zeropivotdetected)); 1607b6c816cSBarry Smith if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 161b01c7715SBarry Smith diag += 16; 162b01c7715SBarry Smith } 163b01c7715SBarry Smith break; 164b01c7715SBarry Smith case 5: 165b01c7715SBarry Smith for (i = 0; i < mbs; i++) { 166b01c7715SBarry Smith odiag = v + 25 * diag_offset[i]; 1679566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(diag, odiag, 25)); 1689566063dSJacob Faibussowitsch PetscCall(PetscKernel_A_gets_inverse_A_5(diag, ipvt, work, shift, allowzeropivot, &zeropivotdetected)); 1697b6c816cSBarry Smith if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 170b01c7715SBarry Smith diag += 25; 171b01c7715SBarry Smith } 172b01c7715SBarry Smith break; 173d49b2adcSBarry Smith case 6: 174d49b2adcSBarry Smith for (i = 0; i < mbs; i++) { 175d49b2adcSBarry Smith odiag = v + 36 * diag_offset[i]; 1769566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(diag, odiag, 36)); 1779566063dSJacob Faibussowitsch PetscCall(PetscKernel_A_gets_inverse_A_6(diag, shift, allowzeropivot, &zeropivotdetected)); 1787b6c816cSBarry Smith if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 179d49b2adcSBarry Smith diag += 36; 180d49b2adcSBarry Smith } 181d49b2adcSBarry Smith break; 182de80f912SBarry Smith case 7: 183de80f912SBarry Smith for (i = 0; i < mbs; i++) { 184de80f912SBarry Smith odiag = v + 49 * diag_offset[i]; 1859566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(diag, odiag, 49)); 1869566063dSJacob Faibussowitsch PetscCall(PetscKernel_A_gets_inverse_A_7(diag, shift, allowzeropivot, &zeropivotdetected)); 1877b6c816cSBarry Smith if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 188de80f912SBarry Smith diag += 49; 189de80f912SBarry Smith } 190de80f912SBarry Smith break; 191b01c7715SBarry Smith default: 1929566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(bs, &v_work, bs, &v_pivots)); 193de80f912SBarry Smith for (i = 0; i < mbs; i++) { 194de80f912SBarry Smith odiag = v + bs2 * diag_offset[i]; 1959566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(diag, odiag, bs2)); 1969566063dSJacob Faibussowitsch PetscCall(PetscKernel_A_gets_inverse_A(bs, diag, v_pivots, v_work, allowzeropivot, &zeropivotdetected)); 1977b6c816cSBarry Smith if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 198de80f912SBarry Smith diag += bs2; 199de80f912SBarry Smith } 2009566063dSJacob Faibussowitsch PetscCall(PetscFree2(v_work, v_pivots)); 201b01c7715SBarry Smith } 202b01c7715SBarry Smith a->idiagvalid = PETSC_TRUE; 203b01c7715SBarry Smith PetscFunctionReturn(0); 204b01c7715SBarry Smith } 205b01c7715SBarry Smith 2069371c9d4SSatish Balay PetscErrorCode MatSOR_SeqBAIJ(Mat A, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx) { 2076d3beeddSMatthew Knepley Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 208e48d15efSToby Isaac PetscScalar *x, *work, *w, *workt, *t; 209e48d15efSToby Isaac const MatScalar *v, *aa = a->a, *idiag; 210e48d15efSToby Isaac const PetscScalar *b, *xb; 2115455b99fSToby Isaac PetscScalar s[7], xw[7] = {0}; /* avoid some compilers thinking xw is uninitialized */ 212e48d15efSToby Isaac PetscInt m = a->mbs, i, i2, nz, bs = A->rmap->bs, bs2 = bs * bs, k, j, idx, it; 213c1ac3661SBarry Smith const PetscInt *diag, *ai = a->i, *aj = a->j, *vi; 214b01c7715SBarry Smith 215b01c7715SBarry Smith PetscFunctionBegin; 216b01c7715SBarry Smith its = its * lits; 2175f80ce2aSJacob Faibussowitsch PetscCheck(!(flag & SOR_EISENSTAT), PETSC_COMM_SELF, PETSC_ERR_SUP, "No support yet for Eisenstat"); 2185f80ce2aSJacob Faibussowitsch PetscCheck(its > 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Relaxation requires global its %" PetscInt_FMT " and local its %" PetscInt_FMT " both positive", its, lits); 2195f80ce2aSJacob Faibussowitsch PetscCheck(!fshift, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for diagonal shift"); 2205f80ce2aSJacob Faibussowitsch PetscCheck(omega == 1.0, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for non-trivial relaxation factor"); 2215f80ce2aSJacob Faibussowitsch PetscCheck(!(flag & SOR_APPLY_UPPER) && !(flag & SOR_APPLY_LOWER), PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for applying upper or lower triangular parts"); 222b01c7715SBarry Smith 2239566063dSJacob Faibussowitsch if (!a->idiagvalid) PetscCall(MatInvertBlockDiagonal(A, NULL)); 224b01c7715SBarry Smith 225b2ec919aSToby Isaac if (!m) PetscFunctionReturn(0); 226b01c7715SBarry Smith diag = a->diag; 227b01c7715SBarry Smith idiag = a->idiag; 228de80f912SBarry Smith k = PetscMax(A->rmap->n, A->cmap->n); 229*48a46eb9SPierre Jolivet if (!a->mult_work) PetscCall(PetscMalloc1(k + 1, &a->mult_work)); 230*48a46eb9SPierre Jolivet if (!a->sor_workt) PetscCall(PetscMalloc1(k, &a->sor_workt)); 231*48a46eb9SPierre Jolivet if (!a->sor_work) PetscCall(PetscMalloc1(bs, &a->sor_work)); 2323475c22fSBarry Smith work = a->mult_work; 2333475c22fSBarry Smith t = a->sor_workt; 234de80f912SBarry Smith w = a->sor_work; 235de80f912SBarry Smith 2369566063dSJacob Faibussowitsch PetscCall(VecGetArray(xx, &x)); 2379566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(bb, &b)); 238de80f912SBarry Smith 239de80f912SBarry Smith if (flag & SOR_ZERO_INITIAL_GUESS) { 240de80f912SBarry Smith if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) { 241e48d15efSToby Isaac switch (bs) { 242e48d15efSToby Isaac case 1: 243e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_1(x, idiag, b); 244e48d15efSToby Isaac t[0] = b[0]; 245e48d15efSToby Isaac i2 = 1; 246e48d15efSToby Isaac idiag += 1; 247e48d15efSToby Isaac for (i = 1; i < m; i++) { 248e48d15efSToby Isaac v = aa + ai[i]; 249e48d15efSToby Isaac vi = aj + ai[i]; 250e48d15efSToby Isaac nz = diag[i] - ai[i]; 251e48d15efSToby Isaac s[0] = b[i2]; 252e48d15efSToby Isaac for (j = 0; j < nz; j++) { 253e48d15efSToby Isaac xw[0] = x[vi[j]]; 254e48d15efSToby Isaac PetscKernel_v_gets_v_minus_A_times_w_1(s, (v + j), xw); 255e48d15efSToby Isaac } 256e48d15efSToby Isaac t[i2] = s[0]; 257e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_1(xw, idiag, s); 258e48d15efSToby Isaac x[i2] = xw[0]; 259e48d15efSToby Isaac idiag += 1; 260e48d15efSToby Isaac i2 += 1; 261e48d15efSToby Isaac } 262e48d15efSToby Isaac break; 263e48d15efSToby Isaac case 2: 264e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_2(x, idiag, b); 2659371c9d4SSatish Balay t[0] = b[0]; 2669371c9d4SSatish Balay t[1] = b[1]; 267e48d15efSToby Isaac i2 = 2; 268e48d15efSToby Isaac idiag += 4; 269e48d15efSToby Isaac for (i = 1; i < m; i++) { 270e48d15efSToby Isaac v = aa + 4 * ai[i]; 271e48d15efSToby Isaac vi = aj + ai[i]; 272e48d15efSToby Isaac nz = diag[i] - ai[i]; 2739371c9d4SSatish Balay s[0] = b[i2]; 2749371c9d4SSatish Balay s[1] = b[i2 + 1]; 275e48d15efSToby Isaac for (j = 0; j < nz; j++) { 276e48d15efSToby Isaac idx = 2 * vi[j]; 277e48d15efSToby Isaac it = 4 * j; 2789371c9d4SSatish Balay xw[0] = x[idx]; 2799371c9d4SSatish Balay xw[1] = x[1 + idx]; 280e48d15efSToby Isaac PetscKernel_v_gets_v_minus_A_times_w_2(s, (v + it), xw); 281e48d15efSToby Isaac } 2829371c9d4SSatish Balay t[i2] = s[0]; 2839371c9d4SSatish Balay t[i2 + 1] = s[1]; 284e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_2(xw, idiag, s); 2859371c9d4SSatish Balay x[i2] = xw[0]; 2869371c9d4SSatish Balay x[i2 + 1] = xw[1]; 287e48d15efSToby Isaac idiag += 4; 288e48d15efSToby Isaac i2 += 2; 289e48d15efSToby Isaac } 290e48d15efSToby Isaac break; 291e48d15efSToby Isaac case 3: 292e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_3(x, idiag, b); 2939371c9d4SSatish Balay t[0] = b[0]; 2949371c9d4SSatish Balay t[1] = b[1]; 2959371c9d4SSatish Balay t[2] = b[2]; 296e48d15efSToby Isaac i2 = 3; 297e48d15efSToby Isaac idiag += 9; 298e48d15efSToby Isaac for (i = 1; i < m; i++) { 299e48d15efSToby Isaac v = aa + 9 * ai[i]; 300e48d15efSToby Isaac vi = aj + ai[i]; 301e48d15efSToby Isaac nz = diag[i] - ai[i]; 3029371c9d4SSatish Balay s[0] = b[i2]; 3039371c9d4SSatish Balay s[1] = b[i2 + 1]; 3049371c9d4SSatish Balay s[2] = b[i2 + 2]; 305e48d15efSToby Isaac while (nz--) { 306e48d15efSToby Isaac idx = 3 * (*vi++); 3079371c9d4SSatish Balay xw[0] = x[idx]; 3089371c9d4SSatish Balay xw[1] = x[1 + idx]; 3099371c9d4SSatish Balay xw[2] = x[2 + idx]; 310e48d15efSToby Isaac PetscKernel_v_gets_v_minus_A_times_w_3(s, v, xw); 311e48d15efSToby Isaac v += 9; 312e48d15efSToby Isaac } 3139371c9d4SSatish Balay t[i2] = s[0]; 3149371c9d4SSatish Balay t[i2 + 1] = s[1]; 3159371c9d4SSatish Balay t[i2 + 2] = s[2]; 316e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_3(xw, idiag, s); 3179371c9d4SSatish Balay x[i2] = xw[0]; 3189371c9d4SSatish Balay x[i2 + 1] = xw[1]; 3199371c9d4SSatish Balay x[i2 + 2] = xw[2]; 320e48d15efSToby Isaac idiag += 9; 321e48d15efSToby Isaac i2 += 3; 322e48d15efSToby Isaac } 323e48d15efSToby Isaac break; 324e48d15efSToby Isaac case 4: 325e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_4(x, idiag, b); 3269371c9d4SSatish Balay t[0] = b[0]; 3279371c9d4SSatish Balay t[1] = b[1]; 3289371c9d4SSatish Balay t[2] = b[2]; 3299371c9d4SSatish Balay t[3] = b[3]; 330e48d15efSToby Isaac i2 = 4; 331e48d15efSToby Isaac idiag += 16; 332e48d15efSToby Isaac for (i = 1; i < m; i++) { 333e48d15efSToby Isaac v = aa + 16 * ai[i]; 334e48d15efSToby Isaac vi = aj + ai[i]; 335e48d15efSToby Isaac nz = diag[i] - ai[i]; 3369371c9d4SSatish Balay s[0] = b[i2]; 3379371c9d4SSatish Balay s[1] = b[i2 + 1]; 3389371c9d4SSatish Balay s[2] = b[i2 + 2]; 3399371c9d4SSatish Balay s[3] = b[i2 + 3]; 340e48d15efSToby Isaac while (nz--) { 341e48d15efSToby Isaac idx = 4 * (*vi++); 3429371c9d4SSatish Balay xw[0] = x[idx]; 3439371c9d4SSatish Balay xw[1] = x[1 + idx]; 3449371c9d4SSatish Balay xw[2] = x[2 + idx]; 3459371c9d4SSatish Balay xw[3] = x[3 + idx]; 346e48d15efSToby Isaac PetscKernel_v_gets_v_minus_A_times_w_4(s, v, xw); 347e48d15efSToby Isaac v += 16; 348e48d15efSToby Isaac } 3499371c9d4SSatish Balay t[i2] = s[0]; 3509371c9d4SSatish Balay t[i2 + 1] = s[1]; 3519371c9d4SSatish Balay t[i2 + 2] = s[2]; 3529371c9d4SSatish Balay t[i2 + 3] = s[3]; 353e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_4(xw, idiag, s); 3549371c9d4SSatish Balay x[i2] = xw[0]; 3559371c9d4SSatish Balay x[i2 + 1] = xw[1]; 3569371c9d4SSatish Balay x[i2 + 2] = xw[2]; 3579371c9d4SSatish Balay x[i2 + 3] = xw[3]; 358e48d15efSToby Isaac idiag += 16; 359e48d15efSToby Isaac i2 += 4; 360e48d15efSToby Isaac } 361e48d15efSToby Isaac break; 362e48d15efSToby Isaac case 5: 363e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_5(x, idiag, b); 3649371c9d4SSatish Balay t[0] = b[0]; 3659371c9d4SSatish Balay t[1] = b[1]; 3669371c9d4SSatish Balay t[2] = b[2]; 3679371c9d4SSatish Balay t[3] = b[3]; 3689371c9d4SSatish Balay t[4] = b[4]; 369e48d15efSToby Isaac i2 = 5; 370e48d15efSToby Isaac idiag += 25; 371e48d15efSToby Isaac for (i = 1; i < m; i++) { 372e48d15efSToby Isaac v = aa + 25 * ai[i]; 373e48d15efSToby Isaac vi = aj + ai[i]; 374e48d15efSToby Isaac nz = diag[i] - ai[i]; 3759371c9d4SSatish Balay s[0] = b[i2]; 3769371c9d4SSatish Balay s[1] = b[i2 + 1]; 3779371c9d4SSatish Balay s[2] = b[i2 + 2]; 3789371c9d4SSatish Balay s[3] = b[i2 + 3]; 3799371c9d4SSatish Balay s[4] = b[i2 + 4]; 380e48d15efSToby Isaac while (nz--) { 381e48d15efSToby Isaac idx = 5 * (*vi++); 3829371c9d4SSatish Balay xw[0] = x[idx]; 3839371c9d4SSatish Balay xw[1] = x[1 + idx]; 3849371c9d4SSatish Balay xw[2] = x[2 + idx]; 3859371c9d4SSatish Balay xw[3] = x[3 + idx]; 3869371c9d4SSatish Balay xw[4] = x[4 + idx]; 387e48d15efSToby Isaac PetscKernel_v_gets_v_minus_A_times_w_5(s, v, xw); 388e48d15efSToby Isaac v += 25; 389e48d15efSToby Isaac } 3909371c9d4SSatish Balay t[i2] = s[0]; 3919371c9d4SSatish Balay t[i2 + 1] = s[1]; 3929371c9d4SSatish Balay t[i2 + 2] = s[2]; 3939371c9d4SSatish Balay t[i2 + 3] = s[3]; 3949371c9d4SSatish Balay t[i2 + 4] = s[4]; 395e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_5(xw, idiag, s); 3969371c9d4SSatish Balay x[i2] = xw[0]; 3979371c9d4SSatish Balay x[i2 + 1] = xw[1]; 3989371c9d4SSatish Balay x[i2 + 2] = xw[2]; 3999371c9d4SSatish Balay x[i2 + 3] = xw[3]; 4009371c9d4SSatish Balay x[i2 + 4] = xw[4]; 401e48d15efSToby Isaac idiag += 25; 402e48d15efSToby Isaac i2 += 5; 403e48d15efSToby Isaac } 404e48d15efSToby Isaac break; 405e48d15efSToby Isaac case 6: 406e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_6(x, idiag, b); 4079371c9d4SSatish Balay t[0] = b[0]; 4089371c9d4SSatish Balay t[1] = b[1]; 4099371c9d4SSatish Balay t[2] = b[2]; 4109371c9d4SSatish Balay t[3] = b[3]; 4119371c9d4SSatish Balay t[4] = b[4]; 4129371c9d4SSatish Balay t[5] = b[5]; 413e48d15efSToby Isaac i2 = 6; 414e48d15efSToby Isaac idiag += 36; 415e48d15efSToby Isaac for (i = 1; i < m; i++) { 416e48d15efSToby Isaac v = aa + 36 * ai[i]; 417e48d15efSToby Isaac vi = aj + ai[i]; 418e48d15efSToby Isaac nz = diag[i] - ai[i]; 4199371c9d4SSatish Balay s[0] = b[i2]; 4209371c9d4SSatish Balay s[1] = b[i2 + 1]; 4219371c9d4SSatish Balay s[2] = b[i2 + 2]; 4229371c9d4SSatish Balay s[3] = b[i2 + 3]; 4239371c9d4SSatish Balay s[4] = b[i2 + 4]; 4249371c9d4SSatish Balay s[5] = b[i2 + 5]; 425e48d15efSToby Isaac while (nz--) { 426e48d15efSToby Isaac idx = 6 * (*vi++); 4279371c9d4SSatish Balay xw[0] = x[idx]; 4289371c9d4SSatish Balay xw[1] = x[1 + idx]; 4299371c9d4SSatish Balay xw[2] = x[2 + idx]; 4309371c9d4SSatish Balay xw[3] = x[3 + idx]; 4319371c9d4SSatish Balay xw[4] = x[4 + idx]; 4329371c9d4SSatish Balay xw[5] = x[5 + idx]; 433e48d15efSToby Isaac PetscKernel_v_gets_v_minus_A_times_w_6(s, v, xw); 434e48d15efSToby Isaac v += 36; 435e48d15efSToby Isaac } 4369371c9d4SSatish Balay t[i2] = s[0]; 4379371c9d4SSatish Balay t[i2 + 1] = s[1]; 4389371c9d4SSatish Balay t[i2 + 2] = s[2]; 4399371c9d4SSatish Balay t[i2 + 3] = s[3]; 4409371c9d4SSatish Balay t[i2 + 4] = s[4]; 4419371c9d4SSatish Balay t[i2 + 5] = s[5]; 442e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_6(xw, idiag, s); 4439371c9d4SSatish Balay x[i2] = xw[0]; 4449371c9d4SSatish Balay x[i2 + 1] = xw[1]; 4459371c9d4SSatish Balay x[i2 + 2] = xw[2]; 4469371c9d4SSatish Balay x[i2 + 3] = xw[3]; 4479371c9d4SSatish Balay x[i2 + 4] = xw[4]; 4489371c9d4SSatish Balay x[i2 + 5] = xw[5]; 449e48d15efSToby Isaac idiag += 36; 450e48d15efSToby Isaac i2 += 6; 451e48d15efSToby Isaac } 452e48d15efSToby Isaac break; 453e48d15efSToby Isaac case 7: 454e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_7(x, idiag, b); 4559371c9d4SSatish Balay t[0] = b[0]; 4569371c9d4SSatish Balay t[1] = b[1]; 4579371c9d4SSatish Balay t[2] = b[2]; 4589371c9d4SSatish Balay t[3] = b[3]; 4599371c9d4SSatish Balay t[4] = b[4]; 4609371c9d4SSatish Balay t[5] = b[5]; 4619371c9d4SSatish Balay t[6] = b[6]; 462e48d15efSToby Isaac i2 = 7; 463e48d15efSToby Isaac idiag += 49; 464e48d15efSToby Isaac for (i = 1; i < m; i++) { 465e48d15efSToby Isaac v = aa + 49 * ai[i]; 466e48d15efSToby Isaac vi = aj + ai[i]; 467e48d15efSToby Isaac nz = diag[i] - ai[i]; 4689371c9d4SSatish Balay s[0] = b[i2]; 4699371c9d4SSatish Balay s[1] = b[i2 + 1]; 4709371c9d4SSatish Balay s[2] = b[i2 + 2]; 4719371c9d4SSatish Balay s[3] = b[i2 + 3]; 4729371c9d4SSatish Balay s[4] = b[i2 + 4]; 4739371c9d4SSatish Balay s[5] = b[i2 + 5]; 4749371c9d4SSatish Balay s[6] = b[i2 + 6]; 475e48d15efSToby Isaac while (nz--) { 476e48d15efSToby Isaac idx = 7 * (*vi++); 4779371c9d4SSatish Balay xw[0] = x[idx]; 4789371c9d4SSatish Balay xw[1] = x[1 + idx]; 4799371c9d4SSatish Balay xw[2] = x[2 + idx]; 4809371c9d4SSatish Balay xw[3] = x[3 + idx]; 4819371c9d4SSatish Balay xw[4] = x[4 + idx]; 4829371c9d4SSatish Balay xw[5] = x[5 + idx]; 4839371c9d4SSatish Balay xw[6] = x[6 + idx]; 484e48d15efSToby Isaac PetscKernel_v_gets_v_minus_A_times_w_7(s, v, xw); 485e48d15efSToby Isaac v += 49; 486e48d15efSToby Isaac } 4879371c9d4SSatish Balay t[i2] = s[0]; 4889371c9d4SSatish Balay t[i2 + 1] = s[1]; 4899371c9d4SSatish Balay t[i2 + 2] = s[2]; 4909371c9d4SSatish Balay t[i2 + 3] = s[3]; 4919371c9d4SSatish Balay t[i2 + 4] = s[4]; 4929371c9d4SSatish Balay t[i2 + 5] = s[5]; 4939371c9d4SSatish Balay t[i2 + 6] = s[6]; 494e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_7(xw, idiag, s); 4959371c9d4SSatish Balay x[i2] = xw[0]; 4969371c9d4SSatish Balay x[i2 + 1] = xw[1]; 4979371c9d4SSatish Balay x[i2 + 2] = xw[2]; 4989371c9d4SSatish Balay x[i2 + 3] = xw[3]; 4999371c9d4SSatish Balay x[i2 + 4] = xw[4]; 5009371c9d4SSatish Balay x[i2 + 5] = xw[5]; 5019371c9d4SSatish Balay x[i2 + 6] = xw[6]; 502e48d15efSToby Isaac idiag += 49; 503e48d15efSToby Isaac i2 += 7; 504e48d15efSToby Isaac } 505e48d15efSToby Isaac break; 506e48d15efSToby Isaac default: 50796b95a6bSBarry Smith PetscKernel_w_gets_Ar_times_v(bs, bs, b, idiag, x); 5089566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(t, b, bs)); 509de80f912SBarry Smith i2 = bs; 510de80f912SBarry Smith idiag += bs2; 511de80f912SBarry Smith for (i = 1; i < m; i++) { 512de80f912SBarry Smith v = aa + bs2 * ai[i]; 513de80f912SBarry Smith vi = aj + ai[i]; 514de80f912SBarry Smith nz = diag[i] - ai[i]; 515de80f912SBarry Smith 5169566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(w, b + i2, bs)); 517de80f912SBarry Smith /* copy all rows of x that are needed into contiguous space */ 518de80f912SBarry Smith workt = work; 519de80f912SBarry Smith for (j = 0; j < nz; j++) { 5209566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(workt, x + bs * (*vi++), bs)); 521de80f912SBarry Smith workt += bs; 522de80f912SBarry Smith } 52396b95a6bSBarry Smith PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, v, work); 5249566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(t + i2, w, bs)); 52596b95a6bSBarry Smith PetscKernel_w_gets_Ar_times_v(bs, bs, w, idiag, x + i2); 526de80f912SBarry Smith 527de80f912SBarry Smith idiag += bs2; 528de80f912SBarry Smith i2 += bs; 529de80f912SBarry Smith } 530e48d15efSToby Isaac break; 531e48d15efSToby Isaac } 532de80f912SBarry Smith /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 5339566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(1.0 * bs2 * a->nz)); 534e48d15efSToby Isaac xb = t; 5359371c9d4SSatish Balay } else xb = b; 536de80f912SBarry Smith if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 537e48d15efSToby Isaac idiag = a->idiag + bs2 * (a->mbs - 1); 538e48d15efSToby Isaac i2 = bs * (m - 1); 539e48d15efSToby Isaac switch (bs) { 540e48d15efSToby Isaac case 1: 541e48d15efSToby Isaac s[0] = xb[i2]; 542e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_1(xw, idiag, s); 543e48d15efSToby Isaac x[i2] = xw[0]; 544e48d15efSToby Isaac i2 -= 1; 545e48d15efSToby Isaac for (i = m - 2; i >= 0; i--) { 546e48d15efSToby Isaac v = aa + (diag[i] + 1); 547e48d15efSToby Isaac vi = aj + diag[i] + 1; 548e48d15efSToby Isaac nz = ai[i + 1] - diag[i] - 1; 549e48d15efSToby Isaac s[0] = xb[i2]; 550e48d15efSToby Isaac for (j = 0; j < nz; j++) { 551e48d15efSToby Isaac xw[0] = x[vi[j]]; 552e48d15efSToby Isaac PetscKernel_v_gets_v_minus_A_times_w_1(s, (v + j), xw); 553e48d15efSToby Isaac } 554e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_1(xw, idiag, s); 555e48d15efSToby Isaac x[i2] = xw[0]; 556e48d15efSToby Isaac idiag -= 1; 557e48d15efSToby Isaac i2 -= 1; 558e48d15efSToby Isaac } 559e48d15efSToby Isaac break; 560e48d15efSToby Isaac case 2: 5619371c9d4SSatish Balay s[0] = xb[i2]; 5629371c9d4SSatish Balay s[1] = xb[i2 + 1]; 563e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_2(xw, idiag, s); 5649371c9d4SSatish Balay x[i2] = xw[0]; 5659371c9d4SSatish Balay x[i2 + 1] = xw[1]; 566e48d15efSToby Isaac i2 -= 2; 567e48d15efSToby Isaac idiag -= 4; 568e48d15efSToby Isaac for (i = m - 2; i >= 0; i--) { 569e48d15efSToby Isaac v = aa + 4 * (diag[i] + 1); 570e48d15efSToby Isaac vi = aj + diag[i] + 1; 571e48d15efSToby Isaac nz = ai[i + 1] - diag[i] - 1; 5729371c9d4SSatish Balay s[0] = xb[i2]; 5739371c9d4SSatish Balay s[1] = xb[i2 + 1]; 574e48d15efSToby Isaac for (j = 0; j < nz; j++) { 575e48d15efSToby Isaac idx = 2 * vi[j]; 576e48d15efSToby Isaac it = 4 * j; 5779371c9d4SSatish Balay xw[0] = x[idx]; 5789371c9d4SSatish Balay xw[1] = x[1 + idx]; 579e48d15efSToby Isaac PetscKernel_v_gets_v_minus_A_times_w_2(s, (v + it), xw); 580e48d15efSToby Isaac } 581e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_2(xw, idiag, s); 5829371c9d4SSatish Balay x[i2] = xw[0]; 5839371c9d4SSatish Balay x[i2 + 1] = xw[1]; 584e48d15efSToby Isaac idiag -= 4; 585e48d15efSToby Isaac i2 -= 2; 586e48d15efSToby Isaac } 587e48d15efSToby Isaac break; 588e48d15efSToby Isaac case 3: 5899371c9d4SSatish Balay s[0] = xb[i2]; 5909371c9d4SSatish Balay s[1] = xb[i2 + 1]; 5919371c9d4SSatish Balay s[2] = xb[i2 + 2]; 592e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_3(xw, idiag, s); 5939371c9d4SSatish Balay x[i2] = xw[0]; 5949371c9d4SSatish Balay x[i2 + 1] = xw[1]; 5959371c9d4SSatish Balay x[i2 + 2] = xw[2]; 596e48d15efSToby Isaac i2 -= 3; 597e48d15efSToby Isaac idiag -= 9; 598e48d15efSToby Isaac for (i = m - 2; i >= 0; i--) { 599e48d15efSToby Isaac v = aa + 9 * (diag[i] + 1); 600e48d15efSToby Isaac vi = aj + diag[i] + 1; 601e48d15efSToby Isaac nz = ai[i + 1] - diag[i] - 1; 6029371c9d4SSatish Balay s[0] = xb[i2]; 6039371c9d4SSatish Balay s[1] = xb[i2 + 1]; 6049371c9d4SSatish Balay s[2] = xb[i2 + 2]; 605e48d15efSToby Isaac while (nz--) { 606e48d15efSToby Isaac idx = 3 * (*vi++); 6079371c9d4SSatish Balay xw[0] = x[idx]; 6089371c9d4SSatish Balay xw[1] = x[1 + idx]; 6099371c9d4SSatish Balay xw[2] = x[2 + idx]; 610e48d15efSToby Isaac PetscKernel_v_gets_v_minus_A_times_w_3(s, v, xw); 611e48d15efSToby Isaac v += 9; 612e48d15efSToby Isaac } 613e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_3(xw, idiag, s); 6149371c9d4SSatish Balay x[i2] = xw[0]; 6159371c9d4SSatish Balay x[i2 + 1] = xw[1]; 6169371c9d4SSatish Balay x[i2 + 2] = xw[2]; 617e48d15efSToby Isaac idiag -= 9; 618e48d15efSToby Isaac i2 -= 3; 619e48d15efSToby Isaac } 620e48d15efSToby Isaac break; 621e48d15efSToby Isaac case 4: 6229371c9d4SSatish Balay s[0] = xb[i2]; 6239371c9d4SSatish Balay s[1] = xb[i2 + 1]; 6249371c9d4SSatish Balay s[2] = xb[i2 + 2]; 6259371c9d4SSatish Balay s[3] = xb[i2 + 3]; 626e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_4(xw, idiag, s); 6279371c9d4SSatish Balay x[i2] = xw[0]; 6289371c9d4SSatish Balay x[i2 + 1] = xw[1]; 6299371c9d4SSatish Balay x[i2 + 2] = xw[2]; 6309371c9d4SSatish Balay x[i2 + 3] = xw[3]; 631e48d15efSToby Isaac i2 -= 4; 632e48d15efSToby Isaac idiag -= 16; 633e48d15efSToby Isaac for (i = m - 2; i >= 0; i--) { 634e48d15efSToby Isaac v = aa + 16 * (diag[i] + 1); 635e48d15efSToby Isaac vi = aj + diag[i] + 1; 636e48d15efSToby Isaac nz = ai[i + 1] - diag[i] - 1; 6379371c9d4SSatish Balay s[0] = xb[i2]; 6389371c9d4SSatish Balay s[1] = xb[i2 + 1]; 6399371c9d4SSatish Balay s[2] = xb[i2 + 2]; 6409371c9d4SSatish Balay s[3] = xb[i2 + 3]; 641e48d15efSToby Isaac while (nz--) { 642e48d15efSToby Isaac idx = 4 * (*vi++); 6439371c9d4SSatish Balay xw[0] = x[idx]; 6449371c9d4SSatish Balay xw[1] = x[1 + idx]; 6459371c9d4SSatish Balay xw[2] = x[2 + idx]; 6469371c9d4SSatish Balay xw[3] = x[3 + idx]; 647e48d15efSToby Isaac PetscKernel_v_gets_v_minus_A_times_w_4(s, v, xw); 648e48d15efSToby Isaac v += 16; 649e48d15efSToby Isaac } 650e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_4(xw, idiag, s); 6519371c9d4SSatish Balay x[i2] = xw[0]; 6529371c9d4SSatish Balay x[i2 + 1] = xw[1]; 6539371c9d4SSatish Balay x[i2 + 2] = xw[2]; 6549371c9d4SSatish Balay x[i2 + 3] = xw[3]; 655e48d15efSToby Isaac idiag -= 16; 656e48d15efSToby Isaac i2 -= 4; 657e48d15efSToby Isaac } 658e48d15efSToby Isaac break; 659e48d15efSToby Isaac case 5: 6609371c9d4SSatish Balay s[0] = xb[i2]; 6619371c9d4SSatish Balay s[1] = xb[i2 + 1]; 6629371c9d4SSatish Balay s[2] = xb[i2 + 2]; 6639371c9d4SSatish Balay s[3] = xb[i2 + 3]; 6649371c9d4SSatish Balay s[4] = xb[i2 + 4]; 665e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_5(xw, idiag, s); 6669371c9d4SSatish Balay x[i2] = xw[0]; 6679371c9d4SSatish Balay x[i2 + 1] = xw[1]; 6689371c9d4SSatish Balay x[i2 + 2] = xw[2]; 6699371c9d4SSatish Balay x[i2 + 3] = xw[3]; 6709371c9d4SSatish Balay x[i2 + 4] = xw[4]; 671e48d15efSToby Isaac i2 -= 5; 672e48d15efSToby Isaac idiag -= 25; 673e48d15efSToby Isaac for (i = m - 2; i >= 0; i--) { 674e48d15efSToby Isaac v = aa + 25 * (diag[i] + 1); 675e48d15efSToby Isaac vi = aj + diag[i] + 1; 676e48d15efSToby Isaac nz = ai[i + 1] - diag[i] - 1; 6779371c9d4SSatish Balay s[0] = xb[i2]; 6789371c9d4SSatish Balay s[1] = xb[i2 + 1]; 6799371c9d4SSatish Balay s[2] = xb[i2 + 2]; 6809371c9d4SSatish Balay s[3] = xb[i2 + 3]; 6819371c9d4SSatish Balay s[4] = xb[i2 + 4]; 682e48d15efSToby Isaac while (nz--) { 683e48d15efSToby Isaac idx = 5 * (*vi++); 6849371c9d4SSatish Balay xw[0] = x[idx]; 6859371c9d4SSatish Balay xw[1] = x[1 + idx]; 6869371c9d4SSatish Balay xw[2] = x[2 + idx]; 6879371c9d4SSatish Balay xw[3] = x[3 + idx]; 6889371c9d4SSatish Balay xw[4] = x[4 + idx]; 689e48d15efSToby Isaac PetscKernel_v_gets_v_minus_A_times_w_5(s, v, xw); 690e48d15efSToby Isaac v += 25; 691e48d15efSToby Isaac } 692e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_5(xw, idiag, s); 6939371c9d4SSatish Balay x[i2] = xw[0]; 6949371c9d4SSatish Balay x[i2 + 1] = xw[1]; 6959371c9d4SSatish Balay x[i2 + 2] = xw[2]; 6969371c9d4SSatish Balay x[i2 + 3] = xw[3]; 6979371c9d4SSatish Balay x[i2 + 4] = xw[4]; 698e48d15efSToby Isaac idiag -= 25; 699e48d15efSToby Isaac i2 -= 5; 700e48d15efSToby Isaac } 701e48d15efSToby Isaac break; 702e48d15efSToby Isaac case 6: 7039371c9d4SSatish Balay s[0] = xb[i2]; 7049371c9d4SSatish Balay s[1] = xb[i2 + 1]; 7059371c9d4SSatish Balay s[2] = xb[i2 + 2]; 7069371c9d4SSatish Balay s[3] = xb[i2 + 3]; 7079371c9d4SSatish Balay s[4] = xb[i2 + 4]; 7089371c9d4SSatish Balay s[5] = xb[i2 + 5]; 709e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_6(xw, idiag, s); 7109371c9d4SSatish Balay x[i2] = xw[0]; 7119371c9d4SSatish Balay x[i2 + 1] = xw[1]; 7129371c9d4SSatish Balay x[i2 + 2] = xw[2]; 7139371c9d4SSatish Balay x[i2 + 3] = xw[3]; 7149371c9d4SSatish Balay x[i2 + 4] = xw[4]; 7159371c9d4SSatish Balay x[i2 + 5] = xw[5]; 716e48d15efSToby Isaac i2 -= 6; 717e48d15efSToby Isaac idiag -= 36; 718e48d15efSToby Isaac for (i = m - 2; i >= 0; i--) { 719e48d15efSToby Isaac v = aa + 36 * (diag[i] + 1); 720e48d15efSToby Isaac vi = aj + diag[i] + 1; 721e48d15efSToby Isaac nz = ai[i + 1] - diag[i] - 1; 7229371c9d4SSatish Balay s[0] = xb[i2]; 7239371c9d4SSatish Balay s[1] = xb[i2 + 1]; 7249371c9d4SSatish Balay s[2] = xb[i2 + 2]; 7259371c9d4SSatish Balay s[3] = xb[i2 + 3]; 7269371c9d4SSatish Balay s[4] = xb[i2 + 4]; 7279371c9d4SSatish Balay s[5] = xb[i2 + 5]; 728e48d15efSToby Isaac while (nz--) { 729e48d15efSToby Isaac idx = 6 * (*vi++); 7309371c9d4SSatish Balay xw[0] = x[idx]; 7319371c9d4SSatish Balay xw[1] = x[1 + idx]; 7329371c9d4SSatish Balay xw[2] = x[2 + idx]; 7339371c9d4SSatish Balay xw[3] = x[3 + idx]; 7349371c9d4SSatish Balay xw[4] = x[4 + idx]; 7359371c9d4SSatish Balay xw[5] = x[5 + idx]; 736e48d15efSToby Isaac PetscKernel_v_gets_v_minus_A_times_w_6(s, v, xw); 737e48d15efSToby Isaac v += 36; 738e48d15efSToby Isaac } 739e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_6(xw, idiag, s); 7409371c9d4SSatish Balay x[i2] = xw[0]; 7419371c9d4SSatish Balay x[i2 + 1] = xw[1]; 7429371c9d4SSatish Balay x[i2 + 2] = xw[2]; 7439371c9d4SSatish Balay x[i2 + 3] = xw[3]; 7449371c9d4SSatish Balay x[i2 + 4] = xw[4]; 7459371c9d4SSatish Balay x[i2 + 5] = xw[5]; 746e48d15efSToby Isaac idiag -= 36; 747e48d15efSToby Isaac i2 -= 6; 748e48d15efSToby Isaac } 749e48d15efSToby Isaac break; 750e48d15efSToby Isaac case 7: 7519371c9d4SSatish Balay s[0] = xb[i2]; 7529371c9d4SSatish Balay s[1] = xb[i2 + 1]; 7539371c9d4SSatish Balay s[2] = xb[i2 + 2]; 7549371c9d4SSatish Balay s[3] = xb[i2 + 3]; 7559371c9d4SSatish Balay s[4] = xb[i2 + 4]; 7569371c9d4SSatish Balay s[5] = xb[i2 + 5]; 7579371c9d4SSatish Balay s[6] = xb[i2 + 6]; 758e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_7(x, idiag, b); 7599371c9d4SSatish Balay x[i2] = xw[0]; 7609371c9d4SSatish Balay x[i2 + 1] = xw[1]; 7619371c9d4SSatish Balay x[i2 + 2] = xw[2]; 7629371c9d4SSatish Balay x[i2 + 3] = xw[3]; 7639371c9d4SSatish Balay x[i2 + 4] = xw[4]; 7649371c9d4SSatish Balay x[i2 + 5] = xw[5]; 7659371c9d4SSatish Balay x[i2 + 6] = xw[6]; 766e48d15efSToby Isaac i2 -= 7; 767e48d15efSToby Isaac idiag -= 49; 768e48d15efSToby Isaac for (i = m - 2; i >= 0; i--) { 769e48d15efSToby Isaac v = aa + 49 * (diag[i] + 1); 770e48d15efSToby Isaac vi = aj + diag[i] + 1; 771e48d15efSToby Isaac nz = ai[i + 1] - diag[i] - 1; 7729371c9d4SSatish Balay s[0] = xb[i2]; 7739371c9d4SSatish Balay s[1] = xb[i2 + 1]; 7749371c9d4SSatish Balay s[2] = xb[i2 + 2]; 7759371c9d4SSatish Balay s[3] = xb[i2 + 3]; 7769371c9d4SSatish Balay s[4] = xb[i2 + 4]; 7779371c9d4SSatish Balay s[5] = xb[i2 + 5]; 7789371c9d4SSatish Balay s[6] = xb[i2 + 6]; 779e48d15efSToby Isaac while (nz--) { 780e48d15efSToby Isaac idx = 7 * (*vi++); 7819371c9d4SSatish Balay xw[0] = x[idx]; 7829371c9d4SSatish Balay xw[1] = x[1 + idx]; 7839371c9d4SSatish Balay xw[2] = x[2 + idx]; 7849371c9d4SSatish Balay xw[3] = x[3 + idx]; 7859371c9d4SSatish Balay xw[4] = x[4 + idx]; 7869371c9d4SSatish Balay xw[5] = x[5 + idx]; 7879371c9d4SSatish Balay xw[6] = x[6 + idx]; 788e48d15efSToby Isaac PetscKernel_v_gets_v_minus_A_times_w_7(s, v, xw); 789e48d15efSToby Isaac v += 49; 790e48d15efSToby Isaac } 791e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_7(xw, idiag, s); 7929371c9d4SSatish Balay x[i2] = xw[0]; 7939371c9d4SSatish Balay x[i2 + 1] = xw[1]; 7949371c9d4SSatish Balay x[i2 + 2] = xw[2]; 7959371c9d4SSatish Balay x[i2 + 3] = xw[3]; 7969371c9d4SSatish Balay x[i2 + 4] = xw[4]; 7979371c9d4SSatish Balay x[i2 + 5] = xw[5]; 7989371c9d4SSatish Balay x[i2 + 6] = xw[6]; 799e48d15efSToby Isaac idiag -= 49; 800e48d15efSToby Isaac i2 -= 7; 801e48d15efSToby Isaac } 802e48d15efSToby Isaac break; 803e48d15efSToby Isaac default: 8049566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(w, xb + i2, bs)); 80596b95a6bSBarry Smith PetscKernel_w_gets_Ar_times_v(bs, bs, w, idiag, x + i2); 806de80f912SBarry Smith i2 -= bs; 807e48d15efSToby Isaac idiag -= bs2; 808de80f912SBarry Smith for (i = m - 2; i >= 0; i--) { 809de80f912SBarry Smith v = aa + bs2 * (diag[i] + 1); 810de80f912SBarry Smith vi = aj + diag[i] + 1; 811de80f912SBarry Smith nz = ai[i + 1] - diag[i] - 1; 812de80f912SBarry Smith 8139566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(w, xb + i2, bs)); 814de80f912SBarry Smith /* copy all rows of x that are needed into contiguous space */ 815de80f912SBarry Smith workt = work; 816de80f912SBarry Smith for (j = 0; j < nz; j++) { 8179566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(workt, x + bs * (*vi++), bs)); 818de80f912SBarry Smith workt += bs; 819de80f912SBarry Smith } 82096b95a6bSBarry Smith PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, v, work); 82196b95a6bSBarry Smith PetscKernel_w_gets_Ar_times_v(bs, bs, w, idiag, x + i2); 822e48d15efSToby Isaac 823de80f912SBarry Smith idiag -= bs2; 824de80f912SBarry Smith i2 -= bs; 825de80f912SBarry Smith } 826e48d15efSToby Isaac break; 827e48d15efSToby Isaac } 8289566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(1.0 * bs2 * (a->nz))); 829de80f912SBarry Smith } 830e48d15efSToby Isaac its--; 831e48d15efSToby Isaac } 832e48d15efSToby Isaac while (its--) { 833e48d15efSToby Isaac if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) { 834e48d15efSToby Isaac idiag = a->idiag; 835e48d15efSToby Isaac i2 = 0; 836e48d15efSToby Isaac switch (bs) { 837e48d15efSToby Isaac case 1: 838e48d15efSToby Isaac for (i = 0; i < m; i++) { 839e48d15efSToby Isaac v = aa + ai[i]; 840e48d15efSToby Isaac vi = aj + ai[i]; 841e48d15efSToby Isaac nz = ai[i + 1] - ai[i]; 842e48d15efSToby Isaac s[0] = b[i2]; 843e48d15efSToby Isaac for (j = 0; j < nz; j++) { 844e48d15efSToby Isaac xw[0] = x[vi[j]]; 845e48d15efSToby Isaac PetscKernel_v_gets_v_minus_A_times_w_1(s, (v + j), xw); 846e48d15efSToby Isaac } 847e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_1(xw, idiag, s); 848e48d15efSToby Isaac x[i2] += xw[0]; 849e48d15efSToby Isaac idiag += 1; 850e48d15efSToby Isaac i2 += 1; 851e48d15efSToby Isaac } 852e48d15efSToby Isaac break; 853e48d15efSToby Isaac case 2: 854e48d15efSToby Isaac for (i = 0; i < m; i++) { 855e48d15efSToby Isaac v = aa + 4 * ai[i]; 856e48d15efSToby Isaac vi = aj + ai[i]; 857e48d15efSToby Isaac nz = ai[i + 1] - ai[i]; 8589371c9d4SSatish Balay s[0] = b[i2]; 8599371c9d4SSatish Balay s[1] = b[i2 + 1]; 860e48d15efSToby Isaac for (j = 0; j < nz; j++) { 861e48d15efSToby Isaac idx = 2 * vi[j]; 862e48d15efSToby Isaac it = 4 * j; 8639371c9d4SSatish Balay xw[0] = x[idx]; 8649371c9d4SSatish Balay xw[1] = x[1 + idx]; 865e48d15efSToby Isaac PetscKernel_v_gets_v_minus_A_times_w_2(s, (v + it), xw); 866e48d15efSToby Isaac } 867e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_2(xw, idiag, s); 8689371c9d4SSatish Balay x[i2] += xw[0]; 8699371c9d4SSatish Balay x[i2 + 1] += xw[1]; 870e48d15efSToby Isaac idiag += 4; 871e48d15efSToby Isaac i2 += 2; 872e48d15efSToby Isaac } 873e48d15efSToby Isaac break; 874e48d15efSToby Isaac case 3: 875e48d15efSToby Isaac for (i = 0; i < m; i++) { 876e48d15efSToby Isaac v = aa + 9 * ai[i]; 877e48d15efSToby Isaac vi = aj + ai[i]; 878e48d15efSToby Isaac nz = ai[i + 1] - ai[i]; 8799371c9d4SSatish Balay s[0] = b[i2]; 8809371c9d4SSatish Balay s[1] = b[i2 + 1]; 8819371c9d4SSatish Balay s[2] = b[i2 + 2]; 882e48d15efSToby Isaac while (nz--) { 883e48d15efSToby Isaac idx = 3 * (*vi++); 8849371c9d4SSatish Balay xw[0] = x[idx]; 8859371c9d4SSatish Balay xw[1] = x[1 + idx]; 8869371c9d4SSatish Balay xw[2] = x[2 + idx]; 887e48d15efSToby Isaac PetscKernel_v_gets_v_minus_A_times_w_3(s, v, xw); 888e48d15efSToby Isaac v += 9; 889e48d15efSToby Isaac } 890e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_3(xw, idiag, s); 8919371c9d4SSatish Balay x[i2] += xw[0]; 8929371c9d4SSatish Balay x[i2 + 1] += xw[1]; 8939371c9d4SSatish Balay x[i2 + 2] += xw[2]; 894e48d15efSToby Isaac idiag += 9; 895e48d15efSToby Isaac i2 += 3; 896e48d15efSToby Isaac } 897e48d15efSToby Isaac break; 898e48d15efSToby Isaac case 4: 899e48d15efSToby Isaac for (i = 0; i < m; i++) { 900e48d15efSToby Isaac v = aa + 16 * ai[i]; 901e48d15efSToby Isaac vi = aj + ai[i]; 902e48d15efSToby Isaac nz = ai[i + 1] - ai[i]; 9039371c9d4SSatish Balay s[0] = b[i2]; 9049371c9d4SSatish Balay s[1] = b[i2 + 1]; 9059371c9d4SSatish Balay s[2] = b[i2 + 2]; 9069371c9d4SSatish Balay s[3] = b[i2 + 3]; 907e48d15efSToby Isaac while (nz--) { 908e48d15efSToby Isaac idx = 4 * (*vi++); 9099371c9d4SSatish Balay xw[0] = x[idx]; 9109371c9d4SSatish Balay xw[1] = x[1 + idx]; 9119371c9d4SSatish Balay xw[2] = x[2 + idx]; 9129371c9d4SSatish Balay xw[3] = x[3 + idx]; 913e48d15efSToby Isaac PetscKernel_v_gets_v_minus_A_times_w_4(s, v, xw); 914e48d15efSToby Isaac v += 16; 915e48d15efSToby Isaac } 916e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_4(xw, idiag, s); 9179371c9d4SSatish Balay x[i2] += xw[0]; 9189371c9d4SSatish Balay x[i2 + 1] += xw[1]; 9199371c9d4SSatish Balay x[i2 + 2] += xw[2]; 9209371c9d4SSatish Balay x[i2 + 3] += xw[3]; 921e48d15efSToby Isaac idiag += 16; 922e48d15efSToby Isaac i2 += 4; 923e48d15efSToby Isaac } 924e48d15efSToby Isaac break; 925e48d15efSToby Isaac case 5: 926e48d15efSToby Isaac for (i = 0; i < m; i++) { 927e48d15efSToby Isaac v = aa + 25 * ai[i]; 928e48d15efSToby Isaac vi = aj + ai[i]; 929e48d15efSToby Isaac nz = ai[i + 1] - ai[i]; 9309371c9d4SSatish Balay s[0] = b[i2]; 9319371c9d4SSatish Balay s[1] = b[i2 + 1]; 9329371c9d4SSatish Balay s[2] = b[i2 + 2]; 9339371c9d4SSatish Balay s[3] = b[i2 + 3]; 9349371c9d4SSatish Balay s[4] = b[i2 + 4]; 935e48d15efSToby Isaac while (nz--) { 936e48d15efSToby Isaac idx = 5 * (*vi++); 9379371c9d4SSatish Balay xw[0] = x[idx]; 9389371c9d4SSatish Balay xw[1] = x[1 + idx]; 9399371c9d4SSatish Balay xw[2] = x[2 + idx]; 9409371c9d4SSatish Balay xw[3] = x[3 + idx]; 9419371c9d4SSatish Balay xw[4] = x[4 + idx]; 942e48d15efSToby Isaac PetscKernel_v_gets_v_minus_A_times_w_5(s, v, xw); 943e48d15efSToby Isaac v += 25; 944e48d15efSToby Isaac } 945e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_5(xw, idiag, s); 9469371c9d4SSatish Balay x[i2] += xw[0]; 9479371c9d4SSatish Balay x[i2 + 1] += xw[1]; 9489371c9d4SSatish Balay x[i2 + 2] += xw[2]; 9499371c9d4SSatish Balay x[i2 + 3] += xw[3]; 9509371c9d4SSatish Balay x[i2 + 4] += xw[4]; 951e48d15efSToby Isaac idiag += 25; 952e48d15efSToby Isaac i2 += 5; 953e48d15efSToby Isaac } 954e48d15efSToby Isaac break; 955e48d15efSToby Isaac case 6: 956e48d15efSToby Isaac for (i = 0; i < m; i++) { 957e48d15efSToby Isaac v = aa + 36 * ai[i]; 958e48d15efSToby Isaac vi = aj + ai[i]; 959e48d15efSToby Isaac nz = ai[i + 1] - ai[i]; 9609371c9d4SSatish Balay s[0] = b[i2]; 9619371c9d4SSatish Balay s[1] = b[i2 + 1]; 9629371c9d4SSatish Balay s[2] = b[i2 + 2]; 9639371c9d4SSatish Balay s[3] = b[i2 + 3]; 9649371c9d4SSatish Balay s[4] = b[i2 + 4]; 9659371c9d4SSatish Balay s[5] = b[i2 + 5]; 966e48d15efSToby Isaac while (nz--) { 967e48d15efSToby Isaac idx = 6 * (*vi++); 9689371c9d4SSatish Balay xw[0] = x[idx]; 9699371c9d4SSatish Balay xw[1] = x[1 + idx]; 9709371c9d4SSatish Balay xw[2] = x[2 + idx]; 9719371c9d4SSatish Balay xw[3] = x[3 + idx]; 9729371c9d4SSatish Balay xw[4] = x[4 + idx]; 9739371c9d4SSatish Balay xw[5] = x[5 + idx]; 974e48d15efSToby Isaac PetscKernel_v_gets_v_minus_A_times_w_6(s, v, xw); 975e48d15efSToby Isaac v += 36; 976e48d15efSToby Isaac } 977e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_6(xw, idiag, s); 9789371c9d4SSatish Balay x[i2] += xw[0]; 9799371c9d4SSatish Balay x[i2 + 1] += xw[1]; 9809371c9d4SSatish Balay x[i2 + 2] += xw[2]; 9819371c9d4SSatish Balay x[i2 + 3] += xw[3]; 9829371c9d4SSatish Balay x[i2 + 4] += xw[4]; 9839371c9d4SSatish Balay x[i2 + 5] += xw[5]; 984e48d15efSToby Isaac idiag += 36; 985e48d15efSToby Isaac i2 += 6; 986e48d15efSToby Isaac } 987e48d15efSToby Isaac break; 988e48d15efSToby Isaac case 7: 989e48d15efSToby Isaac for (i = 0; i < m; i++) { 990e48d15efSToby Isaac v = aa + 49 * ai[i]; 991e48d15efSToby Isaac vi = aj + ai[i]; 992e48d15efSToby Isaac nz = ai[i + 1] - ai[i]; 9939371c9d4SSatish Balay s[0] = b[i2]; 9949371c9d4SSatish Balay s[1] = b[i2 + 1]; 9959371c9d4SSatish Balay s[2] = b[i2 + 2]; 9969371c9d4SSatish Balay s[3] = b[i2 + 3]; 9979371c9d4SSatish Balay s[4] = b[i2 + 4]; 9989371c9d4SSatish Balay s[5] = b[i2 + 5]; 9999371c9d4SSatish Balay s[6] = b[i2 + 6]; 1000e48d15efSToby Isaac while (nz--) { 1001e48d15efSToby Isaac idx = 7 * (*vi++); 10029371c9d4SSatish Balay xw[0] = x[idx]; 10039371c9d4SSatish Balay xw[1] = x[1 + idx]; 10049371c9d4SSatish Balay xw[2] = x[2 + idx]; 10059371c9d4SSatish Balay xw[3] = x[3 + idx]; 10069371c9d4SSatish Balay xw[4] = x[4 + idx]; 10079371c9d4SSatish Balay xw[5] = x[5 + idx]; 10089371c9d4SSatish Balay xw[6] = x[6 + idx]; 1009e48d15efSToby Isaac PetscKernel_v_gets_v_minus_A_times_w_7(s, v, xw); 1010e48d15efSToby Isaac v += 49; 1011e48d15efSToby Isaac } 1012e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_7(xw, idiag, s); 10139371c9d4SSatish Balay x[i2] += xw[0]; 10149371c9d4SSatish Balay x[i2 + 1] += xw[1]; 10159371c9d4SSatish Balay x[i2 + 2] += xw[2]; 10169371c9d4SSatish Balay x[i2 + 3] += xw[3]; 10179371c9d4SSatish Balay x[i2 + 4] += xw[4]; 10189371c9d4SSatish Balay x[i2 + 5] += xw[5]; 10199371c9d4SSatish Balay x[i2 + 6] += xw[6]; 1020e48d15efSToby Isaac idiag += 49; 1021e48d15efSToby Isaac i2 += 7; 1022e48d15efSToby Isaac } 1023e48d15efSToby Isaac break; 1024e48d15efSToby Isaac default: 1025e48d15efSToby Isaac for (i = 0; i < m; i++) { 1026e48d15efSToby Isaac v = aa + bs2 * ai[i]; 1027e48d15efSToby Isaac vi = aj + ai[i]; 1028e48d15efSToby Isaac nz = ai[i + 1] - ai[i]; 1029e48d15efSToby Isaac 10309566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(w, b + i2, bs)); 1031e48d15efSToby Isaac /* copy all rows of x that are needed into contiguous space */ 1032e48d15efSToby Isaac workt = work; 1033e48d15efSToby Isaac for (j = 0; j < nz; j++) { 10349566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(workt, x + bs * (*vi++), bs)); 1035e48d15efSToby Isaac workt += bs; 1036e48d15efSToby Isaac } 1037e48d15efSToby Isaac PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, v, work); 1038e48d15efSToby Isaac PetscKernel_w_gets_w_plus_Ar_times_v(bs, bs, w, idiag, x + i2); 1039e48d15efSToby Isaac 1040e48d15efSToby Isaac idiag += bs2; 1041e48d15efSToby Isaac i2 += bs; 1042e48d15efSToby Isaac } 1043e48d15efSToby Isaac break; 1044e48d15efSToby Isaac } 10459566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(2.0 * bs2 * a->nz)); 1046e48d15efSToby Isaac } 1047e48d15efSToby Isaac if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 1048e48d15efSToby Isaac idiag = a->idiag + bs2 * (a->mbs - 1); 1049e48d15efSToby Isaac i2 = bs * (m - 1); 1050e48d15efSToby Isaac switch (bs) { 1051e48d15efSToby Isaac case 1: 1052e48d15efSToby Isaac for (i = m - 1; i >= 0; i--) { 1053e48d15efSToby Isaac v = aa + ai[i]; 1054e48d15efSToby Isaac vi = aj + ai[i]; 1055e48d15efSToby Isaac nz = ai[i + 1] - ai[i]; 1056e48d15efSToby Isaac s[0] = b[i2]; 1057e48d15efSToby Isaac for (j = 0; j < nz; j++) { 1058e48d15efSToby Isaac xw[0] = x[vi[j]]; 1059e48d15efSToby Isaac PetscKernel_v_gets_v_minus_A_times_w_1(s, (v + j), xw); 1060e48d15efSToby Isaac } 1061e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_1(xw, idiag, s); 1062e48d15efSToby Isaac x[i2] += xw[0]; 1063e48d15efSToby Isaac idiag -= 1; 1064e48d15efSToby Isaac i2 -= 1; 1065e48d15efSToby Isaac } 1066e48d15efSToby Isaac break; 1067e48d15efSToby Isaac case 2: 1068e48d15efSToby Isaac for (i = m - 1; i >= 0; i--) { 1069e48d15efSToby Isaac v = aa + 4 * ai[i]; 1070e48d15efSToby Isaac vi = aj + ai[i]; 1071e48d15efSToby Isaac nz = ai[i + 1] - ai[i]; 10729371c9d4SSatish Balay s[0] = b[i2]; 10739371c9d4SSatish Balay s[1] = b[i2 + 1]; 1074e48d15efSToby Isaac for (j = 0; j < nz; j++) { 1075e48d15efSToby Isaac idx = 2 * vi[j]; 1076e48d15efSToby Isaac it = 4 * j; 10779371c9d4SSatish Balay xw[0] = x[idx]; 10789371c9d4SSatish Balay xw[1] = x[1 + idx]; 1079e48d15efSToby Isaac PetscKernel_v_gets_v_minus_A_times_w_2(s, (v + it), xw); 1080e48d15efSToby Isaac } 1081e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_2(xw, idiag, s); 10829371c9d4SSatish Balay x[i2] += xw[0]; 10839371c9d4SSatish Balay x[i2 + 1] += xw[1]; 1084e48d15efSToby Isaac idiag -= 4; 1085e48d15efSToby Isaac i2 -= 2; 1086e48d15efSToby Isaac } 1087e48d15efSToby Isaac break; 1088e48d15efSToby Isaac case 3: 1089e48d15efSToby Isaac for (i = m - 1; i >= 0; i--) { 1090e48d15efSToby Isaac v = aa + 9 * ai[i]; 1091e48d15efSToby Isaac vi = aj + ai[i]; 1092e48d15efSToby Isaac nz = ai[i + 1] - ai[i]; 10939371c9d4SSatish Balay s[0] = b[i2]; 10949371c9d4SSatish Balay s[1] = b[i2 + 1]; 10959371c9d4SSatish Balay s[2] = b[i2 + 2]; 1096e48d15efSToby Isaac while (nz--) { 1097e48d15efSToby Isaac idx = 3 * (*vi++); 10989371c9d4SSatish Balay xw[0] = x[idx]; 10999371c9d4SSatish Balay xw[1] = x[1 + idx]; 11009371c9d4SSatish Balay xw[2] = x[2 + idx]; 1101e48d15efSToby Isaac PetscKernel_v_gets_v_minus_A_times_w_3(s, v, xw); 1102e48d15efSToby Isaac v += 9; 1103e48d15efSToby Isaac } 1104e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_3(xw, idiag, s); 11059371c9d4SSatish Balay x[i2] += xw[0]; 11069371c9d4SSatish Balay x[i2 + 1] += xw[1]; 11079371c9d4SSatish Balay x[i2 + 2] += xw[2]; 1108e48d15efSToby Isaac idiag -= 9; 1109e48d15efSToby Isaac i2 -= 3; 1110e48d15efSToby Isaac } 1111e48d15efSToby Isaac break; 1112e48d15efSToby Isaac case 4: 1113e48d15efSToby Isaac for (i = m - 1; i >= 0; i--) { 1114e48d15efSToby Isaac v = aa + 16 * ai[i]; 1115e48d15efSToby Isaac vi = aj + ai[i]; 1116e48d15efSToby Isaac nz = ai[i + 1] - ai[i]; 11179371c9d4SSatish Balay s[0] = b[i2]; 11189371c9d4SSatish Balay s[1] = b[i2 + 1]; 11199371c9d4SSatish Balay s[2] = b[i2 + 2]; 11209371c9d4SSatish Balay s[3] = b[i2 + 3]; 1121e48d15efSToby Isaac while (nz--) { 1122e48d15efSToby Isaac idx = 4 * (*vi++); 11239371c9d4SSatish Balay xw[0] = x[idx]; 11249371c9d4SSatish Balay xw[1] = x[1 + idx]; 11259371c9d4SSatish Balay xw[2] = x[2 + idx]; 11269371c9d4SSatish Balay xw[3] = x[3 + idx]; 1127e48d15efSToby Isaac PetscKernel_v_gets_v_minus_A_times_w_4(s, v, xw); 1128e48d15efSToby Isaac v += 16; 1129e48d15efSToby Isaac } 1130e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_4(xw, idiag, s); 11319371c9d4SSatish Balay x[i2] += xw[0]; 11329371c9d4SSatish Balay x[i2 + 1] += xw[1]; 11339371c9d4SSatish Balay x[i2 + 2] += xw[2]; 11349371c9d4SSatish Balay x[i2 + 3] += xw[3]; 1135e48d15efSToby Isaac idiag -= 16; 1136e48d15efSToby Isaac i2 -= 4; 1137e48d15efSToby Isaac } 1138e48d15efSToby Isaac break; 1139e48d15efSToby Isaac case 5: 1140e48d15efSToby Isaac for (i = m - 1; i >= 0; i--) { 1141e48d15efSToby Isaac v = aa + 25 * ai[i]; 1142e48d15efSToby Isaac vi = aj + ai[i]; 1143e48d15efSToby Isaac nz = ai[i + 1] - ai[i]; 11449371c9d4SSatish Balay s[0] = b[i2]; 11459371c9d4SSatish Balay s[1] = b[i2 + 1]; 11469371c9d4SSatish Balay s[2] = b[i2 + 2]; 11479371c9d4SSatish Balay s[3] = b[i2 + 3]; 11489371c9d4SSatish Balay s[4] = b[i2 + 4]; 1149e48d15efSToby Isaac while (nz--) { 1150e48d15efSToby Isaac idx = 5 * (*vi++); 11519371c9d4SSatish Balay xw[0] = x[idx]; 11529371c9d4SSatish Balay xw[1] = x[1 + idx]; 11539371c9d4SSatish Balay xw[2] = x[2 + idx]; 11549371c9d4SSatish Balay xw[3] = x[3 + idx]; 11559371c9d4SSatish Balay xw[4] = x[4 + idx]; 1156e48d15efSToby Isaac PetscKernel_v_gets_v_minus_A_times_w_5(s, v, xw); 1157e48d15efSToby Isaac v += 25; 1158e48d15efSToby Isaac } 1159e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_5(xw, idiag, s); 11609371c9d4SSatish Balay x[i2] += xw[0]; 11619371c9d4SSatish Balay x[i2 + 1] += xw[1]; 11629371c9d4SSatish Balay x[i2 + 2] += xw[2]; 11639371c9d4SSatish Balay x[i2 + 3] += xw[3]; 11649371c9d4SSatish Balay x[i2 + 4] += xw[4]; 1165e48d15efSToby Isaac idiag -= 25; 1166e48d15efSToby Isaac i2 -= 5; 1167e48d15efSToby Isaac } 1168e48d15efSToby Isaac break; 1169e48d15efSToby Isaac case 6: 1170e48d15efSToby Isaac for (i = m - 1; i >= 0; i--) { 1171e48d15efSToby Isaac v = aa + 36 * ai[i]; 1172e48d15efSToby Isaac vi = aj + ai[i]; 1173e48d15efSToby Isaac nz = ai[i + 1] - ai[i]; 11749371c9d4SSatish Balay s[0] = b[i2]; 11759371c9d4SSatish Balay s[1] = b[i2 + 1]; 11769371c9d4SSatish Balay s[2] = b[i2 + 2]; 11779371c9d4SSatish Balay s[3] = b[i2 + 3]; 11789371c9d4SSatish Balay s[4] = b[i2 + 4]; 11799371c9d4SSatish Balay s[5] = b[i2 + 5]; 1180e48d15efSToby Isaac while (nz--) { 1181e48d15efSToby Isaac idx = 6 * (*vi++); 11829371c9d4SSatish Balay xw[0] = x[idx]; 11839371c9d4SSatish Balay xw[1] = x[1 + idx]; 11849371c9d4SSatish Balay xw[2] = x[2 + idx]; 11859371c9d4SSatish Balay xw[3] = x[3 + idx]; 11869371c9d4SSatish Balay xw[4] = x[4 + idx]; 11879371c9d4SSatish Balay xw[5] = x[5 + idx]; 1188e48d15efSToby Isaac PetscKernel_v_gets_v_minus_A_times_w_6(s, v, xw); 1189e48d15efSToby Isaac v += 36; 1190e48d15efSToby Isaac } 1191e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_6(xw, idiag, s); 11929371c9d4SSatish Balay x[i2] += xw[0]; 11939371c9d4SSatish Balay x[i2 + 1] += xw[1]; 11949371c9d4SSatish Balay x[i2 + 2] += xw[2]; 11959371c9d4SSatish Balay x[i2 + 3] += xw[3]; 11969371c9d4SSatish Balay x[i2 + 4] += xw[4]; 11979371c9d4SSatish Balay x[i2 + 5] += xw[5]; 1198e48d15efSToby Isaac idiag -= 36; 1199e48d15efSToby Isaac i2 -= 6; 1200e48d15efSToby Isaac } 1201e48d15efSToby Isaac break; 1202e48d15efSToby Isaac case 7: 1203e48d15efSToby Isaac for (i = m - 1; i >= 0; i--) { 1204e48d15efSToby Isaac v = aa + 49 * ai[i]; 1205e48d15efSToby Isaac vi = aj + ai[i]; 1206e48d15efSToby Isaac nz = ai[i + 1] - ai[i]; 12079371c9d4SSatish Balay s[0] = b[i2]; 12089371c9d4SSatish Balay s[1] = b[i2 + 1]; 12099371c9d4SSatish Balay s[2] = b[i2 + 2]; 12109371c9d4SSatish Balay s[3] = b[i2 + 3]; 12119371c9d4SSatish Balay s[4] = b[i2 + 4]; 12129371c9d4SSatish Balay s[5] = b[i2 + 5]; 12139371c9d4SSatish Balay s[6] = b[i2 + 6]; 1214e48d15efSToby Isaac while (nz--) { 1215e48d15efSToby Isaac idx = 7 * (*vi++); 12169371c9d4SSatish Balay xw[0] = x[idx]; 12179371c9d4SSatish Balay xw[1] = x[1 + idx]; 12189371c9d4SSatish Balay xw[2] = x[2 + idx]; 12199371c9d4SSatish Balay xw[3] = x[3 + idx]; 12209371c9d4SSatish Balay xw[4] = x[4 + idx]; 12219371c9d4SSatish Balay xw[5] = x[5 + idx]; 12229371c9d4SSatish Balay xw[6] = x[6 + idx]; 1223e48d15efSToby Isaac PetscKernel_v_gets_v_minus_A_times_w_7(s, v, xw); 1224e48d15efSToby Isaac v += 49; 1225e48d15efSToby Isaac } 1226e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_7(xw, idiag, s); 12279371c9d4SSatish Balay x[i2] += xw[0]; 12289371c9d4SSatish Balay x[i2 + 1] += xw[1]; 12299371c9d4SSatish Balay x[i2 + 2] += xw[2]; 12309371c9d4SSatish Balay x[i2 + 3] += xw[3]; 12319371c9d4SSatish Balay x[i2 + 4] += xw[4]; 12329371c9d4SSatish Balay x[i2 + 5] += xw[5]; 12339371c9d4SSatish Balay x[i2 + 6] += xw[6]; 1234e48d15efSToby Isaac idiag -= 49; 1235e48d15efSToby Isaac i2 -= 7; 1236e48d15efSToby Isaac } 1237e48d15efSToby Isaac break; 1238e48d15efSToby Isaac default: 1239e48d15efSToby Isaac for (i = m - 1; i >= 0; i--) { 1240e48d15efSToby Isaac v = aa + bs2 * ai[i]; 1241e48d15efSToby Isaac vi = aj + ai[i]; 1242e48d15efSToby Isaac nz = ai[i + 1] - ai[i]; 1243e48d15efSToby Isaac 12449566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(w, b + i2, bs)); 1245e48d15efSToby Isaac /* copy all rows of x that are needed into contiguous space */ 1246e48d15efSToby Isaac workt = work; 1247e48d15efSToby Isaac for (j = 0; j < nz; j++) { 12489566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(workt, x + bs * (*vi++), bs)); 1249e48d15efSToby Isaac workt += bs; 1250e48d15efSToby Isaac } 1251e48d15efSToby Isaac PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, v, work); 1252e48d15efSToby Isaac PetscKernel_w_gets_w_plus_Ar_times_v(bs, bs, w, idiag, x + i2); 1253e48d15efSToby Isaac 1254e48d15efSToby Isaac idiag -= bs2; 1255e48d15efSToby Isaac i2 -= bs; 1256e48d15efSToby Isaac } 1257e48d15efSToby Isaac break; 1258e48d15efSToby Isaac } 12599566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(2.0 * bs2 * (a->nz))); 1260e48d15efSToby Isaac } 1261e48d15efSToby Isaac } 12629566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(xx, &x)); 12639566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(bb, &b)); 1264de80f912SBarry Smith PetscFunctionReturn(0); 1265de80f912SBarry Smith } 1266de80f912SBarry Smith 1267af674e45SBarry Smith /* 126881824310SBarry Smith Special version for direct calls from Fortran (Used in PETSc-fun3d) 1269af674e45SBarry Smith */ 1270af674e45SBarry Smith #if defined(PETSC_HAVE_FORTRAN_CAPS) 1271af674e45SBarry Smith #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4 1272af674e45SBarry Smith #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 1273af674e45SBarry Smith #define matsetvaluesblocked4_ matsetvaluesblocked4 1274af674e45SBarry Smith #endif 1275af674e45SBarry Smith 12769371c9d4SSatish Balay PETSC_EXTERN void matsetvaluesblocked4_(Mat *AA, PetscInt *mm, const PetscInt im[], PetscInt *nn, const PetscInt in[], const PetscScalar v[]) { 1277af674e45SBarry Smith Mat A = *AA; 1278af674e45SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 1279c1ac3661SBarry Smith PetscInt *rp, k, low, high, t, ii, jj, row, nrow, i, col, l, N, m = *mm, n = *nn; 1280c1ac3661SBarry Smith PetscInt *ai = a->i, *ailen = a->ilen; 128117ec6a02SBarry Smith PetscInt *aj = a->j, stepval, lastcol = -1; 1282f15d580aSBarry Smith const PetscScalar *value = v; 12834bb09213Spetsc MatScalar *ap, *aa = a->a, *bap; 1284af674e45SBarry Smith 1285af674e45SBarry Smith PetscFunctionBegin; 1286ce94432eSBarry Smith if (A->rmap->bs != 4) SETERRABORT(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Can only be called with a block size of 4"); 1287af674e45SBarry Smith stepval = (n - 1) * 4; 1288af674e45SBarry Smith for (k = 0; k < m; k++) { /* loop over added rows */ 1289af674e45SBarry Smith row = im[k]; 1290af674e45SBarry Smith rp = aj + ai[row]; 1291af674e45SBarry Smith ap = aa + 16 * ai[row]; 1292af674e45SBarry Smith nrow = ailen[row]; 1293af674e45SBarry Smith low = 0; 129417ec6a02SBarry Smith high = nrow; 1295af674e45SBarry Smith for (l = 0; l < n; l++) { /* loop over added columns */ 1296af674e45SBarry Smith col = in[l]; 1297db4deed7SKarl Rupp if (col <= lastcol) low = 0; 1298db4deed7SKarl Rupp else high = nrow; 129917ec6a02SBarry Smith lastcol = col; 13001e3347e8SBarry Smith value = v + k * (stepval + 4 + l) * 4; 1301af674e45SBarry Smith while (high - low > 7) { 1302af674e45SBarry Smith t = (low + high) / 2; 1303af674e45SBarry Smith if (rp[t] > col) high = t; 1304af674e45SBarry Smith else low = t; 1305af674e45SBarry Smith } 1306af674e45SBarry Smith for (i = low; i < high; i++) { 1307af674e45SBarry Smith if (rp[i] > col) break; 1308af674e45SBarry Smith if (rp[i] == col) { 1309af674e45SBarry Smith bap = ap + 16 * i; 1310af674e45SBarry Smith for (ii = 0; ii < 4; ii++, value += stepval) { 13119371c9d4SSatish Balay for (jj = ii; jj < 16; jj += 4) { bap[jj] += *value++; } 1312af674e45SBarry Smith } 1313af674e45SBarry Smith goto noinsert2; 1314af674e45SBarry Smith } 1315af674e45SBarry Smith } 1316af674e45SBarry Smith N = nrow++ - 1; 131717ec6a02SBarry Smith high++; /* added new column index thus must search to one higher than before */ 1318af674e45SBarry Smith /* shift up all the later entries in this row */ 1319af674e45SBarry Smith for (ii = N; ii >= i; ii--) { 1320af674e45SBarry Smith rp[ii + 1] = rp[ii]; 13219566063dSJacob Faibussowitsch PetscCallVoid(PetscArraycpy(ap + 16 * (ii + 1), ap + 16 * (ii), 16)); 1322af674e45SBarry Smith } 1323*48a46eb9SPierre Jolivet if (N >= i) PetscCallVoid(PetscArrayzero(ap + 16 * i, 16)); 1324af674e45SBarry Smith rp[i] = col; 1325af674e45SBarry Smith bap = ap + 16 * i; 1326af674e45SBarry Smith for (ii = 0; ii < 4; ii++, value += stepval) { 13279371c9d4SSatish Balay for (jj = ii; jj < 16; jj += 4) { bap[jj] = *value++; } 1328af674e45SBarry Smith } 1329af674e45SBarry Smith noinsert2:; 1330af674e45SBarry Smith low = i; 1331af674e45SBarry Smith } 1332af674e45SBarry Smith ailen[row] = nrow; 1333af674e45SBarry Smith } 1334be1d678aSKris Buschelman PetscFunctionReturnVoid(); 1335af674e45SBarry Smith } 1336af674e45SBarry Smith 1337af674e45SBarry Smith #if defined(PETSC_HAVE_FORTRAN_CAPS) 1338af674e45SBarry Smith #define matsetvalues4_ MATSETVALUES4 1339af674e45SBarry Smith #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 1340af674e45SBarry Smith #define matsetvalues4_ matsetvalues4 1341af674e45SBarry Smith #endif 1342af674e45SBarry Smith 13439371c9d4SSatish Balay PETSC_EXTERN void matsetvalues4_(Mat *AA, PetscInt *mm, PetscInt *im, PetscInt *nn, PetscInt *in, PetscScalar *v) { 1344af674e45SBarry Smith Mat A = *AA; 1345af674e45SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 1346580bdb30SBarry Smith PetscInt *rp, k, low, high, t, row, nrow, i, col, l, N, n = *nn, m = *mm; 1347c1ac3661SBarry Smith PetscInt *ai = a->i, *ailen = a->ilen; 1348c1ac3661SBarry Smith PetscInt *aj = a->j, brow, bcol; 134917ec6a02SBarry Smith PetscInt ridx, cidx, lastcol = -1; 1350af674e45SBarry Smith MatScalar *ap, value, *aa = a->a, *bap; 1351af674e45SBarry Smith 1352af674e45SBarry Smith PetscFunctionBegin; 1353af674e45SBarry Smith for (k = 0; k < m; k++) { /* loop over added rows */ 13549371c9d4SSatish Balay row = im[k]; 13559371c9d4SSatish Balay brow = row / 4; 1356af674e45SBarry Smith rp = aj + ai[brow]; 1357af674e45SBarry Smith ap = aa + 16 * ai[brow]; 1358af674e45SBarry Smith nrow = ailen[brow]; 1359af674e45SBarry Smith low = 0; 136017ec6a02SBarry Smith high = nrow; 1361af674e45SBarry Smith for (l = 0; l < n; l++) { /* loop over added columns */ 13629371c9d4SSatish Balay col = in[l]; 13639371c9d4SSatish Balay bcol = col / 4; 13649371c9d4SSatish Balay ridx = row % 4; 13659371c9d4SSatish Balay cidx = col % 4; 1366af674e45SBarry Smith value = v[l + k * n]; 1367db4deed7SKarl Rupp if (col <= lastcol) low = 0; 1368db4deed7SKarl Rupp else high = nrow; 136917ec6a02SBarry Smith lastcol = col; 1370af674e45SBarry Smith while (high - low > 7) { 1371af674e45SBarry Smith t = (low + high) / 2; 1372af674e45SBarry Smith if (rp[t] > bcol) high = t; 1373af674e45SBarry Smith else low = t; 1374af674e45SBarry Smith } 1375af674e45SBarry Smith for (i = low; i < high; i++) { 1376af674e45SBarry Smith if (rp[i] > bcol) break; 1377af674e45SBarry Smith if (rp[i] == bcol) { 1378af674e45SBarry Smith bap = ap + 16 * i + 4 * cidx + ridx; 1379af674e45SBarry Smith *bap += value; 1380af674e45SBarry Smith goto noinsert1; 1381af674e45SBarry Smith } 1382af674e45SBarry Smith } 1383af674e45SBarry Smith N = nrow++ - 1; 138417ec6a02SBarry Smith high++; /* added new column thus must search to one higher than before */ 1385af674e45SBarry Smith /* shift up all the later entries in this row */ 13869566063dSJacob Faibussowitsch PetscCallVoid(PetscArraymove(rp + i + 1, rp + i, N - i + 1)); 13879566063dSJacob Faibussowitsch PetscCallVoid(PetscArraymove(ap + 16 * i + 16, ap + 16 * i, 16 * (N - i + 1))); 13889566063dSJacob Faibussowitsch PetscCallVoid(PetscArrayzero(ap + 16 * i, 16)); 1389af674e45SBarry Smith rp[i] = bcol; 1390af674e45SBarry Smith ap[16 * i + 4 * cidx + ridx] = value; 1391af674e45SBarry Smith noinsert1:; 1392af674e45SBarry Smith low = i; 1393af674e45SBarry Smith } 1394af674e45SBarry Smith ailen[brow] = nrow; 1395af674e45SBarry Smith } 1396be1d678aSKris Buschelman PetscFunctionReturnVoid(); 1397af674e45SBarry Smith } 1398af674e45SBarry Smith 1399be5855fcSBarry Smith /* 1400be5855fcSBarry Smith Checks for missing diagonals 1401be5855fcSBarry Smith */ 14029371c9d4SSatish Balay PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A, PetscBool *missing, PetscInt *d) { 1403be5855fcSBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 14047734d3b5SMatthew G. Knepley PetscInt *diag, *ii = a->i, i; 1405be5855fcSBarry Smith 1406be5855fcSBarry Smith PetscFunctionBegin; 14079566063dSJacob Faibussowitsch PetscCall(MatMarkDiagonal_SeqBAIJ(A)); 14082af78befSBarry Smith *missing = PETSC_FALSE; 14097734d3b5SMatthew G. Knepley if (A->rmap->n > 0 && !ii) { 14102efa7f71SHong Zhang *missing = PETSC_TRUE; 14112efa7f71SHong Zhang if (d) *d = 0; 14129566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "Matrix has no entries therefore is missing diagonal\n")); 14132efa7f71SHong Zhang } else { 141401445905SHong Zhang PetscInt n; 141501445905SHong Zhang n = PetscMin(a->mbs, a->nbs); 1416883fce79SBarry Smith diag = a->diag; 141701445905SHong Zhang for (i = 0; i < n; i++) { 14187734d3b5SMatthew G. Knepley if (diag[i] >= ii[i + 1]) { 14192af78befSBarry Smith *missing = PETSC_TRUE; 14202af78befSBarry Smith if (d) *d = i; 14219566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "Matrix is missing block diagonal number %" PetscInt_FMT "\n", i)); 1422358d2f5dSShri Abhyankar break; 14232efa7f71SHong Zhang } 1424be5855fcSBarry Smith } 1425be5855fcSBarry Smith } 1426be5855fcSBarry Smith PetscFunctionReturn(0); 1427be5855fcSBarry Smith } 1428be5855fcSBarry Smith 14299371c9d4SSatish Balay PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A) { 1430de6a44a3SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 143109f38230SBarry Smith PetscInt i, j, m = a->mbs; 1432de6a44a3SBarry Smith 14333a40ed3dSBarry Smith PetscFunctionBegin; 143409f38230SBarry Smith if (!a->diag) { 14359566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(m, &a->diag)); 14369566063dSJacob Faibussowitsch PetscCall(PetscLogObjectMemory((PetscObject)A, m * sizeof(PetscInt))); 14374fd072dbSBarry Smith a->free_diag = PETSC_TRUE; 143809f38230SBarry Smith } 14397fc0212eSBarry Smith for (i = 0; i < m; i++) { 144009f38230SBarry Smith a->diag[i] = a->i[i + 1]; 1441de6a44a3SBarry Smith for (j = a->i[i]; j < a->i[i + 1]; j++) { 1442de6a44a3SBarry Smith if (a->j[j] == i) { 144309f38230SBarry Smith a->diag[i] = j; 1444de6a44a3SBarry Smith break; 1445de6a44a3SBarry Smith } 1446de6a44a3SBarry Smith } 1447de6a44a3SBarry Smith } 14483a40ed3dSBarry Smith PetscFunctionReturn(0); 1449de6a44a3SBarry Smith } 14502593348eSBarry Smith 14519371c9d4SSatish Balay static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool blockcompressed, PetscInt *nn, const PetscInt *inia[], const PetscInt *inja[], PetscBool *done) { 14523b2fbd54SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 14531a83f524SJed Brown PetscInt i, j, n = a->mbs, nz = a->i[n], *tia, *tja, bs = A->rmap->bs, k, l, cnt; 14541a83f524SJed Brown PetscInt **ia = (PetscInt **)inia, **ja = (PetscInt **)inja; 14553b2fbd54SBarry Smith 14563a40ed3dSBarry Smith PetscFunctionBegin; 14573b2fbd54SBarry Smith *nn = n; 14583a40ed3dSBarry Smith if (!ia) PetscFunctionReturn(0); 14593b2fbd54SBarry Smith if (symmetric) { 14609566063dSJacob Faibussowitsch PetscCall(MatToSymmetricIJ_SeqAIJ(n, a->i, a->j, PETSC_TRUE, 0, 0, &tia, &tja)); 1461553b3c51SBarry Smith nz = tia[n]; 14623b2fbd54SBarry Smith } else { 14639371c9d4SSatish Balay tia = a->i; 14649371c9d4SSatish Balay tja = a->j; 14653b2fbd54SBarry Smith } 14663b2fbd54SBarry Smith 1467ecc77c7aSBarry Smith if (!blockcompressed && bs > 1) { 1468ecc77c7aSBarry Smith (*nn) *= bs; 14698f7157efSSatish Balay /* malloc & create the natural set of indices */ 14709566063dSJacob Faibussowitsch PetscCall(PetscMalloc1((n + 1) * bs, ia)); 14719985e31cSBarry Smith if (n) { 14722462f5fdSStefano Zampini (*ia)[0] = oshift; 14739371c9d4SSatish Balay for (j = 1; j < bs; j++) { (*ia)[j] = (tia[1] - tia[0]) * bs + (*ia)[j - 1]; } 14749985e31cSBarry Smith } 1475ecc77c7aSBarry Smith 1476ecc77c7aSBarry Smith for (i = 1; i < n; i++) { 1477ecc77c7aSBarry Smith (*ia)[i * bs] = (tia[i] - tia[i - 1]) * bs + (*ia)[i * bs - 1]; 14789371c9d4SSatish Balay for (j = 1; j < bs; j++) { (*ia)[i * bs + j] = (tia[i + 1] - tia[i]) * bs + (*ia)[i * bs + j - 1]; } 14798f7157efSSatish Balay } 14809371c9d4SSatish Balay if (n) { (*ia)[n * bs] = (tia[n] - tia[n - 1]) * bs + (*ia)[n * bs - 1]; } 1481ecc77c7aSBarry Smith 14821a83f524SJed Brown if (inja) { 14839566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(nz * bs * bs, ja)); 14849985e31cSBarry Smith cnt = 0; 14859985e31cSBarry Smith for (i = 0; i < n; i++) { 14869985e31cSBarry Smith for (j = 0; j < bs; j++) { 14879985e31cSBarry Smith for (k = tia[i]; k < tia[i + 1]; k++) { 14889371c9d4SSatish Balay for (l = 0; l < bs; l++) { (*ja)[cnt++] = bs * tja[k] + l; } 14899985e31cSBarry Smith } 14909985e31cSBarry Smith } 14919985e31cSBarry Smith } 14929985e31cSBarry Smith } 14939985e31cSBarry Smith 14948f7157efSSatish Balay if (symmetric) { /* deallocate memory allocated in MatToSymmetricIJ_SeqAIJ() */ 14959566063dSJacob Faibussowitsch PetscCall(PetscFree(tia)); 14969566063dSJacob Faibussowitsch PetscCall(PetscFree(tja)); 14978f7157efSSatish Balay } 1498f6d58c54SBarry Smith } else if (oshift == 1) { 1499715a17b5SBarry Smith if (symmetric) { 1500a2ea699eSBarry Smith nz = tia[A->rmap->n / bs]; 1501715a17b5SBarry Smith /* add 1 to i and j indices */ 1502715a17b5SBarry Smith for (i = 0; i < A->rmap->n / bs + 1; i++) tia[i] = tia[i] + 1; 1503715a17b5SBarry Smith *ia = tia; 1504715a17b5SBarry Smith if (ja) { 1505715a17b5SBarry Smith for (i = 0; i < nz; i++) tja[i] = tja[i] + 1; 1506715a17b5SBarry Smith *ja = tja; 1507715a17b5SBarry Smith } 1508715a17b5SBarry Smith } else { 1509a2ea699eSBarry Smith nz = a->i[A->rmap->n / bs]; 1510f6d58c54SBarry Smith /* malloc space and add 1 to i and j indices */ 15119566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(A->rmap->n / bs + 1, ia)); 1512f6d58c54SBarry Smith for (i = 0; i < A->rmap->n / bs + 1; i++) (*ia)[i] = a->i[i] + 1; 1513f6d58c54SBarry Smith if (ja) { 15149566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(nz, ja)); 1515f6d58c54SBarry Smith for (i = 0; i < nz; i++) (*ja)[i] = a->j[i] + 1; 1516f6d58c54SBarry Smith } 1517715a17b5SBarry Smith } 15188f7157efSSatish Balay } else { 15198f7157efSSatish Balay *ia = tia; 1520ecc77c7aSBarry Smith if (ja) *ja = tja; 15218f7157efSSatish Balay } 15223a40ed3dSBarry Smith PetscFunctionReturn(0); 15233b2fbd54SBarry Smith } 15243b2fbd54SBarry Smith 15259371c9d4SSatish Balay static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool blockcompressed, PetscInt *nn, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done) { 15263a40ed3dSBarry Smith PetscFunctionBegin; 15273a40ed3dSBarry Smith if (!ia) PetscFunctionReturn(0); 1528715a17b5SBarry Smith if ((!blockcompressed && A->rmap->bs > 1) || (symmetric || oshift == 1)) { 15299566063dSJacob Faibussowitsch PetscCall(PetscFree(*ia)); 15309566063dSJacob Faibussowitsch if (ja) PetscCall(PetscFree(*ja)); 15313b2fbd54SBarry Smith } 15323a40ed3dSBarry Smith PetscFunctionReturn(0); 15333b2fbd54SBarry Smith } 15343b2fbd54SBarry Smith 15359371c9d4SSatish Balay PetscErrorCode MatDestroy_SeqBAIJ(Mat A) { 15362d61bbb3SSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 15372d61bbb3SSatish Balay 1538433994e6SBarry Smith PetscFunctionBegin; 1539aa482453SBarry Smith #if defined(PETSC_USE_LOG) 1540c0aa6a63SJacob Faibussowitsch PetscLogObjectState((PetscObject)A, "Rows=%" PetscInt_FMT ", Cols=%" PetscInt_FMT ", NZ=%" PetscInt_FMT, A->rmap->N, A->cmap->n, a->nz); 15412d61bbb3SSatish Balay #endif 15429566063dSJacob Faibussowitsch PetscCall(MatSeqXAIJFreeAIJ(A, &a->a, &a->j, &a->i)); 15439566063dSJacob Faibussowitsch PetscCall(ISDestroy(&a->row)); 15449566063dSJacob Faibussowitsch PetscCall(ISDestroy(&a->col)); 15459566063dSJacob Faibussowitsch if (a->free_diag) PetscCall(PetscFree(a->diag)); 15469566063dSJacob Faibussowitsch PetscCall(PetscFree(a->idiag)); 15479566063dSJacob Faibussowitsch if (a->free_imax_ilen) PetscCall(PetscFree2(a->imax, a->ilen)); 15489566063dSJacob Faibussowitsch PetscCall(PetscFree(a->solve_work)); 15499566063dSJacob Faibussowitsch PetscCall(PetscFree(a->mult_work)); 15509566063dSJacob Faibussowitsch PetscCall(PetscFree(a->sor_workt)); 15519566063dSJacob Faibussowitsch PetscCall(PetscFree(a->sor_work)); 15529566063dSJacob Faibussowitsch PetscCall(ISDestroy(&a->icol)); 15539566063dSJacob Faibussowitsch PetscCall(PetscFree(a->saved_values)); 15549566063dSJacob Faibussowitsch PetscCall(PetscFree2(a->compressedrow.i, a->compressedrow.rindex)); 1555c4319e64SHong Zhang 15569566063dSJacob Faibussowitsch PetscCall(MatDestroy(&a->sbaijMat)); 15579566063dSJacob Faibussowitsch PetscCall(MatDestroy(&a->parent)); 15589566063dSJacob Faibussowitsch PetscCall(PetscFree(A->data)); 1559901853e0SKris Buschelman 15609566063dSJacob Faibussowitsch PetscCall(PetscObjectChangeTypeName((PetscObject)A, NULL)); 15619566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatSeqBAIJGetArray_C", NULL)); 15629566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatSeqBAIJRestoreArray_C", NULL)); 15639566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatStoreValues_C", NULL)); 15649566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatRetrieveValues_C", NULL)); 15659566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatSeqBAIJSetColumnIndices_C", NULL)); 15669566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqbaij_seqaij_C", NULL)); 15679566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqbaij_seqsbaij_C", NULL)); 15689566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatSeqBAIJSetPreallocation_C", NULL)); 15699566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatSeqBAIJSetPreallocationCSR_C", NULL)); 15709566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqbaij_seqbstrm_C", NULL)); 15719566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatIsTranspose_C", NULL)); 15727ea3e4caSstefano_zampini #if defined(PETSC_HAVE_HYPRE) 15739566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqbaij_hypre_C", NULL)); 15747ea3e4caSstefano_zampini #endif 15759566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqbaij_is_C", NULL)); 15762e956fe4SStefano Zampini PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatFactorGetSolverType_C", NULL)); 15772d61bbb3SSatish Balay PetscFunctionReturn(0); 15782d61bbb3SSatish Balay } 15792d61bbb3SSatish Balay 15809371c9d4SSatish Balay PetscErrorCode MatSetOption_SeqBAIJ(Mat A, MatOption op, PetscBool flg) { 15812d61bbb3SSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 15822d61bbb3SSatish Balay 15832d61bbb3SSatish Balay PetscFunctionBegin; 1584aa275fccSKris Buschelman switch (op) { 15859371c9d4SSatish Balay case MAT_ROW_ORIENTED: a->roworiented = flg; break; 15869371c9d4SSatish Balay case MAT_KEEP_NONZERO_PATTERN: a->keepnonzeropattern = flg; break; 15879371c9d4SSatish Balay case MAT_NEW_NONZERO_LOCATIONS: a->nonew = (flg ? 0 : 1); break; 15889371c9d4SSatish Balay case MAT_NEW_NONZERO_LOCATION_ERR: a->nonew = (flg ? -1 : 0); break; 15899371c9d4SSatish Balay case MAT_NEW_NONZERO_ALLOCATION_ERR: a->nonew = (flg ? -2 : 0); break; 15909371c9d4SSatish Balay case MAT_UNUSED_NONZERO_LOCATION_ERR: a->nounused = (flg ? -1 : 0); break; 15918c78258cSHong Zhang case MAT_FORCE_DIAGONAL_ENTRIES: 1592aa275fccSKris Buschelman case MAT_IGNORE_OFF_PROC_ENTRIES: 1593aa275fccSKris Buschelman case MAT_USE_HASH_TABLE: 15949371c9d4SSatish Balay case MAT_SORTED_FULL: PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op])); break; 15955021d80fSJed Brown case MAT_SPD: 159677e54ba9SKris Buschelman case MAT_SYMMETRIC: 159777e54ba9SKris Buschelman case MAT_STRUCTURALLY_SYMMETRIC: 15989a4540c5SBarry Smith case MAT_HERMITIAN: 15999a4540c5SBarry Smith case MAT_SYMMETRY_ETERNAL: 1600b94d7dedSBarry Smith case MAT_STRUCTURAL_SYMMETRY_ETERNAL: 1601c10200c1SHong Zhang case MAT_SUBMAT_SINGLEIS: 1602672ba085SHong Zhang case MAT_STRUCTURE_ONLY: 1603b94d7dedSBarry Smith case MAT_SPD_ETERNAL: 1604b94d7dedSBarry Smith /* if the diagonal matrix is square it inherits some of the properties above */ 160577e54ba9SKris Buschelman break; 16069371c9d4SSatish Balay default: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "unknown option %d", op); 16072d61bbb3SSatish Balay } 16082d61bbb3SSatish Balay PetscFunctionReturn(0); 16092d61bbb3SSatish Balay } 16102d61bbb3SSatish Balay 161152768537SHong Zhang /* used for both SeqBAIJ and SeqSBAIJ matrices */ 16129371c9d4SSatish Balay PetscErrorCode MatGetRow_SeqBAIJ_private(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v, PetscInt *ai, PetscInt *aj, PetscScalar *aa) { 161352768537SHong Zhang PetscInt itmp, i, j, k, M, bn, bp, *idx_i, bs, bs2; 161452768537SHong Zhang MatScalar *aa_i; 161587828ca2SBarry Smith PetscScalar *v_i; 16162d61bbb3SSatish Balay 16172d61bbb3SSatish Balay PetscFunctionBegin; 1618d0f46423SBarry Smith bs = A->rmap->bs; 161952768537SHong Zhang bs2 = bs * bs; 16205f80ce2aSJacob Faibussowitsch PetscCheck(row >= 0 && row < A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " out of range", row); 16212d61bbb3SSatish Balay 16222d61bbb3SSatish Balay bn = row / bs; /* Block number */ 16232d61bbb3SSatish Balay bp = row % bs; /* Block Position */ 16242d61bbb3SSatish Balay M = ai[bn + 1] - ai[bn]; 16252d61bbb3SSatish Balay *nz = bs * M; 16262d61bbb3SSatish Balay 16272d61bbb3SSatish Balay if (v) { 1628f4259b30SLisandro Dalcin *v = NULL; 16292d61bbb3SSatish Balay if (*nz) { 16309566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(*nz, v)); 16312d61bbb3SSatish Balay for (i = 0; i < M; i++) { /* for each block in the block row */ 16322d61bbb3SSatish Balay v_i = *v + i * bs; 16332d61bbb3SSatish Balay aa_i = aa + bs2 * (ai[bn] + i); 163426fbe8dcSKarl Rupp for (j = bp, k = 0; j < bs2; j += bs, k++) v_i[k] = aa_i[j]; 16352d61bbb3SSatish Balay } 16362d61bbb3SSatish Balay } 16372d61bbb3SSatish Balay } 16382d61bbb3SSatish Balay 16392d61bbb3SSatish Balay if (idx) { 1640f4259b30SLisandro Dalcin *idx = NULL; 16412d61bbb3SSatish Balay if (*nz) { 16429566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(*nz, idx)); 16432d61bbb3SSatish Balay for (i = 0; i < M; i++) { /* for each block in the block row */ 16442d61bbb3SSatish Balay idx_i = *idx + i * bs; 16452d61bbb3SSatish Balay itmp = bs * aj[ai[bn] + i]; 164626fbe8dcSKarl Rupp for (j = 0; j < bs; j++) idx_i[j] = itmp++; 16472d61bbb3SSatish Balay } 16482d61bbb3SSatish Balay } 16492d61bbb3SSatish Balay } 16502d61bbb3SSatish Balay PetscFunctionReturn(0); 16512d61bbb3SSatish Balay } 16522d61bbb3SSatish Balay 16539371c9d4SSatish Balay PetscErrorCode MatGetRow_SeqBAIJ(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v) { 165452768537SHong Zhang Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 165552768537SHong Zhang 165652768537SHong Zhang PetscFunctionBegin; 16579566063dSJacob Faibussowitsch PetscCall(MatGetRow_SeqBAIJ_private(A, row, nz, idx, v, a->i, a->j, a->a)); 165852768537SHong Zhang PetscFunctionReturn(0); 165952768537SHong Zhang } 166052768537SHong Zhang 16619371c9d4SSatish Balay PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v) { 16622d61bbb3SSatish Balay PetscFunctionBegin; 1663cb4a9cd9SHong Zhang if (nz) *nz = 0; 16649566063dSJacob Faibussowitsch if (idx) PetscCall(PetscFree(*idx)); 16659566063dSJacob Faibussowitsch if (v) PetscCall(PetscFree(*v)); 16662d61bbb3SSatish Balay PetscFunctionReturn(0); 16672d61bbb3SSatish Balay } 16682d61bbb3SSatish Balay 16699371c9d4SSatish Balay PetscErrorCode MatTranspose_SeqBAIJ(Mat A, MatReuse reuse, Mat *B) { 167020e84f26SHong Zhang Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data, *at; 16712d61bbb3SSatish Balay Mat C; 167220e84f26SHong Zhang PetscInt i, j, k, *aj = a->j, *ai = a->i, bs = A->rmap->bs, mbs = a->mbs, nbs = a->nbs, *atfill; 167320e84f26SHong Zhang PetscInt bs2 = a->bs2, *ati, *atj, anzj, kr; 167420e84f26SHong Zhang MatScalar *ata, *aa = a->a; 16752d61bbb3SSatish Balay 16762d61bbb3SSatish Balay PetscFunctionBegin; 16777fb60732SBarry Smith if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *B)); 16789566063dSJacob Faibussowitsch PetscCall(PetscCalloc1(1 + nbs, &atfill)); 1679cf37664fSBarry Smith if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) { 168020e84f26SHong Zhang for (i = 0; i < ai[mbs]; i++) atfill[aj[i]] += 1; /* count num of non-zeros in row aj[i] */ 16812d61bbb3SSatish Balay 16829566063dSJacob Faibussowitsch PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &C)); 16839566063dSJacob Faibussowitsch PetscCall(MatSetSizes(C, A->cmap->n, A->rmap->N, A->cmap->n, A->rmap->N)); 16849566063dSJacob Faibussowitsch PetscCall(MatSetType(C, ((PetscObject)A)->type_name)); 16859566063dSJacob Faibussowitsch PetscCall(MatSeqBAIJSetPreallocation(C, bs, 0, atfill)); 168620e84f26SHong Zhang 168720e84f26SHong Zhang at = (Mat_SeqBAIJ *)C->data; 168820e84f26SHong Zhang ati = at->i; 168920e84f26SHong Zhang for (i = 0; i < nbs; i++) at->ilen[i] = at->imax[i] = ati[i + 1] - ati[i]; 1690fc4dec0aSBarry Smith } else { 1691fc4dec0aSBarry Smith C = *B; 169220e84f26SHong Zhang at = (Mat_SeqBAIJ *)C->data; 169320e84f26SHong Zhang ati = at->i; 1694fc4dec0aSBarry Smith } 1695fc4dec0aSBarry Smith 169620e84f26SHong Zhang atj = at->j; 169720e84f26SHong Zhang ata = at->a; 169820e84f26SHong Zhang 169920e84f26SHong Zhang /* Copy ati into atfill so we have locations of the next free space in atj */ 17009566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(atfill, ati, nbs)); 170120e84f26SHong Zhang 170220e84f26SHong Zhang /* Walk through A row-wise and mark nonzero entries of A^T. */ 17032d61bbb3SSatish Balay for (i = 0; i < mbs; i++) { 170420e84f26SHong Zhang anzj = ai[i + 1] - ai[i]; 170520e84f26SHong Zhang for (j = 0; j < anzj; j++) { 170620e84f26SHong Zhang atj[atfill[*aj]] = i; 170720e84f26SHong Zhang for (kr = 0; kr < bs; kr++) { 17089371c9d4SSatish Balay for (k = 0; k < bs; k++) { ata[bs2 * atfill[*aj] + k * bs + kr] = *aa++; } 17092d61bbb3SSatish Balay } 171020e84f26SHong Zhang atfill[*aj++] += 1; 171120e84f26SHong Zhang } 171220e84f26SHong Zhang } 17139566063dSJacob Faibussowitsch PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY)); 17149566063dSJacob Faibussowitsch PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY)); 17152d61bbb3SSatish Balay 171620e84f26SHong Zhang /* Clean up temporary space and complete requests. */ 17179566063dSJacob Faibussowitsch PetscCall(PetscFree(atfill)); 171820e84f26SHong Zhang 1719cf37664fSBarry Smith if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) { 17209566063dSJacob Faibussowitsch PetscCall(MatSetBlockSizes(C, PetscAbs(A->cmap->bs), PetscAbs(A->rmap->bs))); 17212d61bbb3SSatish Balay *B = C; 17222d61bbb3SSatish Balay } else { 17239566063dSJacob Faibussowitsch PetscCall(MatHeaderMerge(A, &C)); 17242d61bbb3SSatish Balay } 17252d61bbb3SSatish Balay PetscFunctionReturn(0); 17262d61bbb3SSatish Balay } 17272d61bbb3SSatish Balay 17289371c9d4SSatish Balay PetscErrorCode MatIsTranspose_SeqBAIJ(Mat A, Mat B, PetscReal tol, PetscBool *f) { 1729453d3561SHong Zhang Mat Btrans; 1730453d3561SHong Zhang 1731453d3561SHong Zhang PetscFunctionBegin; 1732453d3561SHong Zhang *f = PETSC_FALSE; 1733acd337a6SBarry Smith PetscCall(MatTranspose(A, MAT_INITIAL_MATRIX, &Btrans)); 17349566063dSJacob Faibussowitsch PetscCall(MatEqual_SeqBAIJ(B, Btrans, f)); 17359566063dSJacob Faibussowitsch PetscCall(MatDestroy(&Btrans)); 1736453d3561SHong Zhang PetscFunctionReturn(0); 1737453d3561SHong Zhang } 1738453d3561SHong Zhang 1739618cc2edSLisandro Dalcin /* Used for both SeqBAIJ and SeqSBAIJ matrices */ 17409371c9d4SSatish Balay PetscErrorCode MatView_SeqBAIJ_Binary(Mat mat, PetscViewer viewer) { 1741b51a4376SLisandro Dalcin Mat_SeqBAIJ *A = (Mat_SeqBAIJ *)mat->data; 1742b51a4376SLisandro Dalcin PetscInt header[4], M, N, m, bs, nz, cnt, i, j, k, l; 1743b51a4376SLisandro Dalcin PetscInt *rowlens, *colidxs; 1744b51a4376SLisandro Dalcin PetscScalar *matvals; 17452593348eSBarry Smith 17463a40ed3dSBarry Smith PetscFunctionBegin; 17479566063dSJacob Faibussowitsch PetscCall(PetscViewerSetUp(viewer)); 17483b2fbd54SBarry Smith 1749b51a4376SLisandro Dalcin M = mat->rmap->N; 1750b51a4376SLisandro Dalcin N = mat->cmap->N; 1751b51a4376SLisandro Dalcin m = mat->rmap->n; 1752b51a4376SLisandro Dalcin bs = mat->rmap->bs; 1753b51a4376SLisandro Dalcin nz = bs * bs * A->nz; 17542593348eSBarry Smith 1755b51a4376SLisandro Dalcin /* write matrix header */ 1756b51a4376SLisandro Dalcin header[0] = MAT_FILE_CLASSID; 17579371c9d4SSatish Balay header[1] = M; 17589371c9d4SSatish Balay header[2] = N; 17599371c9d4SSatish Balay header[3] = nz; 17609566063dSJacob Faibussowitsch PetscCall(PetscViewerBinaryWrite(viewer, header, 4, PETSC_INT)); 17612593348eSBarry Smith 1762b51a4376SLisandro Dalcin /* store row lengths */ 17639566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(m, &rowlens)); 1764b51a4376SLisandro Dalcin for (cnt = 0, i = 0; i < A->mbs; i++) 17659371c9d4SSatish Balay for (j = 0; j < bs; j++) rowlens[cnt++] = bs * (A->i[i + 1] - A->i[i]); 17669566063dSJacob Faibussowitsch PetscCall(PetscViewerBinaryWrite(viewer, rowlens, m, PETSC_INT)); 17679566063dSJacob Faibussowitsch PetscCall(PetscFree(rowlens)); 1768b51a4376SLisandro Dalcin 1769b51a4376SLisandro Dalcin /* store column indices */ 17709566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(nz, &colidxs)); 1771b51a4376SLisandro Dalcin for (cnt = 0, i = 0; i < A->mbs; i++) 1772b51a4376SLisandro Dalcin for (k = 0; k < bs; k++) 1773b51a4376SLisandro Dalcin for (j = A->i[i]; j < A->i[i + 1]; j++) 17749371c9d4SSatish Balay for (l = 0; l < bs; l++) colidxs[cnt++] = bs * A->j[j] + l; 17755f80ce2aSJacob Faibussowitsch PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_LIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt_FMT, cnt, nz); 17769566063dSJacob Faibussowitsch PetscCall(PetscViewerBinaryWrite(viewer, colidxs, nz, PETSC_INT)); 17779566063dSJacob Faibussowitsch PetscCall(PetscFree(colidxs)); 17782593348eSBarry Smith 17792593348eSBarry Smith /* store nonzero values */ 17809566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(nz, &matvals)); 1781b51a4376SLisandro Dalcin for (cnt = 0, i = 0; i < A->mbs; i++) 1782b51a4376SLisandro Dalcin for (k = 0; k < bs; k++) 1783b51a4376SLisandro Dalcin for (j = A->i[i]; j < A->i[i + 1]; j++) 17849371c9d4SSatish Balay for (l = 0; l < bs; l++) matvals[cnt++] = A->a[bs * (bs * j + l) + k]; 17855f80ce2aSJacob Faibussowitsch PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_LIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt_FMT, cnt, nz); 17869566063dSJacob Faibussowitsch PetscCall(PetscViewerBinaryWrite(viewer, matvals, nz, PETSC_SCALAR)); 17879566063dSJacob Faibussowitsch PetscCall(PetscFree(matvals)); 1788ce6f0cecSBarry Smith 1789b51a4376SLisandro Dalcin /* write block size option to the viewer's .info file */ 17909566063dSJacob Faibussowitsch PetscCall(MatView_Binary_BlockSizes(mat, viewer)); 17913a40ed3dSBarry Smith PetscFunctionReturn(0); 17922593348eSBarry Smith } 17932593348eSBarry Smith 17949371c9d4SSatish Balay static PetscErrorCode MatView_SeqBAIJ_ASCII_structonly(Mat A, PetscViewer viewer) { 17957dc0baabSHong Zhang Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 17967dc0baabSHong Zhang PetscInt i, bs = A->rmap->bs, k; 17977dc0baabSHong Zhang 17987dc0baabSHong Zhang PetscFunctionBegin; 17999566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_FALSE)); 18007dc0baabSHong Zhang for (i = 0; i < a->mbs; i++) { 18019566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, "row %" PetscInt_FMT "-%" PetscInt_FMT ":", i * bs, i * bs + bs - 1)); 1802*48a46eb9SPierre Jolivet for (k = a->i[i]; k < a->i[i + 1]; k++) PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT "-%" PetscInt_FMT ") ", bs * a->j[k], bs * a->j[k] + bs - 1)); 18039566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, "\n")); 18047dc0baabSHong Zhang } 18059566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_TRUE)); 18067dc0baabSHong Zhang PetscFunctionReturn(0); 18077dc0baabSHong Zhang } 18087dc0baabSHong Zhang 18099371c9d4SSatish Balay static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A, PetscViewer viewer) { 1810b6490206SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 1811d0f46423SBarry Smith PetscInt i, j, bs = A->rmap->bs, k, l, bs2 = a->bs2; 1812f3ef73ceSBarry Smith PetscViewerFormat format; 18132593348eSBarry Smith 18143a40ed3dSBarry Smith PetscFunctionBegin; 18157dc0baabSHong Zhang if (A->structure_only) { 18169566063dSJacob Faibussowitsch PetscCall(MatView_SeqBAIJ_ASCII_structonly(A, viewer)); 18177dc0baabSHong Zhang PetscFunctionReturn(0); 18187dc0baabSHong Zhang } 18197dc0baabSHong Zhang 18209566063dSJacob Faibussowitsch PetscCall(PetscViewerGetFormat(viewer, &format)); 1821456192e2SBarry Smith if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 18229566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " block size is %" PetscInt_FMT "\n", bs)); 1823fb9695e5SSatish Balay } else if (format == PETSC_VIEWER_ASCII_MATLAB) { 1824ade3a672SBarry Smith const char *matname; 1825bcd9e38bSBarry Smith Mat aij; 18269566063dSJacob Faibussowitsch PetscCall(MatConvert(A, MATSEQAIJ, MAT_INITIAL_MATRIX, &aij)); 18279566063dSJacob Faibussowitsch PetscCall(PetscObjectGetName((PetscObject)A, &matname)); 18289566063dSJacob Faibussowitsch PetscCall(PetscObjectSetName((PetscObject)aij, matname)); 18299566063dSJacob Faibussowitsch PetscCall(MatView(aij, viewer)); 18309566063dSJacob Faibussowitsch PetscCall(MatDestroy(&aij)); 183104929863SHong Zhang } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 183204929863SHong Zhang PetscFunctionReturn(0); 1833fb9695e5SSatish Balay } else if (format == PETSC_VIEWER_ASCII_COMMON) { 18349566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_FALSE)); 183544cd7ae7SLois Curfman McInnes for (i = 0; i < a->mbs; i++) { 183644cd7ae7SLois Curfman McInnes for (j = 0; j < bs; j++) { 18379566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, "row %" PetscInt_FMT ":", i * bs + j)); 183844cd7ae7SLois Curfman McInnes for (k = a->i[i]; k < a->i[i + 1]; k++) { 183944cd7ae7SLois Curfman McInnes for (l = 0; l < bs; l++) { 1840aa482453SBarry Smith #if defined(PETSC_USE_COMPLEX) 18410e6d2581SBarry Smith if (PetscImaginaryPart(a->a[bs2 * k + l * bs + j]) > 0.0 && PetscRealPart(a->a[bs2 * k + l * bs + j]) != 0.0) { 18429371c9d4SSatish Balay PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g + %gi) ", bs * a->j[k] + l, (double)PetscRealPart(a->a[bs2 * k + l * bs + j]), (double)PetscImaginaryPart(a->a[bs2 * k + l * bs + j]))); 18430e6d2581SBarry Smith } else if (PetscImaginaryPart(a->a[bs2 * k + l * bs + j]) < 0.0 && PetscRealPart(a->a[bs2 * k + l * bs + j]) != 0.0) { 18449371c9d4SSatish Balay PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g - %gi) ", bs * a->j[k] + l, (double)PetscRealPart(a->a[bs2 * k + l * bs + j]), -(double)PetscImaginaryPart(a->a[bs2 * k + l * bs + j]))); 18450e6d2581SBarry Smith } else if (PetscRealPart(a->a[bs2 * k + l * bs + j]) != 0.0) { 18469566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g) ", bs * a->j[k] + l, (double)PetscRealPart(a->a[bs2 * k + l * bs + j]))); 18470ef38995SBarry Smith } 184844cd7ae7SLois Curfman McInnes #else 1849*48a46eb9SPierre Jolivet if (a->a[bs2 * k + l * bs + j] != 0.0) PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g) ", bs * a->j[k] + l, (double)a->a[bs2 * k + l * bs + j])); 185044cd7ae7SLois Curfman McInnes #endif 185144cd7ae7SLois Curfman McInnes } 185244cd7ae7SLois Curfman McInnes } 18539566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, "\n")); 185444cd7ae7SLois Curfman McInnes } 185544cd7ae7SLois Curfman McInnes } 18569566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_TRUE)); 18570ef38995SBarry Smith } else { 18589566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_FALSE)); 1859b6490206SBarry Smith for (i = 0; i < a->mbs; i++) { 1860b6490206SBarry Smith for (j = 0; j < bs; j++) { 18619566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, "row %" PetscInt_FMT ":", i * bs + j)); 1862b6490206SBarry Smith for (k = a->i[i]; k < a->i[i + 1]; k++) { 1863b6490206SBarry Smith for (l = 0; l < bs; l++) { 1864aa482453SBarry Smith #if defined(PETSC_USE_COMPLEX) 18650e6d2581SBarry Smith if (PetscImaginaryPart(a->a[bs2 * k + l * bs + j]) > 0.0) { 18669371c9d4SSatish Balay PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g + %g i) ", bs * a->j[k] + l, (double)PetscRealPart(a->a[bs2 * k + l * bs + j]), (double)PetscImaginaryPart(a->a[bs2 * k + l * bs + j]))); 18670e6d2581SBarry Smith } else if (PetscImaginaryPart(a->a[bs2 * k + l * bs + j]) < 0.0) { 18689371c9d4SSatish Balay PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g - %g i) ", bs * a->j[k] + l, (double)PetscRealPart(a->a[bs2 * k + l * bs + j]), -(double)PetscImaginaryPart(a->a[bs2 * k + l * bs + j]))); 18690ef38995SBarry Smith } else { 18709566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g) ", bs * a->j[k] + l, (double)PetscRealPart(a->a[bs2 * k + l * bs + j]))); 187188685aaeSLois Curfman McInnes } 187288685aaeSLois Curfman McInnes #else 18739566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g) ", bs * a->j[k] + l, (double)a->a[bs2 * k + l * bs + j])); 187488685aaeSLois Curfman McInnes #endif 18752593348eSBarry Smith } 18762593348eSBarry Smith } 18779566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, "\n")); 18782593348eSBarry Smith } 18792593348eSBarry Smith } 18809566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_TRUE)); 1881b6490206SBarry Smith } 18829566063dSJacob Faibussowitsch PetscCall(PetscViewerFlush(viewer)); 18833a40ed3dSBarry Smith PetscFunctionReturn(0); 18842593348eSBarry Smith } 18852593348eSBarry Smith 18869804daf3SBarry Smith #include <petscdraw.h> 18879371c9d4SSatish Balay static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw, void *Aa) { 188877ed5343SBarry Smith Mat A = (Mat)Aa; 18893270192aSSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 1890d0f46423SBarry Smith PetscInt row, i, j, k, l, mbs = a->mbs, color, bs = A->rmap->bs, bs2 = a->bs2; 18910e6d2581SBarry Smith PetscReal xl, yl, xr, yr, x_l, x_r, y_l, y_r; 18923f1db9ecSBarry Smith MatScalar *aa; 1893b0a32e0cSBarry Smith PetscViewer viewer; 1894b3e7f47fSJed Brown PetscViewerFormat format; 18953270192aSSatish Balay 18963a40ed3dSBarry Smith PetscFunctionBegin; 18979566063dSJacob Faibussowitsch PetscCall(PetscObjectQuery((PetscObject)A, "Zoomviewer", (PetscObject *)&viewer)); 18989566063dSJacob Faibussowitsch PetscCall(PetscViewerGetFormat(viewer, &format)); 18999566063dSJacob Faibussowitsch PetscCall(PetscDrawGetCoordinates(draw, &xl, &yl, &xr, &yr)); 190077ed5343SBarry Smith 19013270192aSSatish Balay /* loop over matrix elements drawing boxes */ 1902b3e7f47fSJed Brown 1903b3e7f47fSJed Brown if (format != PETSC_VIEWER_DRAW_CONTOUR) { 1904d0609cedSBarry Smith PetscDrawCollectiveBegin(draw); 1905383922c3SLisandro Dalcin /* Blue for negative, Cyan for zero and Red for positive */ 1906b0a32e0cSBarry Smith color = PETSC_DRAW_BLUE; 19073270192aSSatish Balay for (i = 0, row = 0; i < mbs; i++, row += bs) { 19083270192aSSatish Balay for (j = a->i[i]; j < a->i[i + 1]; j++) { 19099371c9d4SSatish Balay y_l = A->rmap->N - row - 1.0; 19109371c9d4SSatish Balay y_r = y_l + 1.0; 19119371c9d4SSatish Balay x_l = a->j[j] * bs; 19129371c9d4SSatish Balay x_r = x_l + 1.0; 19133270192aSSatish Balay aa = a->a + j * bs2; 19143270192aSSatish Balay for (k = 0; k < bs; k++) { 19153270192aSSatish Balay for (l = 0; l < bs; l++) { 19160e6d2581SBarry Smith if (PetscRealPart(*aa++) >= 0.) continue; 19179566063dSJacob Faibussowitsch PetscCall(PetscDrawRectangle(draw, x_l + k, y_l - l, x_r + k, y_r - l, color, color, color, color)); 19183270192aSSatish Balay } 19193270192aSSatish Balay } 19203270192aSSatish Balay } 19213270192aSSatish Balay } 1922b0a32e0cSBarry Smith color = PETSC_DRAW_CYAN; 19233270192aSSatish Balay for (i = 0, row = 0; i < mbs; i++, row += bs) { 19243270192aSSatish Balay for (j = a->i[i]; j < a->i[i + 1]; j++) { 19259371c9d4SSatish Balay y_l = A->rmap->N - row - 1.0; 19269371c9d4SSatish Balay y_r = y_l + 1.0; 19279371c9d4SSatish Balay x_l = a->j[j] * bs; 19289371c9d4SSatish Balay x_r = x_l + 1.0; 19293270192aSSatish Balay aa = a->a + j * bs2; 19303270192aSSatish Balay for (k = 0; k < bs; k++) { 19313270192aSSatish Balay for (l = 0; l < bs; l++) { 19320e6d2581SBarry Smith if (PetscRealPart(*aa++) != 0.) continue; 19339566063dSJacob Faibussowitsch PetscCall(PetscDrawRectangle(draw, x_l + k, y_l - l, x_r + k, y_r - l, color, color, color, color)); 19343270192aSSatish Balay } 19353270192aSSatish Balay } 19363270192aSSatish Balay } 19373270192aSSatish Balay } 1938b0a32e0cSBarry Smith color = PETSC_DRAW_RED; 19393270192aSSatish Balay for (i = 0, row = 0; i < mbs; i++, row += bs) { 19403270192aSSatish Balay for (j = a->i[i]; j < a->i[i + 1]; j++) { 19419371c9d4SSatish Balay y_l = A->rmap->N - row - 1.0; 19429371c9d4SSatish Balay y_r = y_l + 1.0; 19439371c9d4SSatish Balay x_l = a->j[j] * bs; 19449371c9d4SSatish Balay x_r = x_l + 1.0; 19453270192aSSatish Balay aa = a->a + j * bs2; 19463270192aSSatish Balay for (k = 0; k < bs; k++) { 19473270192aSSatish Balay for (l = 0; l < bs; l++) { 19480e6d2581SBarry Smith if (PetscRealPart(*aa++) <= 0.) continue; 19499566063dSJacob Faibussowitsch PetscCall(PetscDrawRectangle(draw, x_l + k, y_l - l, x_r + k, y_r - l, color, color, color, color)); 19503270192aSSatish Balay } 19513270192aSSatish Balay } 19523270192aSSatish Balay } 19533270192aSSatish Balay } 1954d0609cedSBarry Smith PetscDrawCollectiveEnd(draw); 1955b3e7f47fSJed Brown } else { 1956b3e7f47fSJed Brown /* use contour shading to indicate magnitude of values */ 1957b3e7f47fSJed Brown /* first determine max of all nonzero values */ 1958b05fc000SLisandro Dalcin PetscReal minv = 0.0, maxv = 0.0; 1959b3e7f47fSJed Brown PetscDraw popup; 1960b3e7f47fSJed Brown 1961b3e7f47fSJed Brown for (i = 0; i < a->nz * a->bs2; i++) { 1962b3e7f47fSJed Brown if (PetscAbsScalar(a->a[i]) > maxv) maxv = PetscAbsScalar(a->a[i]); 1963b3e7f47fSJed Brown } 1964383922c3SLisandro Dalcin if (minv >= maxv) maxv = minv + PETSC_SMALL; 19659566063dSJacob Faibussowitsch PetscCall(PetscDrawGetPopup(draw, &popup)); 19669566063dSJacob Faibussowitsch PetscCall(PetscDrawScalePopup(popup, 0.0, maxv)); 1967383922c3SLisandro Dalcin 1968d0609cedSBarry Smith PetscDrawCollectiveBegin(draw); 1969b3e7f47fSJed Brown for (i = 0, row = 0; i < mbs; i++, row += bs) { 1970b3e7f47fSJed Brown for (j = a->i[i]; j < a->i[i + 1]; j++) { 19719371c9d4SSatish Balay y_l = A->rmap->N - row - 1.0; 19729371c9d4SSatish Balay y_r = y_l + 1.0; 19739371c9d4SSatish Balay x_l = a->j[j] * bs; 19749371c9d4SSatish Balay x_r = x_l + 1.0; 1975b3e7f47fSJed Brown aa = a->a + j * bs2; 1976b3e7f47fSJed Brown for (k = 0; k < bs; k++) { 1977b3e7f47fSJed Brown for (l = 0; l < bs; l++) { 1978383922c3SLisandro Dalcin MatScalar v = *aa++; 1979383922c3SLisandro Dalcin color = PetscDrawRealToColor(PetscAbsScalar(v), minv, maxv); 19809566063dSJacob Faibussowitsch PetscCall(PetscDrawRectangle(draw, x_l + k, y_l - l, x_r + k, y_r - l, color, color, color, color)); 1981b3e7f47fSJed Brown } 1982b3e7f47fSJed Brown } 1983b3e7f47fSJed Brown } 1984b3e7f47fSJed Brown } 1985d0609cedSBarry Smith PetscDrawCollectiveEnd(draw); 1986b3e7f47fSJed Brown } 198777ed5343SBarry Smith PetscFunctionReturn(0); 198877ed5343SBarry Smith } 19893270192aSSatish Balay 19909371c9d4SSatish Balay static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A, PetscViewer viewer) { 19910e6d2581SBarry Smith PetscReal xl, yl, xr, yr, w, h; 1992b0a32e0cSBarry Smith PetscDraw draw; 1993ace3abfcSBarry Smith PetscBool isnull; 19943270192aSSatish Balay 199577ed5343SBarry Smith PetscFunctionBegin; 19969566063dSJacob Faibussowitsch PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw)); 19979566063dSJacob Faibussowitsch PetscCall(PetscDrawIsNull(draw, &isnull)); 199845f3bb6eSLisandro Dalcin if (isnull) PetscFunctionReturn(0); 199977ed5343SBarry Smith 20009371c9d4SSatish Balay xr = A->cmap->n; 20019371c9d4SSatish Balay yr = A->rmap->N; 20029371c9d4SSatish Balay h = yr / 10.0; 20039371c9d4SSatish Balay w = xr / 10.0; 20049371c9d4SSatish Balay xr += w; 20059371c9d4SSatish Balay yr += h; 20069371c9d4SSatish Balay xl = -w; 20079371c9d4SSatish Balay yl = -h; 20089566063dSJacob Faibussowitsch PetscCall(PetscDrawSetCoordinates(draw, xl, yl, xr, yr)); 20099566063dSJacob Faibussowitsch PetscCall(PetscObjectCompose((PetscObject)A, "Zoomviewer", (PetscObject)viewer)); 20109566063dSJacob Faibussowitsch PetscCall(PetscDrawZoom(draw, MatView_SeqBAIJ_Draw_Zoom, A)); 20119566063dSJacob Faibussowitsch PetscCall(PetscObjectCompose((PetscObject)A, "Zoomviewer", NULL)); 20129566063dSJacob Faibussowitsch PetscCall(PetscDrawSave(draw)); 20133a40ed3dSBarry Smith PetscFunctionReturn(0); 20143270192aSSatish Balay } 20153270192aSSatish Balay 20169371c9d4SSatish Balay PetscErrorCode MatView_SeqBAIJ(Mat A, PetscViewer viewer) { 2017ace3abfcSBarry Smith PetscBool iascii, isbinary, isdraw; 20182593348eSBarry Smith 20193a40ed3dSBarry Smith PetscFunctionBegin; 20209566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii)); 20219566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary)); 20229566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw)); 202332077d6dSBarry Smith if (iascii) { 20249566063dSJacob Faibussowitsch PetscCall(MatView_SeqBAIJ_ASCII(A, viewer)); 20250f5bd95cSBarry Smith } else if (isbinary) { 20269566063dSJacob Faibussowitsch PetscCall(MatView_SeqBAIJ_Binary(A, viewer)); 20270f5bd95cSBarry Smith } else if (isdraw) { 20289566063dSJacob Faibussowitsch PetscCall(MatView_SeqBAIJ_Draw(A, viewer)); 20295cd90555SBarry Smith } else { 2030a5e6ed63SBarry Smith Mat B; 20319566063dSJacob Faibussowitsch PetscCall(MatConvert(A, MATSEQAIJ, MAT_INITIAL_MATRIX, &B)); 20329566063dSJacob Faibussowitsch PetscCall(MatView(B, viewer)); 20339566063dSJacob Faibussowitsch PetscCall(MatDestroy(&B)); 20342593348eSBarry Smith } 20353a40ed3dSBarry Smith PetscFunctionReturn(0); 20362593348eSBarry Smith } 2037b6490206SBarry Smith 20389371c9d4SSatish Balay PetscErrorCode MatGetValues_SeqBAIJ(Mat A, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], PetscScalar v[]) { 2039cd0e1443SSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 2040c1ac3661SBarry Smith PetscInt *rp, k, low, high, t, row, nrow, i, col, l, *aj = a->j; 2041c1ac3661SBarry Smith PetscInt *ai = a->i, *ailen = a->ilen; 2042d0f46423SBarry Smith PetscInt brow, bcol, ridx, cidx, bs = A->rmap->bs, bs2 = a->bs2; 204397e567efSBarry Smith MatScalar *ap, *aa = a->a; 2044cd0e1443SSatish Balay 20453a40ed3dSBarry Smith PetscFunctionBegin; 20462d61bbb3SSatish Balay for (k = 0; k < m; k++) { /* loop over rows */ 20479371c9d4SSatish Balay row = im[k]; 20489371c9d4SSatish Balay brow = row / bs; 20499371c9d4SSatish Balay if (row < 0) { 20509371c9d4SSatish Balay v += n; 20519371c9d4SSatish Balay continue; 20529371c9d4SSatish Balay } /* negative row */ 205354c59aa7SJacob Faibussowitsch PetscCheck(row < A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " too large", row); 2054d29f2997SMatthew Woehlke rp = aj ? aj + ai[brow] : NULL; /* mustn't add to NULL, that is UB */ 2055d29f2997SMatthew Woehlke ap = aa ? aa + bs2 * ai[brow] : NULL; /* mustn't add to NULL, that is UB */ 20562c3acbe9SBarry Smith nrow = ailen[brow]; 20572d61bbb3SSatish Balay for (l = 0; l < n; l++) { /* loop over columns */ 20589371c9d4SSatish Balay if (in[l] < 0) { 20599371c9d4SSatish Balay v++; 20609371c9d4SSatish Balay continue; 20619371c9d4SSatish Balay } /* negative column */ 206254c59aa7SJacob Faibussowitsch PetscCheck(in[l] < A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column %" PetscInt_FMT " too large", in[l]); 20632d61bbb3SSatish Balay col = in[l]; 20642d61bbb3SSatish Balay bcol = col / bs; 20652d61bbb3SSatish Balay cidx = col % bs; 20662d61bbb3SSatish Balay ridx = row % bs; 20672d61bbb3SSatish Balay high = nrow; 20682d61bbb3SSatish Balay low = 0; /* assume unsorted */ 20692d61bbb3SSatish Balay while (high - low > 5) { 2070cd0e1443SSatish Balay t = (low + high) / 2; 2071cd0e1443SSatish Balay if (rp[t] > bcol) high = t; 2072cd0e1443SSatish Balay else low = t; 2073cd0e1443SSatish Balay } 2074cd0e1443SSatish Balay for (i = low; i < high; i++) { 2075cd0e1443SSatish Balay if (rp[i] > bcol) break; 2076cd0e1443SSatish Balay if (rp[i] == bcol) { 20772d61bbb3SSatish Balay *v++ = ap[bs2 * i + bs * cidx + ridx]; 20782d61bbb3SSatish Balay goto finished; 2079cd0e1443SSatish Balay } 2080cd0e1443SSatish Balay } 208197e567efSBarry Smith *v++ = 0.0; 20822d61bbb3SSatish Balay finished:; 2083cd0e1443SSatish Balay } 2084cd0e1443SSatish Balay } 20853a40ed3dSBarry Smith PetscFunctionReturn(0); 2086cd0e1443SSatish Balay } 2087cd0e1443SSatish Balay 20889371c9d4SSatish Balay PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat A, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode is) { 208992c4ed94SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 2090e2ee6c50SBarry Smith PetscInt *rp, k, low, high, t, ii, jj, row, nrow, i, col, l, rmax, N, lastcol = -1; 2091c1ac3661SBarry Smith PetscInt *imax = a->imax, *ai = a->i, *ailen = a->ilen; 2092d0f46423SBarry Smith PetscInt *aj = a->j, nonew = a->nonew, bs2 = a->bs2, bs = A->rmap->bs, stepval; 2093ace3abfcSBarry Smith PetscBool roworiented = a->roworiented; 2094dd6ea824SBarry Smith const PetscScalar *value = v; 20959d243f67SHong Zhang MatScalar *ap = NULL, *aa = a->a, *bap; 209692c4ed94SBarry Smith 20973a40ed3dSBarry Smith PetscFunctionBegin; 20980e324ae4SSatish Balay if (roworiented) { 20990e324ae4SSatish Balay stepval = (n - 1) * bs; 21000e324ae4SSatish Balay } else { 21010e324ae4SSatish Balay stepval = (m - 1) * bs; 21020e324ae4SSatish Balay } 210392c4ed94SBarry Smith for (k = 0; k < m; k++) { /* loop over added rows */ 210492c4ed94SBarry Smith row = im[k]; 21055ef9f2a5SBarry Smith if (row < 0) continue; 21066bdcaf15SBarry Smith PetscCheck(row < a->mbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Block row index too large %" PetscInt_FMT " max %" PetscInt_FMT, row, a->mbs - 1); 210792c4ed94SBarry Smith rp = aj + ai[row]; 21087dc0baabSHong Zhang if (!A->structure_only) ap = aa + bs2 * ai[row]; 210992c4ed94SBarry Smith rmax = imax[row]; 211092c4ed94SBarry Smith nrow = ailen[row]; 211192c4ed94SBarry Smith low = 0; 2112c71e6ed7SBarry Smith high = nrow; 211392c4ed94SBarry Smith for (l = 0; l < n; l++) { /* loop over added columns */ 21145ef9f2a5SBarry Smith if (in[l] < 0) continue; 21156bdcaf15SBarry Smith PetscCheck(in[l] < a->nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Block column index too large %" PetscInt_FMT " max %" PetscInt_FMT, in[l], a->nbs - 1); 211692c4ed94SBarry Smith col = in[l]; 21177dc0baabSHong Zhang if (!A->structure_only) { 211892c4ed94SBarry Smith if (roworiented) { 211953ef36baSBarry Smith value = v + (k * (stepval + bs) + l) * bs; 21200e324ae4SSatish Balay } else { 212153ef36baSBarry Smith value = v + (l * (stepval + bs) + k) * bs; 212292c4ed94SBarry Smith } 21237dc0baabSHong Zhang } 212426fbe8dcSKarl Rupp if (col <= lastcol) low = 0; 212526fbe8dcSKarl Rupp else high = nrow; 2126e2ee6c50SBarry Smith lastcol = col; 212792c4ed94SBarry Smith while (high - low > 7) { 212892c4ed94SBarry Smith t = (low + high) / 2; 212992c4ed94SBarry Smith if (rp[t] > col) high = t; 213092c4ed94SBarry Smith else low = t; 213192c4ed94SBarry Smith } 213292c4ed94SBarry Smith for (i = low; i < high; i++) { 213392c4ed94SBarry Smith if (rp[i] > col) break; 213492c4ed94SBarry Smith if (rp[i] == col) { 21357dc0baabSHong Zhang if (A->structure_only) goto noinsert2; 21368a84c255SSatish Balay bap = ap + bs2 * i; 21370e324ae4SSatish Balay if (roworiented) { 21388a84c255SSatish Balay if (is == ADD_VALUES) { 2139dd9472c6SBarry Smith for (ii = 0; ii < bs; ii++, value += stepval) { 21409371c9d4SSatish Balay for (jj = ii; jj < bs2; jj += bs) { bap[jj] += *value++; } 2141dd9472c6SBarry Smith } 21420e324ae4SSatish Balay } else { 2143dd9472c6SBarry Smith for (ii = 0; ii < bs; ii++, value += stepval) { 21449371c9d4SSatish Balay for (jj = ii; jj < bs2; jj += bs) { bap[jj] = *value++; } 2145dd9472c6SBarry Smith } 2146dd9472c6SBarry Smith } 21470e324ae4SSatish Balay } else { 21480e324ae4SSatish Balay if (is == ADD_VALUES) { 214953ef36baSBarry Smith for (ii = 0; ii < bs; ii++, value += bs + stepval) { 21509371c9d4SSatish Balay for (jj = 0; jj < bs; jj++) { bap[jj] += value[jj]; } 215153ef36baSBarry Smith bap += bs; 2152dd9472c6SBarry Smith } 21530e324ae4SSatish Balay } else { 215453ef36baSBarry Smith for (ii = 0; ii < bs; ii++, value += bs + stepval) { 21559371c9d4SSatish Balay for (jj = 0; jj < bs; jj++) { bap[jj] = value[jj]; } 215653ef36baSBarry Smith bap += bs; 21578a84c255SSatish Balay } 2158dd9472c6SBarry Smith } 2159dd9472c6SBarry Smith } 2160f1241b54SBarry Smith goto noinsert2; 216192c4ed94SBarry Smith } 216292c4ed94SBarry Smith } 216389280ab3SLois Curfman McInnes if (nonew == 1) goto noinsert2; 21645f80ce2aSJacob Faibussowitsch PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new blocked index new nonzero block (%" PetscInt_FMT ", %" PetscInt_FMT ") in the matrix", row, col); 21657dc0baabSHong Zhang if (A->structure_only) { 21667dc0baabSHong Zhang MatSeqXAIJReallocateAIJ_structure_only(A, a->mbs, bs2, nrow, row, col, rmax, ai, aj, rp, imax, nonew, MatScalar); 21677dc0baabSHong Zhang } else { 2168fef13f97SBarry Smith MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, row, col, rmax, aa, ai, aj, rp, ap, imax, nonew, MatScalar); 21697dc0baabSHong Zhang } 21709371c9d4SSatish Balay N = nrow++ - 1; 21719371c9d4SSatish Balay high++; 217292c4ed94SBarry Smith /* shift up all the later entries in this row */ 21739566063dSJacob Faibussowitsch PetscCall(PetscArraymove(rp + i + 1, rp + i, N - i + 1)); 217492c4ed94SBarry Smith rp[i] = col; 21757dc0baabSHong Zhang if (!A->structure_only) { 21769566063dSJacob Faibussowitsch PetscCall(PetscArraymove(ap + bs2 * (i + 1), ap + bs2 * i, bs2 * (N - i + 1))); 21778a84c255SSatish Balay bap = ap + bs2 * i; 21780e324ae4SSatish Balay if (roworiented) { 2179dd9472c6SBarry Smith for (ii = 0; ii < bs; ii++, value += stepval) { 21809371c9d4SSatish Balay for (jj = ii; jj < bs2; jj += bs) { bap[jj] = *value++; } 2181dd9472c6SBarry Smith } 21820e324ae4SSatish Balay } else { 2183dd9472c6SBarry Smith for (ii = 0; ii < bs; ii++, value += stepval) { 21849371c9d4SSatish Balay for (jj = 0; jj < bs; jj++) { *bap++ = *value++; } 2185dd9472c6SBarry Smith } 2186dd9472c6SBarry Smith } 21877dc0baabSHong Zhang } 2188f1241b54SBarry Smith noinsert2:; 218992c4ed94SBarry Smith low = i; 219092c4ed94SBarry Smith } 219192c4ed94SBarry Smith ailen[row] = nrow; 219292c4ed94SBarry Smith } 21933a40ed3dSBarry Smith PetscFunctionReturn(0); 219492c4ed94SBarry Smith } 219526e093fcSHong Zhang 21969371c9d4SSatish Balay PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A, MatAssemblyType mode) { 2197584200bdSSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 2198580bdb30SBarry Smith PetscInt fshift = 0, i, *ai = a->i, *aj = a->j, *imax = a->imax; 2199d0f46423SBarry Smith PetscInt m = A->rmap->N, *ip, N, *ailen = a->ilen; 2200c1ac3661SBarry Smith PetscInt mbs = a->mbs, bs2 = a->bs2, rmax = 0; 22013f1db9ecSBarry Smith MatScalar *aa = a->a, *ap; 22023447b6efSHong Zhang PetscReal ratio = 0.6; 2203584200bdSSatish Balay 22043a40ed3dSBarry Smith PetscFunctionBegin; 22053a40ed3dSBarry Smith if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 2206584200bdSSatish Balay 220743ee02c3SBarry Smith if (m) rmax = ailen[0]; 2208584200bdSSatish Balay for (i = 1; i < mbs; i++) { 2209584200bdSSatish Balay /* move each row back by the amount of empty slots (fshift) before it*/ 2210584200bdSSatish Balay fshift += imax[i - 1] - ailen[i - 1]; 2211d402145bSBarry Smith rmax = PetscMax(rmax, ailen[i]); 2212584200bdSSatish Balay if (fshift) { 2213580bdb30SBarry Smith ip = aj + ai[i]; 2214580bdb30SBarry Smith ap = aa + bs2 * ai[i]; 2215584200bdSSatish Balay N = ailen[i]; 22169566063dSJacob Faibussowitsch PetscCall(PetscArraymove(ip - fshift, ip, N)); 2217*48a46eb9SPierre Jolivet if (!A->structure_only) PetscCall(PetscArraymove(ap - bs2 * fshift, ap, bs2 * N)); 2218672ba085SHong Zhang } 2219584200bdSSatish Balay ai[i] = ai[i - 1] + ailen[i - 1]; 2220584200bdSSatish Balay } 2221584200bdSSatish Balay if (mbs) { 2222584200bdSSatish Balay fshift += imax[mbs - 1] - ailen[mbs - 1]; 2223584200bdSSatish Balay ai[mbs] = ai[mbs - 1] + ailen[mbs - 1]; 2224584200bdSSatish Balay } 22257c565772SBarry Smith 2226584200bdSSatish Balay /* reset ilen and imax for each row */ 22277c565772SBarry Smith a->nonzerorowcnt = 0; 2228672ba085SHong Zhang if (A->structure_only) { 22299566063dSJacob Faibussowitsch PetscCall(PetscFree2(a->imax, a->ilen)); 2230672ba085SHong Zhang } else { /* !A->structure_only */ 2231584200bdSSatish Balay for (i = 0; i < mbs; i++) { 2232584200bdSSatish Balay ailen[i] = imax[i] = ai[i + 1] - ai[i]; 22337c565772SBarry Smith a->nonzerorowcnt += ((ai[i + 1] - ai[i]) > 0); 2234584200bdSSatish Balay } 2235672ba085SHong Zhang } 2236a7c10996SSatish Balay a->nz = ai[mbs]; 2237584200bdSSatish Balay 2238584200bdSSatish Balay /* diagonals may have moved, so kill the diagonal pointers */ 2239b01c7715SBarry Smith a->idiagvalid = PETSC_FALSE; 2240584200bdSSatish Balay if (fshift && a->diag) { 22419566063dSJacob Faibussowitsch PetscCall(PetscFree(a->diag)); 22429566063dSJacob Faibussowitsch PetscCall(PetscLogObjectMemory((PetscObject)A, -(mbs + 1) * sizeof(PetscInt))); 2243f4259b30SLisandro Dalcin a->diag = NULL; 2244584200bdSSatish Balay } 22455f80ce2aSJacob Faibussowitsch if (fshift) PetscCheck(a->nounused != -1, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unused space detected in matrix: %" PetscInt_FMT " X %" PetscInt_FMT " block size %" PetscInt_FMT ", %" PetscInt_FMT " unneeded", m, A->cmap->n, A->rmap->bs, fshift * bs2); 22469566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "Matrix size: %" PetscInt_FMT " X %" PetscInt_FMT ", block size %" PetscInt_FMT "; storage space: %" PetscInt_FMT " unneeded, %" PetscInt_FMT " used\n", m, A->cmap->n, A->rmap->bs, fshift * bs2, a->nz * bs2)); 22479566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "Number of mallocs during MatSetValues is %" PetscInt_FMT "\n", a->reallocs)); 22489566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "Most nonzeros blocks in any row is %" PetscInt_FMT "\n", rmax)); 224926fbe8dcSKarl Rupp 22508e58a170SBarry Smith A->info.mallocs += a->reallocs; 2251e2f3b5e9SSatish Balay a->reallocs = 0; 22520e6d2581SBarry Smith A->info.nz_unneeded = (PetscReal)fshift * bs2; 2253647a6520SHong Zhang a->rmax = rmax; 2254cf4441caSHong Zhang 2255*48a46eb9SPierre Jolivet if (!A->structure_only) PetscCall(MatCheckCompressedRow(A, a->nonzerorowcnt, &a->compressedrow, a->i, mbs, ratio)); 22563a40ed3dSBarry Smith PetscFunctionReturn(0); 2257584200bdSSatish Balay } 2258584200bdSSatish Balay 2259bea157c4SSatish Balay /* 2260bea157c4SSatish Balay This function returns an array of flags which indicate the locations of contiguous 2261bea157c4SSatish Balay blocks that should be zeroed. for eg: if bs = 3 and is = [0,1,2,3,5,6,7,8,9] 2262a5b23f4aSJose E. Roman then the resulting sizes = [3,1,1,3,1] corresponding to sets [(0,1,2),(3),(5),(6,7,8),(9)] 2263bea157c4SSatish Balay Assume: sizes should be long enough to hold all the values. 2264bea157c4SSatish Balay */ 22659371c9d4SSatish Balay static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[], PetscInt n, PetscInt bs, PetscInt sizes[], PetscInt *bs_max) { 2266c1ac3661SBarry Smith PetscInt i, j, k, row; 2267ace3abfcSBarry Smith PetscBool flg; 22683a40ed3dSBarry Smith 2269433994e6SBarry Smith PetscFunctionBegin; 2270bea157c4SSatish Balay for (i = 0, j = 0; i < n; j++) { 2271bea157c4SSatish Balay row = idx[i]; 2272a5b23f4aSJose E. Roman if (row % bs != 0) { /* Not the beginning of a block */ 2273bea157c4SSatish Balay sizes[j] = 1; 2274bea157c4SSatish Balay i++; 2275e4fda26cSSatish Balay } else if (i + bs > n) { /* complete block doesn't exist (at idx end) */ 2276bea157c4SSatish Balay sizes[j] = 1; /* Also makes sure at least 'bs' values exist for next else */ 2277bea157c4SSatish Balay i++; 22786aad120cSJose E. Roman } else { /* Beginning of the block, so check if the complete block exists */ 2279bea157c4SSatish Balay flg = PETSC_TRUE; 2280bea157c4SSatish Balay for (k = 1; k < bs; k++) { 2281bea157c4SSatish Balay if (row + k != idx[i + k]) { /* break in the block */ 2282bea157c4SSatish Balay flg = PETSC_FALSE; 2283bea157c4SSatish Balay break; 2284d9b7c43dSSatish Balay } 2285bea157c4SSatish Balay } 2286abc0a331SBarry Smith if (flg) { /* No break in the bs */ 2287bea157c4SSatish Balay sizes[j] = bs; 2288bea157c4SSatish Balay i += bs; 2289bea157c4SSatish Balay } else { 2290bea157c4SSatish Balay sizes[j] = 1; 2291bea157c4SSatish Balay i++; 2292bea157c4SSatish Balay } 2293bea157c4SSatish Balay } 2294bea157c4SSatish Balay } 2295bea157c4SSatish Balay *bs_max = j; 22963a40ed3dSBarry Smith PetscFunctionReturn(0); 2297d9b7c43dSSatish Balay } 2298d9b7c43dSSatish Balay 22999371c9d4SSatish Balay PetscErrorCode MatZeroRows_SeqBAIJ(Mat A, PetscInt is_n, const PetscInt is_idx[], PetscScalar diag, Vec x, Vec b) { 2300d9b7c43dSSatish Balay Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)A->data; 2301f4df32b1SMatthew Knepley PetscInt i, j, k, count, *rows; 2302d0f46423SBarry Smith PetscInt bs = A->rmap->bs, bs2 = baij->bs2, *sizes, row, bs_max; 230387828ca2SBarry Smith PetscScalar zero = 0.0; 23043f1db9ecSBarry Smith MatScalar *aa; 230597b48c8fSBarry Smith const PetscScalar *xx; 230697b48c8fSBarry Smith PetscScalar *bb; 2307d9b7c43dSSatish Balay 23083a40ed3dSBarry Smith PetscFunctionBegin; 230997b48c8fSBarry Smith /* fix right hand side if needed */ 231097b48c8fSBarry Smith if (x && b) { 23119566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(x, &xx)); 23129566063dSJacob Faibussowitsch PetscCall(VecGetArray(b, &bb)); 23139371c9d4SSatish Balay for (i = 0; i < is_n; i++) { bb[is_idx[i]] = diag * xx[is_idx[i]]; } 23149566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(x, &xx)); 23159566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(b, &bb)); 231697b48c8fSBarry Smith } 231797b48c8fSBarry Smith 2318d9b7c43dSSatish Balay /* Make a copy of the IS and sort it */ 2319bea157c4SSatish Balay /* allocate memory for rows,sizes */ 23209566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(is_n, &rows, 2 * is_n, &sizes)); 2321bea157c4SSatish Balay 2322563b5814SBarry Smith /* copy IS values to rows, and sort them */ 232326fbe8dcSKarl Rupp for (i = 0; i < is_n; i++) rows[i] = is_idx[i]; 23249566063dSJacob Faibussowitsch PetscCall(PetscSortInt(is_n, rows)); 232597b48c8fSBarry Smith 2326a9817697SBarry Smith if (baij->keepnonzeropattern) { 232726fbe8dcSKarl Rupp for (i = 0; i < is_n; i++) sizes[i] = 1; 2328dffd3267SBarry Smith bs_max = is_n; 2329dffd3267SBarry Smith } else { 23309566063dSJacob Faibussowitsch PetscCall(MatZeroRows_SeqBAIJ_Check_Blocks(rows, is_n, bs, sizes, &bs_max)); 2331e56f5c9eSBarry Smith A->nonzerostate++; 2332dffd3267SBarry Smith } 2333bea157c4SSatish Balay 2334bea157c4SSatish Balay for (i = 0, j = 0; i < bs_max; j += sizes[i], i++) { 2335bea157c4SSatish Balay row = rows[j]; 23365f80ce2aSJacob Faibussowitsch PetscCheck(row >= 0 && row <= A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "row %" PetscInt_FMT " out of range", row); 2337bea157c4SSatish Balay count = (baij->i[row / bs + 1] - baij->i[row / bs]) * bs; 2338b31fbe3bSSatish Balay aa = ((MatScalar *)(baij->a)) + baij->i[row / bs] * bs2 + (row % bs); 2339a9817697SBarry Smith if (sizes[i] == bs && !baij->keepnonzeropattern) { 2340d4a378daSJed Brown if (diag != (PetscScalar)0.0) { 2341bea157c4SSatish Balay if (baij->ilen[row / bs] > 0) { 2342bea157c4SSatish Balay baij->ilen[row / bs] = 1; 2343bea157c4SSatish Balay baij->j[baij->i[row / bs]] = row / bs; 234426fbe8dcSKarl Rupp 23459566063dSJacob Faibussowitsch PetscCall(PetscArrayzero(aa, count * bs)); 2346a07cd24cSSatish Balay } 2347563b5814SBarry Smith /* Now insert all the diagonal values for this bs */ 2348*48a46eb9SPierre Jolivet for (k = 0; k < bs; k++) PetscCall((*A->ops->setvalues)(A, 1, rows + j + k, 1, rows + j + k, &diag, INSERT_VALUES)); 2349f4df32b1SMatthew Knepley } else { /* (diag == 0.0) */ 2350bea157c4SSatish Balay baij->ilen[row / bs] = 0; 2351f4df32b1SMatthew Knepley } /* end (diag == 0.0) */ 2352bea157c4SSatish Balay } else { /* (sizes[i] != bs) */ 23536bdcaf15SBarry Smith PetscAssert(sizes[i] == 1, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Internal Error. Value should be 1"); 2354bea157c4SSatish Balay for (k = 0; k < count; k++) { 2355d9b7c43dSSatish Balay aa[0] = zero; 2356d9b7c43dSSatish Balay aa += bs; 2357d9b7c43dSSatish Balay } 2358*48a46eb9SPierre Jolivet if (diag != (PetscScalar)0.0) PetscCall((*A->ops->setvalues)(A, 1, rows + j, 1, rows + j, &diag, INSERT_VALUES)); 2359d9b7c43dSSatish Balay } 2360bea157c4SSatish Balay } 2361bea157c4SSatish Balay 23629566063dSJacob Faibussowitsch PetscCall(PetscFree2(rows, sizes)); 23639566063dSJacob Faibussowitsch PetscCall(MatAssemblyEnd_SeqBAIJ(A, MAT_FINAL_ASSEMBLY)); 23643a40ed3dSBarry Smith PetscFunctionReturn(0); 2365d9b7c43dSSatish Balay } 23661c351548SSatish Balay 23679371c9d4SSatish Balay PetscErrorCode MatZeroRowsColumns_SeqBAIJ(Mat A, PetscInt is_n, const PetscInt is_idx[], PetscScalar diag, Vec x, Vec b) { 236897b48c8fSBarry Smith Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)A->data; 236997b48c8fSBarry Smith PetscInt i, j, k, count; 237097b48c8fSBarry Smith PetscInt bs = A->rmap->bs, bs2 = baij->bs2, row, col; 237197b48c8fSBarry Smith PetscScalar zero = 0.0; 237297b48c8fSBarry Smith MatScalar *aa; 237397b48c8fSBarry Smith const PetscScalar *xx; 237497b48c8fSBarry Smith PetscScalar *bb; 237556777dd2SBarry Smith PetscBool *zeroed, vecs = PETSC_FALSE; 237697b48c8fSBarry Smith 237797b48c8fSBarry Smith PetscFunctionBegin; 237897b48c8fSBarry Smith /* fix right hand side if needed */ 237997b48c8fSBarry Smith if (x && b) { 23809566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(x, &xx)); 23819566063dSJacob Faibussowitsch PetscCall(VecGetArray(b, &bb)); 238256777dd2SBarry Smith vecs = PETSC_TRUE; 238397b48c8fSBarry Smith } 238497b48c8fSBarry Smith 238597b48c8fSBarry Smith /* zero the columns */ 23869566063dSJacob Faibussowitsch PetscCall(PetscCalloc1(A->rmap->n, &zeroed)); 238797b48c8fSBarry Smith for (i = 0; i < is_n; i++) { 23885f80ce2aSJacob Faibussowitsch PetscCheck(is_idx[i] >= 0 && is_idx[i] < A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "row %" PetscInt_FMT " out of range", is_idx[i]); 238997b48c8fSBarry Smith zeroed[is_idx[i]] = PETSC_TRUE; 239097b48c8fSBarry Smith } 239197b48c8fSBarry Smith for (i = 0; i < A->rmap->N; i++) { 239297b48c8fSBarry Smith if (!zeroed[i]) { 239397b48c8fSBarry Smith row = i / bs; 239497b48c8fSBarry Smith for (j = baij->i[row]; j < baij->i[row + 1]; j++) { 239597b48c8fSBarry Smith for (k = 0; k < bs; k++) { 239697b48c8fSBarry Smith col = bs * baij->j[j] + k; 239797b48c8fSBarry Smith if (zeroed[col]) { 239897b48c8fSBarry Smith aa = ((MatScalar *)(baij->a)) + j * bs2 + (i % bs) + bs * k; 239956777dd2SBarry Smith if (vecs) bb[i] -= aa[0] * xx[col]; 240097b48c8fSBarry Smith aa[0] = 0.0; 240197b48c8fSBarry Smith } 240297b48c8fSBarry Smith } 240397b48c8fSBarry Smith } 240456777dd2SBarry Smith } else if (vecs) bb[i] = diag * xx[i]; 240597b48c8fSBarry Smith } 24069566063dSJacob Faibussowitsch PetscCall(PetscFree(zeroed)); 240756777dd2SBarry Smith if (vecs) { 24089566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(x, &xx)); 24099566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(b, &bb)); 241056777dd2SBarry Smith } 241197b48c8fSBarry Smith 241297b48c8fSBarry Smith /* zero the rows */ 241397b48c8fSBarry Smith for (i = 0; i < is_n; i++) { 241497b48c8fSBarry Smith row = is_idx[i]; 241597b48c8fSBarry Smith count = (baij->i[row / bs + 1] - baij->i[row / bs]) * bs; 241697b48c8fSBarry Smith aa = ((MatScalar *)(baij->a)) + baij->i[row / bs] * bs2 + (row % bs); 241797b48c8fSBarry Smith for (k = 0; k < count; k++) { 241897b48c8fSBarry Smith aa[0] = zero; 241997b48c8fSBarry Smith aa += bs; 242097b48c8fSBarry Smith } 2421dbbe0bcdSBarry Smith if (diag != (PetscScalar)0.0) PetscUseTypeMethod(A, setvalues, 1, &row, 1, &row, &diag, INSERT_VALUES); 242297b48c8fSBarry Smith } 24239566063dSJacob Faibussowitsch PetscCall(MatAssemblyEnd_SeqBAIJ(A, MAT_FINAL_ASSEMBLY)); 242497b48c8fSBarry Smith PetscFunctionReturn(0); 242597b48c8fSBarry Smith } 242697b48c8fSBarry Smith 24279371c9d4SSatish Balay PetscErrorCode MatSetValues_SeqBAIJ(Mat A, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode is) { 24282d61bbb3SSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 2429e2ee6c50SBarry Smith PetscInt *rp, k, low, high, t, ii, row, nrow, i, col, l, rmax, N, lastcol = -1; 2430c1ac3661SBarry Smith PetscInt *imax = a->imax, *ai = a->i, *ailen = a->ilen; 2431d0f46423SBarry Smith PetscInt *aj = a->j, nonew = a->nonew, bs = A->rmap->bs, brow, bcol; 2432c1ac3661SBarry Smith PetscInt ridx, cidx, bs2 = a->bs2; 2433ace3abfcSBarry Smith PetscBool roworiented = a->roworiented; 2434d8cdefa3SHong Zhang MatScalar *ap = NULL, value = 0.0, *aa = a->a, *bap; 24352d61bbb3SSatish Balay 24362d61bbb3SSatish Balay PetscFunctionBegin; 24372d61bbb3SSatish Balay for (k = 0; k < m; k++) { /* loop over added rows */ 2438085a36d4SBarry Smith row = im[k]; 2439085a36d4SBarry Smith brow = row / bs; 24405ef9f2a5SBarry Smith if (row < 0) continue; 24416bdcaf15SBarry Smith PetscCheck(row < A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, row, A->rmap->N - 1); 24422d61bbb3SSatish Balay rp = aj + ai[brow]; 2443672ba085SHong Zhang if (!A->structure_only) ap = aa + bs2 * ai[brow]; 24442d61bbb3SSatish Balay rmax = imax[brow]; 24452d61bbb3SSatish Balay nrow = ailen[brow]; 24462d61bbb3SSatish Balay low = 0; 2447c71e6ed7SBarry Smith high = nrow; 24482d61bbb3SSatish Balay for (l = 0; l < n; l++) { /* loop over added columns */ 24495ef9f2a5SBarry Smith if (in[l] < 0) continue; 24506bdcaf15SBarry Smith PetscCheck(in[l] < A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[l], A->cmap->n - 1); 24519371c9d4SSatish Balay col = in[l]; 24529371c9d4SSatish Balay bcol = col / bs; 24539371c9d4SSatish Balay ridx = row % bs; 24549371c9d4SSatish Balay cidx = col % bs; 2455672ba085SHong Zhang if (!A->structure_only) { 24562d61bbb3SSatish Balay if (roworiented) { 24575ef9f2a5SBarry Smith value = v[l + k * n]; 24582d61bbb3SSatish Balay } else { 24592d61bbb3SSatish Balay value = v[k + l * m]; 24602d61bbb3SSatish Balay } 2461672ba085SHong Zhang } 24629371c9d4SSatish Balay if (col <= lastcol) low = 0; 24639371c9d4SSatish Balay else high = nrow; 2464e2ee6c50SBarry Smith lastcol = col; 24652d61bbb3SSatish Balay while (high - low > 7) { 24662d61bbb3SSatish Balay t = (low + high) / 2; 24672d61bbb3SSatish Balay if (rp[t] > bcol) high = t; 24682d61bbb3SSatish Balay else low = t; 24692d61bbb3SSatish Balay } 24702d61bbb3SSatish Balay for (i = low; i < high; i++) { 24712d61bbb3SSatish Balay if (rp[i] > bcol) break; 24722d61bbb3SSatish Balay if (rp[i] == bcol) { 24732d61bbb3SSatish Balay bap = ap + bs2 * i + bs * cidx + ridx; 2474672ba085SHong Zhang if (!A->structure_only) { 24752d61bbb3SSatish Balay if (is == ADD_VALUES) *bap += value; 24762d61bbb3SSatish Balay else *bap = value; 2477672ba085SHong Zhang } 24782d61bbb3SSatish Balay goto noinsert1; 24792d61bbb3SSatish Balay } 24802d61bbb3SSatish Balay } 24812d61bbb3SSatish Balay if (nonew == 1) goto noinsert1; 24825f80ce2aSJacob Faibussowitsch PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero (%" PetscInt_FMT ", %" PetscInt_FMT ") in the matrix", row, col); 2483672ba085SHong Zhang if (A->structure_only) { 2484672ba085SHong Zhang MatSeqXAIJReallocateAIJ_structure_only(A, a->mbs, bs2, nrow, brow, bcol, rmax, ai, aj, rp, imax, nonew, MatScalar); 2485672ba085SHong Zhang } else { 2486fef13f97SBarry Smith MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, brow, bcol, rmax, aa, ai, aj, rp, ap, imax, nonew, MatScalar); 2487672ba085SHong Zhang } 24889371c9d4SSatish Balay N = nrow++ - 1; 24899371c9d4SSatish Balay high++; 24902d61bbb3SSatish Balay /* shift up all the later entries in this row */ 24919566063dSJacob Faibussowitsch PetscCall(PetscArraymove(rp + i + 1, rp + i, N - i + 1)); 24922d61bbb3SSatish Balay rp[i] = bcol; 2493580bdb30SBarry Smith if (!A->structure_only) { 24949566063dSJacob Faibussowitsch PetscCall(PetscArraymove(ap + bs2 * (i + 1), ap + bs2 * i, bs2 * (N - i + 1))); 24959566063dSJacob Faibussowitsch PetscCall(PetscArrayzero(ap + bs2 * i, bs2)); 2496580bdb30SBarry Smith ap[bs2 * i + bs * cidx + ridx] = value; 2497580bdb30SBarry Smith } 2498085a36d4SBarry Smith a->nz++; 2499e56f5c9eSBarry Smith A->nonzerostate++; 25002d61bbb3SSatish Balay noinsert1:; 25012d61bbb3SSatish Balay low = i; 25022d61bbb3SSatish Balay } 25032d61bbb3SSatish Balay ailen[brow] = nrow; 25042d61bbb3SSatish Balay } 25052d61bbb3SSatish Balay PetscFunctionReturn(0); 25062d61bbb3SSatish Balay } 25072d61bbb3SSatish Balay 25089371c9d4SSatish Balay PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA, IS row, IS col, const MatFactorInfo *info) { 25092d61bbb3SSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)inA->data; 25102d61bbb3SSatish Balay Mat outA; 2511ace3abfcSBarry Smith PetscBool row_identity, col_identity; 25122d61bbb3SSatish Balay 25132d61bbb3SSatish Balay PetscFunctionBegin; 25145f80ce2aSJacob Faibussowitsch PetscCheck(info->levels == 0, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only levels = 0 supported for in-place ILU"); 25159566063dSJacob Faibussowitsch PetscCall(ISIdentity(row, &row_identity)); 25169566063dSJacob Faibussowitsch PetscCall(ISIdentity(col, &col_identity)); 25175f80ce2aSJacob Faibussowitsch PetscCheck(row_identity && col_identity, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Row and column permutations must be identity for in-place ILU"); 25182d61bbb3SSatish Balay 25192d61bbb3SSatish Balay outA = inA; 2520d5f3da31SBarry Smith inA->factortype = MAT_FACTOR_LU; 25219566063dSJacob Faibussowitsch PetscCall(PetscFree(inA->solvertype)); 25229566063dSJacob Faibussowitsch PetscCall(PetscStrallocpy(MATSOLVERPETSC, &inA->solvertype)); 25232d61bbb3SSatish Balay 25249566063dSJacob Faibussowitsch PetscCall(MatMarkDiagonal_SeqBAIJ(inA)); 2525cf242676SKris Buschelman 25269566063dSJacob Faibussowitsch PetscCall(PetscObjectReference((PetscObject)row)); 25279566063dSJacob Faibussowitsch PetscCall(ISDestroy(&a->row)); 2528c3122656SLisandro Dalcin a->row = row; 25299566063dSJacob Faibussowitsch PetscCall(PetscObjectReference((PetscObject)col)); 25309566063dSJacob Faibussowitsch PetscCall(ISDestroy(&a->col)); 2531c3122656SLisandro Dalcin a->col = col; 2532c38d4ed2SBarry Smith 2533c38d4ed2SBarry Smith /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */ 25349566063dSJacob Faibussowitsch PetscCall(ISDestroy(&a->icol)); 25359566063dSJacob Faibussowitsch PetscCall(ISInvertPermutation(col, PETSC_DECIDE, &a->icol)); 25369566063dSJacob Faibussowitsch PetscCall(PetscLogObjectParent((PetscObject)inA, (PetscObject)a->icol)); 2537c38d4ed2SBarry Smith 25389566063dSJacob Faibussowitsch PetscCall(MatSeqBAIJSetNumericFactorization_inplace(inA, (PetscBool)(row_identity && col_identity))); 2539c38d4ed2SBarry Smith if (!a->solve_work) { 25409566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(inA->rmap->N + inA->rmap->bs, &a->solve_work)); 25419566063dSJacob Faibussowitsch PetscCall(PetscLogObjectMemory((PetscObject)inA, (inA->rmap->N + inA->rmap->bs) * sizeof(PetscScalar))); 2542c38d4ed2SBarry Smith } 25439566063dSJacob Faibussowitsch PetscCall(MatLUFactorNumeric(outA, inA, info)); 25442d61bbb3SSatish Balay PetscFunctionReturn(0); 25452d61bbb3SSatish Balay } 2546d9b7c43dSSatish Balay 25479371c9d4SSatish Balay PetscErrorCode MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat, PetscInt *indices) { 254827a8da17SBarry Smith Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data; 2549bdb1c0e1SJed Brown PetscInt i, nz, mbs; 255027a8da17SBarry Smith 255127a8da17SBarry Smith PetscFunctionBegin; 2552b32cb4a7SJed Brown nz = baij->maxnz; 2553bdb1c0e1SJed Brown mbs = baij->mbs; 25549371c9d4SSatish Balay for (i = 0; i < nz; i++) { baij->j[i] = indices[i]; } 255527a8da17SBarry Smith baij->nz = nz; 25569371c9d4SSatish Balay for (i = 0; i < mbs; i++) { baij->ilen[i] = baij->imax[i]; } 255727a8da17SBarry Smith PetscFunctionReturn(0); 255827a8da17SBarry Smith } 255927a8da17SBarry Smith 256027a8da17SBarry Smith /*@ 256127a8da17SBarry Smith MatSeqBAIJSetColumnIndices - Set the column indices for all the rows 256227a8da17SBarry Smith in the matrix. 256327a8da17SBarry Smith 256427a8da17SBarry Smith Input Parameters: 256527a8da17SBarry Smith + mat - the SeqBAIJ matrix 256627a8da17SBarry Smith - indices - the column indices 256727a8da17SBarry Smith 256815091d37SBarry Smith Level: advanced 256915091d37SBarry Smith 257027a8da17SBarry Smith Notes: 257127a8da17SBarry Smith This can be called if you have precomputed the nonzero structure of the 257227a8da17SBarry Smith matrix and want to provide it to the matrix object to improve the performance 257327a8da17SBarry Smith of the MatSetValues() operation. 257427a8da17SBarry Smith 257527a8da17SBarry Smith You MUST have set the correct numbers of nonzeros per row in the call to 2576d1be2dadSMatthew Knepley MatCreateSeqBAIJ(), and the columns indices MUST be sorted. 257727a8da17SBarry Smith 257827a8da17SBarry Smith MUST be called before any calls to MatSetValues(); 257927a8da17SBarry Smith 258027a8da17SBarry Smith @*/ 25819371c9d4SSatish Balay PetscErrorCode MatSeqBAIJSetColumnIndices(Mat mat, PetscInt *indices) { 258227a8da17SBarry Smith PetscFunctionBegin; 25830700a824SBarry Smith PetscValidHeaderSpecific(mat, MAT_CLASSID, 1); 2584dadcf809SJacob Faibussowitsch PetscValidIntPointer(indices, 2); 2585cac4c232SBarry Smith PetscUseMethod(mat, "MatSeqBAIJSetColumnIndices_C", (Mat, PetscInt *), (mat, indices)); 258627a8da17SBarry Smith PetscFunctionReturn(0); 258727a8da17SBarry Smith } 258827a8da17SBarry Smith 25899371c9d4SSatish Balay PetscErrorCode MatGetRowMaxAbs_SeqBAIJ(Mat A, Vec v, PetscInt idx[]) { 2590273d9f13SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 2591c1ac3661SBarry Smith PetscInt i, j, n, row, bs, *ai, *aj, mbs; 2592273d9f13SBarry Smith PetscReal atmp; 259387828ca2SBarry Smith PetscScalar *x, zero = 0.0; 2594273d9f13SBarry Smith MatScalar *aa; 2595c1ac3661SBarry Smith PetscInt ncols, brow, krow, kcol; 2596273d9f13SBarry Smith 2597273d9f13SBarry Smith PetscFunctionBegin; 25985f80ce2aSJacob Faibussowitsch /* why is this not a macro???????????????????????????????????????????????????????????????? */ 25995f80ce2aSJacob Faibussowitsch PetscCheck(!A->factortype, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Not for factored matrix"); 2600d0f46423SBarry Smith bs = A->rmap->bs; 2601273d9f13SBarry Smith aa = a->a; 2602273d9f13SBarry Smith ai = a->i; 2603273d9f13SBarry Smith aj = a->j; 2604273d9f13SBarry Smith mbs = a->mbs; 2605273d9f13SBarry Smith 26069566063dSJacob Faibussowitsch PetscCall(VecSet(v, zero)); 26079566063dSJacob Faibussowitsch PetscCall(VecGetArray(v, &x)); 26089566063dSJacob Faibussowitsch PetscCall(VecGetLocalSize(v, &n)); 26095f80ce2aSJacob Faibussowitsch PetscCheck(n == A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Nonconforming matrix and vector"); 2610273d9f13SBarry Smith for (i = 0; i < mbs; i++) { 26119371c9d4SSatish Balay ncols = ai[1] - ai[0]; 26129371c9d4SSatish Balay ai++; 2613273d9f13SBarry Smith brow = bs * i; 2614273d9f13SBarry Smith for (j = 0; j < ncols; j++) { 2615273d9f13SBarry Smith for (kcol = 0; kcol < bs; kcol++) { 2616273d9f13SBarry Smith for (krow = 0; krow < bs; krow++) { 26179371c9d4SSatish Balay atmp = PetscAbsScalar(*aa); 26189371c9d4SSatish Balay aa++; 2619273d9f13SBarry Smith row = brow + krow; /* row index */ 26209371c9d4SSatish Balay if (PetscAbsScalar(x[row]) < atmp) { 26219371c9d4SSatish Balay x[row] = atmp; 26229371c9d4SSatish Balay if (idx) idx[row] = bs * (*aj) + kcol; 26239371c9d4SSatish Balay } 2624273d9f13SBarry Smith } 2625273d9f13SBarry Smith } 2626273d9f13SBarry Smith aj++; 2627273d9f13SBarry Smith } 2628273d9f13SBarry Smith } 26299566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(v, &x)); 2630273d9f13SBarry Smith PetscFunctionReturn(0); 2631273d9f13SBarry Smith } 2632273d9f13SBarry Smith 26339371c9d4SSatish Balay PetscErrorCode MatCopy_SeqBAIJ(Mat A, Mat B, MatStructure str) { 26343c896bc6SHong Zhang PetscFunctionBegin; 26353c896bc6SHong Zhang /* If the two matrices have the same copy implementation, use fast copy. */ 26363c896bc6SHong Zhang if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) { 26373c896bc6SHong Zhang Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 26383c896bc6SHong Zhang Mat_SeqBAIJ *b = (Mat_SeqBAIJ *)B->data; 2639d88c0aacSHong Zhang PetscInt ambs = a->mbs, bmbs = b->mbs, abs = A->rmap->bs, bbs = B->rmap->bs, bs2 = abs * abs; 26403c896bc6SHong Zhang 26415f80ce2aSJacob Faibussowitsch PetscCheck(a->i[ambs] == b->i[bmbs], PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Number of nonzero blocks in matrices A %" PetscInt_FMT " and B %" PetscInt_FMT " are different", a->i[ambs], b->i[bmbs]); 26425f80ce2aSJacob Faibussowitsch PetscCheck(abs == bbs, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Block size A %" PetscInt_FMT " and B %" PetscInt_FMT " are different", abs, bbs); 26439566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(b->a, a->a, bs2 * a->i[ambs])); 26449566063dSJacob Faibussowitsch PetscCall(PetscObjectStateIncrease((PetscObject)B)); 26453c896bc6SHong Zhang } else { 26469566063dSJacob Faibussowitsch PetscCall(MatCopy_Basic(A, B, str)); 26473c896bc6SHong Zhang } 26483c896bc6SHong Zhang PetscFunctionReturn(0); 26493c896bc6SHong Zhang } 26503c896bc6SHong Zhang 26519371c9d4SSatish Balay PetscErrorCode MatSetUp_SeqBAIJ(Mat A) { 2652273d9f13SBarry Smith PetscFunctionBegin; 26539566063dSJacob Faibussowitsch PetscCall(MatSeqBAIJSetPreallocation(A, A->rmap->bs, PETSC_DEFAULT, NULL)); 2654273d9f13SBarry Smith PetscFunctionReturn(0); 2655273d9f13SBarry Smith } 2656273d9f13SBarry Smith 26579371c9d4SSatish Balay static PetscErrorCode MatSeqBAIJGetArray_SeqBAIJ(Mat A, PetscScalar *array[]) { 2658f2a5309cSSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 26596e111a19SKarl Rupp 2660f2a5309cSSatish Balay PetscFunctionBegin; 2661f2a5309cSSatish Balay *array = a->a; 2662f2a5309cSSatish Balay PetscFunctionReturn(0); 2663f2a5309cSSatish Balay } 2664f2a5309cSSatish Balay 26659371c9d4SSatish Balay static PetscErrorCode MatSeqBAIJRestoreArray_SeqBAIJ(Mat A, PetscScalar *array[]) { 2666f2a5309cSSatish Balay PetscFunctionBegin; 2667cda14afcSprj- *array = NULL; 2668f2a5309cSSatish Balay PetscFunctionReturn(0); 2669f2a5309cSSatish Balay } 2670f2a5309cSSatish Balay 26719371c9d4SSatish Balay PetscErrorCode MatAXPYGetPreallocation_SeqBAIJ(Mat Y, Mat X, PetscInt *nnz) { 2672b264fe52SHong Zhang PetscInt bs = Y->rmap->bs, mbs = Y->rmap->N / bs; 267352768537SHong Zhang Mat_SeqBAIJ *x = (Mat_SeqBAIJ *)X->data; 267452768537SHong Zhang Mat_SeqBAIJ *y = (Mat_SeqBAIJ *)Y->data; 267552768537SHong Zhang 267652768537SHong Zhang PetscFunctionBegin; 267752768537SHong Zhang /* Set the number of nonzeros in the new matrix */ 26789566063dSJacob Faibussowitsch PetscCall(MatAXPYGetPreallocation_SeqX_private(mbs, x->i, x->j, y->i, y->j, nnz)); 267952768537SHong Zhang PetscFunctionReturn(0); 268052768537SHong Zhang } 268152768537SHong Zhang 26829371c9d4SSatish Balay PetscErrorCode MatAXPY_SeqBAIJ(Mat Y, PetscScalar a, Mat X, MatStructure str) { 268342ee4b1aSHong Zhang Mat_SeqBAIJ *x = (Mat_SeqBAIJ *)X->data, *y = (Mat_SeqBAIJ *)Y->data; 268431ce2d13SHong Zhang PetscInt bs = Y->rmap->bs, bs2 = bs * bs; 2685e838b9e7SJed Brown PetscBLASInt one = 1; 268642ee4b1aSHong Zhang 268742ee4b1aSHong Zhang PetscFunctionBegin; 2688134adf20SPierre Jolivet if (str == UNKNOWN_NONZERO_PATTERN || (PetscDefined(USE_DEBUG) && str == SAME_NONZERO_PATTERN)) { 2689134adf20SPierre Jolivet PetscBool e = x->nz == y->nz && x->mbs == y->mbs && bs == X->rmap->bs ? PETSC_TRUE : PETSC_FALSE; 2690134adf20SPierre Jolivet if (e) { 26919566063dSJacob Faibussowitsch PetscCall(PetscArraycmp(x->i, y->i, x->mbs + 1, &e)); 2692134adf20SPierre Jolivet if (e) { 26939566063dSJacob Faibussowitsch PetscCall(PetscArraycmp(x->j, y->j, x->i[x->mbs], &e)); 2694134adf20SPierre Jolivet if (e) str = SAME_NONZERO_PATTERN; 2695134adf20SPierre Jolivet } 2696134adf20SPierre Jolivet } 269754c59aa7SJacob Faibussowitsch if (!e) PetscCheck(str != SAME_NONZERO_PATTERN, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "MatStructure is not SAME_NONZERO_PATTERN"); 2698134adf20SPierre Jolivet } 269942ee4b1aSHong Zhang if (str == SAME_NONZERO_PATTERN) { 2700f4df32b1SMatthew Knepley PetscScalar alpha = a; 2701c5df96a5SBarry Smith PetscBLASInt bnz; 27029566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast(x->nz * bs2, &bnz)); 2703792fecdfSBarry Smith PetscCallBLAS("BLASaxpy", BLASaxpy_(&bnz, &alpha, x->a, &one, y->a, &one)); 27049566063dSJacob Faibussowitsch PetscCall(PetscObjectStateIncrease((PetscObject)Y)); 2705ab784542SHong Zhang } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 27069566063dSJacob Faibussowitsch PetscCall(MatAXPY_Basic(Y, a, X, str)); 270742ee4b1aSHong Zhang } else { 270852768537SHong Zhang Mat B; 270952768537SHong Zhang PetscInt *nnz; 271054c59aa7SJacob Faibussowitsch PetscCheck(bs == X->rmap->bs, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrices must have same block size"); 27119566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(Y->rmap->N, &nnz)); 27129566063dSJacob Faibussowitsch PetscCall(MatCreate(PetscObjectComm((PetscObject)Y), &B)); 27139566063dSJacob Faibussowitsch PetscCall(PetscObjectSetName((PetscObject)B, ((PetscObject)Y)->name)); 27149566063dSJacob Faibussowitsch PetscCall(MatSetSizes(B, Y->rmap->n, Y->cmap->n, Y->rmap->N, Y->cmap->N)); 27159566063dSJacob Faibussowitsch PetscCall(MatSetBlockSizesFromMats(B, Y, Y)); 27169566063dSJacob Faibussowitsch PetscCall(MatSetType(B, (MatType)((PetscObject)Y)->type_name)); 27179566063dSJacob Faibussowitsch PetscCall(MatAXPYGetPreallocation_SeqBAIJ(Y, X, nnz)); 27189566063dSJacob Faibussowitsch PetscCall(MatSeqBAIJSetPreallocation(B, bs, 0, nnz)); 27199566063dSJacob Faibussowitsch PetscCall(MatAXPY_BasicWithPreallocation(B, Y, a, X, str)); 27209566063dSJacob Faibussowitsch PetscCall(MatHeaderMerge(Y, &B)); 27219566063dSJacob Faibussowitsch PetscCall(PetscFree(nnz)); 272242ee4b1aSHong Zhang } 272342ee4b1aSHong Zhang PetscFunctionReturn(0); 272442ee4b1aSHong Zhang } 272542ee4b1aSHong Zhang 27269371c9d4SSatish Balay PETSC_INTERN PetscErrorCode MatConjugate_SeqBAIJ(Mat A) { 27272726fb6dSPierre Jolivet #if defined(PETSC_USE_COMPLEX) 27282726fb6dSPierre Jolivet Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 27292726fb6dSPierre Jolivet PetscInt i, nz = a->bs2 * a->i[a->mbs]; 27302726fb6dSPierre Jolivet MatScalar *aa = a->a; 27312726fb6dSPierre Jolivet 27322726fb6dSPierre Jolivet PetscFunctionBegin; 27332726fb6dSPierre Jolivet for (i = 0; i < nz; i++) aa[i] = PetscConj(aa[i]); 27342726fb6dSPierre Jolivet #else 27352726fb6dSPierre Jolivet PetscFunctionBegin; 27362726fb6dSPierre Jolivet #endif 27372726fb6dSPierre Jolivet PetscFunctionReturn(0); 27382726fb6dSPierre Jolivet } 27392726fb6dSPierre Jolivet 27409371c9d4SSatish Balay PetscErrorCode MatRealPart_SeqBAIJ(Mat A) { 274199cafbc1SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 274299cafbc1SBarry Smith PetscInt i, nz = a->bs2 * a->i[a->mbs]; 2743dd6ea824SBarry Smith MatScalar *aa = a->a; 274499cafbc1SBarry Smith 274599cafbc1SBarry Smith PetscFunctionBegin; 274699cafbc1SBarry Smith for (i = 0; i < nz; i++) aa[i] = PetscRealPart(aa[i]); 274799cafbc1SBarry Smith PetscFunctionReturn(0); 274899cafbc1SBarry Smith } 274999cafbc1SBarry Smith 27509371c9d4SSatish Balay PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A) { 275199cafbc1SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 275299cafbc1SBarry Smith PetscInt i, nz = a->bs2 * a->i[a->mbs]; 2753dd6ea824SBarry Smith MatScalar *aa = a->a; 275499cafbc1SBarry Smith 275599cafbc1SBarry Smith PetscFunctionBegin; 275699cafbc1SBarry Smith for (i = 0; i < nz; i++) aa[i] = PetscImaginaryPart(aa[i]); 275799cafbc1SBarry Smith PetscFunctionReturn(0); 275899cafbc1SBarry Smith } 275999cafbc1SBarry Smith 27603acb8795SBarry Smith /* 27612479783cSJose E. Roman Code almost identical to MatGetColumnIJ_SeqAIJ() should share common code 27623acb8795SBarry Smith */ 27639371c9d4SSatish Balay PetscErrorCode MatGetColumnIJ_SeqBAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *nn, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done) { 27643acb8795SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 27653acb8795SBarry Smith PetscInt bs = A->rmap->bs, i, *collengths, *cia, *cja, n = A->cmap->n / bs, m = A->rmap->n / bs; 27663acb8795SBarry Smith PetscInt nz = a->i[m], row, *jj, mr, col; 27673acb8795SBarry Smith 27683acb8795SBarry Smith PetscFunctionBegin; 27693acb8795SBarry Smith *nn = n; 27703acb8795SBarry Smith if (!ia) PetscFunctionReturn(0); 27715f80ce2aSJacob Faibussowitsch PetscCheck(!symmetric, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not for BAIJ matrices"); 27729566063dSJacob Faibussowitsch PetscCall(PetscCalloc1(n, &collengths)); 27739566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(n + 1, &cia)); 27749566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(nz, &cja)); 27753acb8795SBarry Smith jj = a->j; 27769371c9d4SSatish Balay for (i = 0; i < nz; i++) { collengths[jj[i]]++; } 27773acb8795SBarry Smith cia[0] = oshift; 27789371c9d4SSatish Balay for (i = 0; i < n; i++) { cia[i + 1] = cia[i] + collengths[i]; } 27799566063dSJacob Faibussowitsch PetscCall(PetscArrayzero(collengths, n)); 27803acb8795SBarry Smith jj = a->j; 27813acb8795SBarry Smith for (row = 0; row < m; row++) { 27823acb8795SBarry Smith mr = a->i[row + 1] - a->i[row]; 27833acb8795SBarry Smith for (i = 0; i < mr; i++) { 27843acb8795SBarry Smith col = *jj++; 278526fbe8dcSKarl Rupp 27863acb8795SBarry Smith cja[cia[col] + collengths[col]++ - oshift] = row + oshift; 27873acb8795SBarry Smith } 27883acb8795SBarry Smith } 27899566063dSJacob Faibussowitsch PetscCall(PetscFree(collengths)); 27909371c9d4SSatish Balay *ia = cia; 27919371c9d4SSatish Balay *ja = cja; 27923acb8795SBarry Smith PetscFunctionReturn(0); 27933acb8795SBarry Smith } 27943acb8795SBarry Smith 27959371c9d4SSatish Balay PetscErrorCode MatRestoreColumnIJ_SeqBAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *n, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done) { 27963acb8795SBarry Smith PetscFunctionBegin; 27973acb8795SBarry Smith if (!ia) PetscFunctionReturn(0); 27989566063dSJacob Faibussowitsch PetscCall(PetscFree(*ia)); 27999566063dSJacob Faibussowitsch PetscCall(PetscFree(*ja)); 28003acb8795SBarry Smith PetscFunctionReturn(0); 28013acb8795SBarry Smith } 28023acb8795SBarry Smith 2803525d23c0SHong Zhang /* 2804525d23c0SHong Zhang MatGetColumnIJ_SeqBAIJ_Color() and MatRestoreColumnIJ_SeqBAIJ_Color() are customized from 2805525d23c0SHong Zhang MatGetColumnIJ_SeqBAIJ() and MatRestoreColumnIJ_SeqBAIJ() by adding an output 2806040ebd07SHong Zhang spidx[], index of a->a, to be used in MatTransposeColoringCreate() and MatFDColoringCreate() 2807525d23c0SHong Zhang */ 28089371c9d4SSatish Balay PetscErrorCode MatGetColumnIJ_SeqBAIJ_Color(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *nn, const PetscInt *ia[], const PetscInt *ja[], PetscInt *spidx[], PetscBool *done) { 2809525d23c0SHong Zhang Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 2810c0349474SHong Zhang PetscInt i, *collengths, *cia, *cja, n = a->nbs, m = a->mbs; 2811525d23c0SHong Zhang PetscInt nz = a->i[m], row, *jj, mr, col; 2812525d23c0SHong Zhang PetscInt *cspidx; 2813f6d58c54SBarry Smith 2814f6d58c54SBarry Smith PetscFunctionBegin; 2815525d23c0SHong Zhang *nn = n; 2816525d23c0SHong Zhang if (!ia) PetscFunctionReturn(0); 2817f6d58c54SBarry Smith 28189566063dSJacob Faibussowitsch PetscCall(PetscCalloc1(n, &collengths)); 28199566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(n + 1, &cia)); 28209566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(nz, &cja)); 28219566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(nz, &cspidx)); 2822525d23c0SHong Zhang jj = a->j; 28239371c9d4SSatish Balay for (i = 0; i < nz; i++) { collengths[jj[i]]++; } 2824525d23c0SHong Zhang cia[0] = oshift; 28259371c9d4SSatish Balay for (i = 0; i < n; i++) { cia[i + 1] = cia[i] + collengths[i]; } 28269566063dSJacob Faibussowitsch PetscCall(PetscArrayzero(collengths, n)); 2827525d23c0SHong Zhang jj = a->j; 2828525d23c0SHong Zhang for (row = 0; row < m; row++) { 2829525d23c0SHong Zhang mr = a->i[row + 1] - a->i[row]; 2830525d23c0SHong Zhang for (i = 0; i < mr; i++) { 2831525d23c0SHong Zhang col = *jj++; 2832525d23c0SHong Zhang cspidx[cia[col] + collengths[col] - oshift] = a->i[row] + i; /* index of a->j */ 2833525d23c0SHong Zhang cja[cia[col] + collengths[col]++ - oshift] = row + oshift; 2834525d23c0SHong Zhang } 2835525d23c0SHong Zhang } 28369566063dSJacob Faibussowitsch PetscCall(PetscFree(collengths)); 2837071fcb05SBarry Smith *ia = cia; 2838071fcb05SBarry Smith *ja = cja; 2839525d23c0SHong Zhang *spidx = cspidx; 2840525d23c0SHong Zhang PetscFunctionReturn(0); 2841f6d58c54SBarry Smith } 2842f6d58c54SBarry Smith 28439371c9d4SSatish Balay PetscErrorCode MatRestoreColumnIJ_SeqBAIJ_Color(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *n, const PetscInt *ia[], const PetscInt *ja[], PetscInt *spidx[], PetscBool *done) { 2844525d23c0SHong Zhang PetscFunctionBegin; 28459566063dSJacob Faibussowitsch PetscCall(MatRestoreColumnIJ_SeqBAIJ(A, oshift, symmetric, inodecompressed, n, ia, ja, done)); 28469566063dSJacob Faibussowitsch PetscCall(PetscFree(*spidx)); 2847f6d58c54SBarry Smith PetscFunctionReturn(0); 2848f6d58c54SBarry Smith } 284999cafbc1SBarry Smith 28509371c9d4SSatish Balay PetscErrorCode MatShift_SeqBAIJ(Mat Y, PetscScalar a) { 28517d68702bSBarry Smith Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)Y->data; 28527d68702bSBarry Smith 28537d68702bSBarry Smith PetscFunctionBegin; 2854*48a46eb9SPierre Jolivet if (!Y->preallocated || !aij->nz) PetscCall(MatSeqBAIJSetPreallocation(Y, Y->rmap->bs, 1, NULL)); 28559566063dSJacob Faibussowitsch PetscCall(MatShift_Basic(Y, a)); 28567d68702bSBarry Smith PetscFunctionReturn(0); 28577d68702bSBarry Smith } 28587d68702bSBarry Smith 28592593348eSBarry Smith /* -------------------------------------------------------------------*/ 28609371c9d4SSatish Balay static struct _MatOps MatOps_Values = { 28619371c9d4SSatish Balay MatSetValues_SeqBAIJ, 2862cc2dc46cSBarry Smith MatGetRow_SeqBAIJ, 2863cc2dc46cSBarry Smith MatRestoreRow_SeqBAIJ, 2864cc2dc46cSBarry Smith MatMult_SeqBAIJ_N, 286597304618SKris Buschelman /* 4*/ MatMultAdd_SeqBAIJ_N, 28667c922b88SBarry Smith MatMultTranspose_SeqBAIJ, 28677c922b88SBarry Smith MatMultTransposeAdd_SeqBAIJ, 2868f4259b30SLisandro Dalcin NULL, 2869f4259b30SLisandro Dalcin NULL, 2870f4259b30SLisandro Dalcin NULL, 2871f4259b30SLisandro Dalcin /* 10*/ NULL, 2872cc2dc46cSBarry Smith MatLUFactor_SeqBAIJ, 2873f4259b30SLisandro Dalcin NULL, 2874f4259b30SLisandro Dalcin NULL, 2875f2501298SSatish Balay MatTranspose_SeqBAIJ, 287697304618SKris Buschelman /* 15*/ MatGetInfo_SeqBAIJ, 2877cc2dc46cSBarry Smith MatEqual_SeqBAIJ, 2878cc2dc46cSBarry Smith MatGetDiagonal_SeqBAIJ, 2879cc2dc46cSBarry Smith MatDiagonalScale_SeqBAIJ, 2880cc2dc46cSBarry Smith MatNorm_SeqBAIJ, 2881f4259b30SLisandro Dalcin /* 20*/ NULL, 2882cc2dc46cSBarry Smith MatAssemblyEnd_SeqBAIJ, 2883cc2dc46cSBarry Smith MatSetOption_SeqBAIJ, 2884cc2dc46cSBarry Smith MatZeroEntries_SeqBAIJ, 2885d519adbfSMatthew Knepley /* 24*/ MatZeroRows_SeqBAIJ, 2886f4259b30SLisandro Dalcin NULL, 2887f4259b30SLisandro Dalcin NULL, 2888f4259b30SLisandro Dalcin NULL, 2889f4259b30SLisandro Dalcin NULL, 28904994cf47SJed Brown /* 29*/ MatSetUp_SeqBAIJ, 2891f4259b30SLisandro Dalcin NULL, 2892f4259b30SLisandro Dalcin NULL, 2893f4259b30SLisandro Dalcin NULL, 2894f4259b30SLisandro Dalcin NULL, 2895d519adbfSMatthew Knepley /* 34*/ MatDuplicate_SeqBAIJ, 2896f4259b30SLisandro Dalcin NULL, 2897f4259b30SLisandro Dalcin NULL, 2898cc2dc46cSBarry Smith MatILUFactor_SeqBAIJ, 2899f4259b30SLisandro Dalcin NULL, 2900d519adbfSMatthew Knepley /* 39*/ MatAXPY_SeqBAIJ, 29017dae84e0SHong Zhang MatCreateSubMatrices_SeqBAIJ, 2902cc2dc46cSBarry Smith MatIncreaseOverlap_SeqBAIJ, 2903cc2dc46cSBarry Smith MatGetValues_SeqBAIJ, 29043c896bc6SHong Zhang MatCopy_SeqBAIJ, 2905f4259b30SLisandro Dalcin /* 44*/ NULL, 2906cc2dc46cSBarry Smith MatScale_SeqBAIJ, 29077d68702bSBarry Smith MatShift_SeqBAIJ, 2908f4259b30SLisandro Dalcin NULL, 290997b48c8fSBarry Smith MatZeroRowsColumns_SeqBAIJ, 2910f4259b30SLisandro Dalcin /* 49*/ NULL, 29113b2fbd54SBarry Smith MatGetRowIJ_SeqBAIJ, 291292c4ed94SBarry Smith MatRestoreRowIJ_SeqBAIJ, 29133acb8795SBarry Smith MatGetColumnIJ_SeqBAIJ, 29143acb8795SBarry Smith MatRestoreColumnIJ_SeqBAIJ, 291593dfae19SHong Zhang /* 54*/ MatFDColoringCreate_SeqXAIJ, 2916f4259b30SLisandro Dalcin NULL, 2917f4259b30SLisandro Dalcin NULL, 2918090001bdSToby Isaac NULL, 2919d3825aa8SBarry Smith MatSetValuesBlocked_SeqBAIJ, 29207dae84e0SHong Zhang /* 59*/ MatCreateSubMatrix_SeqBAIJ, 2921b9b97703SBarry Smith MatDestroy_SeqBAIJ, 2922b9b97703SBarry Smith MatView_SeqBAIJ, 2923f4259b30SLisandro Dalcin NULL, 2924f4259b30SLisandro Dalcin NULL, 2925f4259b30SLisandro Dalcin /* 64*/ NULL, 2926f4259b30SLisandro Dalcin NULL, 2927f4259b30SLisandro Dalcin NULL, 2928f4259b30SLisandro Dalcin NULL, 2929f4259b30SLisandro Dalcin NULL, 2930d519adbfSMatthew Knepley /* 69*/ MatGetRowMaxAbs_SeqBAIJ, 2931f4259b30SLisandro Dalcin NULL, 2932c87e5d42SMatthew Knepley MatConvert_Basic, 2933f4259b30SLisandro Dalcin NULL, 2934f4259b30SLisandro Dalcin NULL, 2935f4259b30SLisandro Dalcin /* 74*/ NULL, 2936f6d58c54SBarry Smith MatFDColoringApply_BAIJ, 2937f4259b30SLisandro Dalcin NULL, 2938f4259b30SLisandro Dalcin NULL, 2939f4259b30SLisandro Dalcin NULL, 2940f4259b30SLisandro Dalcin /* 79*/ NULL, 2941f4259b30SLisandro Dalcin NULL, 2942f4259b30SLisandro Dalcin NULL, 2943f4259b30SLisandro Dalcin NULL, 29445bba2384SShri Abhyankar MatLoad_SeqBAIJ, 2945f4259b30SLisandro Dalcin /* 84*/ NULL, 2946f4259b30SLisandro Dalcin NULL, 2947f4259b30SLisandro Dalcin NULL, 2948f4259b30SLisandro Dalcin NULL, 2949f4259b30SLisandro Dalcin NULL, 2950f4259b30SLisandro Dalcin /* 89*/ NULL, 2951f4259b30SLisandro Dalcin NULL, 2952f4259b30SLisandro Dalcin NULL, 2953f4259b30SLisandro Dalcin NULL, 2954f4259b30SLisandro Dalcin NULL, 2955f4259b30SLisandro Dalcin /* 94*/ NULL, 2956f4259b30SLisandro Dalcin NULL, 2957f4259b30SLisandro Dalcin NULL, 2958f4259b30SLisandro Dalcin NULL, 2959f4259b30SLisandro Dalcin NULL, 2960f4259b30SLisandro Dalcin /* 99*/ NULL, 2961f4259b30SLisandro Dalcin NULL, 2962f4259b30SLisandro Dalcin NULL, 29632726fb6dSPierre Jolivet MatConjugate_SeqBAIJ, 2964f4259b30SLisandro Dalcin NULL, 2965f4259b30SLisandro Dalcin /*104*/ NULL, 296699cafbc1SBarry Smith MatRealPart_SeqBAIJ, 29672af78befSBarry Smith MatImaginaryPart_SeqBAIJ, 2968f4259b30SLisandro Dalcin NULL, 2969f4259b30SLisandro Dalcin NULL, 2970f4259b30SLisandro Dalcin /*109*/ NULL, 2971f4259b30SLisandro Dalcin NULL, 2972f4259b30SLisandro Dalcin NULL, 2973f4259b30SLisandro Dalcin NULL, 2974547795f9SHong Zhang MatMissingDiagonal_SeqBAIJ, 2975f4259b30SLisandro Dalcin /*114*/ NULL, 2976f4259b30SLisandro Dalcin NULL, 2977f4259b30SLisandro Dalcin NULL, 2978f4259b30SLisandro Dalcin NULL, 2979f4259b30SLisandro Dalcin NULL, 2980f4259b30SLisandro Dalcin /*119*/ NULL, 2981f4259b30SLisandro Dalcin NULL, 2982547795f9SHong Zhang MatMultHermitianTranspose_SeqBAIJ, 2983d6037b41SHong Zhang MatMultHermitianTransposeAdd_SeqBAIJ, 2984f4259b30SLisandro Dalcin NULL, 2985f4259b30SLisandro Dalcin /*124*/ NULL, 2986857cbf51SRichard Tran Mills MatGetColumnReductions_SeqBAIJ, 29873964eb88SJed Brown MatInvertBlockDiagonal_SeqBAIJ, 2988f4259b30SLisandro Dalcin NULL, 2989f4259b30SLisandro Dalcin NULL, 2990f4259b30SLisandro Dalcin /*129*/ NULL, 2991f4259b30SLisandro Dalcin NULL, 2992f4259b30SLisandro Dalcin NULL, 2993f4259b30SLisandro Dalcin NULL, 2994f4259b30SLisandro Dalcin NULL, 2995f4259b30SLisandro Dalcin /*134*/ NULL, 2996f4259b30SLisandro Dalcin NULL, 2997f4259b30SLisandro Dalcin NULL, 2998f4259b30SLisandro Dalcin NULL, 2999f4259b30SLisandro Dalcin NULL, 300046533700Sstefano_zampini /*139*/ MatSetBlockSizes_Default, 3001f4259b30SLisandro Dalcin NULL, 3002f4259b30SLisandro Dalcin NULL, 3003bdf6f3fcSHong Zhang MatFDColoringSetUp_SeqXAIJ, 3004f4259b30SLisandro Dalcin NULL, 300586e85357SHong Zhang /*144*/ MatCreateMPIMatConcatenateSeqMat_SeqBAIJ, 3006d70f29a3SPierre Jolivet MatDestroySubMatrices_SeqBAIJ, 3007d70f29a3SPierre Jolivet NULL, 300899a7f59eSMark Adams NULL, 300999a7f59eSMark Adams NULL, 30107fb60732SBarry Smith NULL, 30117fb60732SBarry Smith /*150*/ NULL, 301299cafbc1SBarry Smith }; 30132593348eSBarry Smith 30149371c9d4SSatish Balay PetscErrorCode MatStoreValues_SeqBAIJ(Mat mat) { 30153e90b805SBarry Smith Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data; 30168ece6314SShri Abhyankar PetscInt nz = aij->i[aij->mbs] * aij->bs2; 30173e90b805SBarry Smith 30183e90b805SBarry Smith PetscFunctionBegin; 30195f80ce2aSJacob Faibussowitsch PetscCheck(aij->nonew == 1, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first"); 30203e90b805SBarry Smith 30213e90b805SBarry Smith /* allocate space for values if not already there */ 30223e90b805SBarry Smith if (!aij->saved_values) { 30239566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(nz + 1, &aij->saved_values)); 30249566063dSJacob Faibussowitsch PetscCall(PetscLogObjectMemory((PetscObject)mat, (nz + 1) * sizeof(PetscScalar))); 30253e90b805SBarry Smith } 30263e90b805SBarry Smith 30273e90b805SBarry Smith /* copy values over */ 30289566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(aij->saved_values, aij->a, nz)); 30293e90b805SBarry Smith PetscFunctionReturn(0); 30303e90b805SBarry Smith } 30313e90b805SBarry Smith 30329371c9d4SSatish Balay PetscErrorCode MatRetrieveValues_SeqBAIJ(Mat mat) { 30333e90b805SBarry Smith Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data; 30348ece6314SShri Abhyankar PetscInt nz = aij->i[aij->mbs] * aij->bs2; 30353e90b805SBarry Smith 30363e90b805SBarry Smith PetscFunctionBegin; 30375f80ce2aSJacob Faibussowitsch PetscCheck(aij->nonew == 1, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first"); 30385f80ce2aSJacob Faibussowitsch PetscCheck(aij->saved_values, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Must call MatStoreValues(A);first"); 30393e90b805SBarry Smith 30403e90b805SBarry Smith /* copy values over */ 30419566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(aij->a, aij->saved_values, nz)); 30423e90b805SBarry Smith PetscFunctionReturn(0); 30433e90b805SBarry Smith } 30443e90b805SBarry Smith 3045cc2e6a90SBarry Smith PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType, MatReuse, Mat *); 3046cc2e6a90SBarry Smith PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqSBAIJ(Mat, MatType, MatReuse, Mat *); 3047273d9f13SBarry Smith 30489371c9d4SSatish Balay PetscErrorCode MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B, PetscInt bs, PetscInt nz, PetscInt *nnz) { 3049a23d5eceSKris Buschelman Mat_SeqBAIJ *b; 3050535b19f3SBarry Smith PetscInt i, mbs, nbs, bs2; 30518afaa268SBarry Smith PetscBool flg = PETSC_FALSE, skipallocation = PETSC_FALSE, realalloc = PETSC_FALSE; 3052a23d5eceSKris Buschelman 3053a23d5eceSKris Buschelman PetscFunctionBegin; 30542576faa2SJed Brown if (nz >= 0 || nnz) realalloc = PETSC_TRUE; 3055ab93d7beSBarry Smith if (nz == MAT_SKIP_ALLOCATION) { 3056ab93d7beSBarry Smith skipallocation = PETSC_TRUE; 3057ab93d7beSBarry Smith nz = 0; 3058ab93d7beSBarry Smith } 30598c07d4e3SBarry Smith 30609566063dSJacob Faibussowitsch PetscCall(MatSetBlockSize(B, PetscAbs(bs))); 30619566063dSJacob Faibussowitsch PetscCall(PetscLayoutSetUp(B->rmap)); 30629566063dSJacob Faibussowitsch PetscCall(PetscLayoutSetUp(B->cmap)); 30639566063dSJacob Faibussowitsch PetscCall(PetscLayoutGetBlockSize(B->rmap, &bs)); 3064899cda47SBarry Smith 3065899cda47SBarry Smith B->preallocated = PETSC_TRUE; 3066899cda47SBarry Smith 3067d0f46423SBarry Smith mbs = B->rmap->n / bs; 3068d0f46423SBarry Smith nbs = B->cmap->n / bs; 3069a23d5eceSKris Buschelman bs2 = bs * bs; 3070a23d5eceSKris Buschelman 30715f80ce2aSJacob Faibussowitsch PetscCheck(mbs * bs == B->rmap->n && nbs * bs == B->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Number rows %" PetscInt_FMT ", cols %" PetscInt_FMT " must be divisible by blocksize %" PetscInt_FMT, B->rmap->N, B->cmap->n, bs); 3072a23d5eceSKris Buschelman 3073a23d5eceSKris Buschelman if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 30745f80ce2aSJacob Faibussowitsch PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "nz cannot be less than 0: value %" PetscInt_FMT, nz); 3075a23d5eceSKris Buschelman if (nnz) { 3076a23d5eceSKris Buschelman for (i = 0; i < mbs; i++) { 30775f80ce2aSJacob Faibussowitsch PetscCheck(nnz[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "nnz cannot be less than 0: local row %" PetscInt_FMT " value %" PetscInt_FMT, i, nnz[i]); 30785f80ce2aSJacob Faibussowitsch PetscCheck(nnz[i] <= nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "nnz cannot be greater than block row length: local row %" PetscInt_FMT " value %" PetscInt_FMT " rowlength %" PetscInt_FMT, i, nnz[i], nbs); 3079a23d5eceSKris Buschelman } 3080a23d5eceSKris Buschelman } 3081a23d5eceSKris Buschelman 3082a23d5eceSKris Buschelman b = (Mat_SeqBAIJ *)B->data; 3083d0609cedSBarry Smith PetscOptionsBegin(PetscObjectComm((PetscObject)B), NULL, "Optimize options for SEQBAIJ matrix 2 ", "Mat"); 30849566063dSJacob Faibussowitsch PetscCall(PetscOptionsBool("-mat_no_unroll", "Do not optimize for block size (slow)", NULL, flg, &flg, NULL)); 3085d0609cedSBarry Smith PetscOptionsEnd(); 30868c07d4e3SBarry Smith 3087a23d5eceSKris Buschelman if (!flg) { 3088a23d5eceSKris Buschelman switch (bs) { 3089a23d5eceSKris Buschelman case 1: 3090a23d5eceSKris Buschelman B->ops->mult = MatMult_SeqBAIJ_1; 3091a23d5eceSKris Buschelman B->ops->multadd = MatMultAdd_SeqBAIJ_1; 3092a23d5eceSKris Buschelman break; 3093a23d5eceSKris Buschelman case 2: 3094a23d5eceSKris Buschelman B->ops->mult = MatMult_SeqBAIJ_2; 3095a23d5eceSKris Buschelman B->ops->multadd = MatMultAdd_SeqBAIJ_2; 3096a23d5eceSKris Buschelman break; 3097a23d5eceSKris Buschelman case 3: 3098a23d5eceSKris Buschelman B->ops->mult = MatMult_SeqBAIJ_3; 3099a23d5eceSKris Buschelman B->ops->multadd = MatMultAdd_SeqBAIJ_3; 3100a23d5eceSKris Buschelman break; 3101a23d5eceSKris Buschelman case 4: 3102a23d5eceSKris Buschelman B->ops->mult = MatMult_SeqBAIJ_4; 3103a23d5eceSKris Buschelman B->ops->multadd = MatMultAdd_SeqBAIJ_4; 3104a23d5eceSKris Buschelman break; 3105a23d5eceSKris Buschelman case 5: 3106a23d5eceSKris Buschelman B->ops->mult = MatMult_SeqBAIJ_5; 3107a23d5eceSKris Buschelman B->ops->multadd = MatMultAdd_SeqBAIJ_5; 3108a23d5eceSKris Buschelman break; 3109a23d5eceSKris Buschelman case 6: 3110a23d5eceSKris Buschelman B->ops->mult = MatMult_SeqBAIJ_6; 3111a23d5eceSKris Buschelman B->ops->multadd = MatMultAdd_SeqBAIJ_6; 3112a23d5eceSKris Buschelman break; 3113a23d5eceSKris Buschelman case 7: 3114a23d5eceSKris Buschelman B->ops->mult = MatMult_SeqBAIJ_7; 3115a23d5eceSKris Buschelman B->ops->multadd = MatMultAdd_SeqBAIJ_7; 3116a23d5eceSKris Buschelman break; 31179371c9d4SSatish Balay case 9: { 31186679dcc1SBarry Smith PetscInt version = 1; 31199566063dSJacob Faibussowitsch PetscCall(PetscOptionsGetInt(NULL, ((PetscObject)B)->prefix, "-mat_baij_mult_version", &version, NULL)); 31206679dcc1SBarry Smith switch (version) { 31215f70456aSHong Zhang #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX2__) && defined(__FMA__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES) 31226679dcc1SBarry Smith case 1: 312396e086a2SDaniel Kokron B->ops->mult = MatMult_SeqBAIJ_9_AVX2; 312496e086a2SDaniel Kokron B->ops->multadd = MatMultAdd_SeqBAIJ_9_AVX2; 31259566063dSJacob Faibussowitsch PetscCall(PetscInfo((PetscObject)B, "Using AVX2 for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", bs)); 31266679dcc1SBarry Smith break; 31276679dcc1SBarry Smith #endif 31286679dcc1SBarry Smith default: 312996e086a2SDaniel Kokron B->ops->mult = MatMult_SeqBAIJ_N; 313096e086a2SDaniel Kokron B->ops->multadd = MatMultAdd_SeqBAIJ_N; 31319566063dSJacob Faibussowitsch PetscCall(PetscInfo((PetscObject)B, "Using BLAS for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", bs)); 313296e086a2SDaniel Kokron break; 31336679dcc1SBarry Smith } 31346679dcc1SBarry Smith break; 31356679dcc1SBarry Smith } 3136ebada01fSBarry Smith case 11: 3137ebada01fSBarry Smith B->ops->mult = MatMult_SeqBAIJ_11; 3138ebada01fSBarry Smith B->ops->multadd = MatMultAdd_SeqBAIJ_11; 3139ebada01fSBarry Smith break; 31409371c9d4SSatish Balay case 12: { 31416679dcc1SBarry Smith PetscInt version = 1; 31429566063dSJacob Faibussowitsch PetscCall(PetscOptionsGetInt(NULL, ((PetscObject)B)->prefix, "-mat_baij_mult_version", &version, NULL)); 31436679dcc1SBarry Smith switch (version) { 31446679dcc1SBarry Smith case 1: 31456679dcc1SBarry Smith B->ops->mult = MatMult_SeqBAIJ_12_ver1; 31466679dcc1SBarry Smith B->ops->multadd = MatMultAdd_SeqBAIJ_12_ver1; 31479566063dSJacob Faibussowitsch PetscCall(PetscInfo((PetscObject)B, "Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", version, bs)); 31488ab949d8SShri Abhyankar break; 31496679dcc1SBarry Smith case 2: 31506679dcc1SBarry Smith B->ops->mult = MatMult_SeqBAIJ_12_ver2; 31516679dcc1SBarry Smith B->ops->multadd = MatMultAdd_SeqBAIJ_12_ver2; 31529566063dSJacob Faibussowitsch PetscCall(PetscInfo((PetscObject)B, "Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", version, bs)); 31536679dcc1SBarry Smith break; 31546679dcc1SBarry Smith #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX2__) && defined(__FMA__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES) 31556679dcc1SBarry Smith case 3: 31566679dcc1SBarry Smith B->ops->mult = MatMult_SeqBAIJ_12_AVX2; 31576679dcc1SBarry Smith B->ops->multadd = MatMultAdd_SeqBAIJ_12_ver1; 31589566063dSJacob Faibussowitsch PetscCall(PetscInfo((PetscObject)B, "Using AVX2 for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", bs)); 31596679dcc1SBarry Smith break; 31606679dcc1SBarry Smith #endif 3161a23d5eceSKris Buschelman default: 3162a23d5eceSKris Buschelman B->ops->mult = MatMult_SeqBAIJ_N; 3163a23d5eceSKris Buschelman B->ops->multadd = MatMultAdd_SeqBAIJ_N; 31649566063dSJacob Faibussowitsch PetscCall(PetscInfo((PetscObject)B, "Using BLAS for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", bs)); 31656679dcc1SBarry Smith break; 31666679dcc1SBarry Smith } 31676679dcc1SBarry Smith break; 31686679dcc1SBarry Smith } 31699371c9d4SSatish Balay case 15: { 31706679dcc1SBarry Smith PetscInt version = 1; 31719566063dSJacob Faibussowitsch PetscCall(PetscOptionsGetInt(NULL, ((PetscObject)B)->prefix, "-mat_baij_mult_version", &version, NULL)); 31726679dcc1SBarry Smith switch (version) { 31736679dcc1SBarry Smith case 1: 31746679dcc1SBarry Smith B->ops->mult = MatMult_SeqBAIJ_15_ver1; 31759566063dSJacob Faibussowitsch PetscCall(PetscInfo((PetscObject)B, "Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", version, bs)); 31766679dcc1SBarry Smith break; 31776679dcc1SBarry Smith case 2: 31786679dcc1SBarry Smith B->ops->mult = MatMult_SeqBAIJ_15_ver2; 31799566063dSJacob Faibussowitsch PetscCall(PetscInfo((PetscObject)B, "Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", version, bs)); 31806679dcc1SBarry Smith break; 31816679dcc1SBarry Smith case 3: 31826679dcc1SBarry Smith B->ops->mult = MatMult_SeqBAIJ_15_ver3; 31839566063dSJacob Faibussowitsch PetscCall(PetscInfo((PetscObject)B, "Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", version, bs)); 31846679dcc1SBarry Smith break; 31856679dcc1SBarry Smith case 4: 31866679dcc1SBarry Smith B->ops->mult = MatMult_SeqBAIJ_15_ver4; 31879566063dSJacob Faibussowitsch PetscCall(PetscInfo((PetscObject)B, "Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", version, bs)); 31886679dcc1SBarry Smith break; 31896679dcc1SBarry Smith default: 31906679dcc1SBarry Smith B->ops->mult = MatMult_SeqBAIJ_N; 31919566063dSJacob Faibussowitsch PetscCall(PetscInfo((PetscObject)B, "Using BLAS for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", bs)); 31926679dcc1SBarry Smith break; 31936679dcc1SBarry Smith } 31946679dcc1SBarry Smith B->ops->multadd = MatMultAdd_SeqBAIJ_N; 31956679dcc1SBarry Smith break; 31966679dcc1SBarry Smith } 31976679dcc1SBarry Smith default: 31986679dcc1SBarry Smith B->ops->mult = MatMult_SeqBAIJ_N; 31996679dcc1SBarry Smith B->ops->multadd = MatMultAdd_SeqBAIJ_N; 32009566063dSJacob Faibussowitsch PetscCall(PetscInfo((PetscObject)B, "Using BLAS for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", bs)); 3201a23d5eceSKris Buschelman break; 3202a23d5eceSKris Buschelman } 3203a23d5eceSKris Buschelman } 3204e48d15efSToby Isaac B->ops->sor = MatSOR_SeqBAIJ; 3205a23d5eceSKris Buschelman b->mbs = mbs; 3206a23d5eceSKris Buschelman b->nbs = nbs; 3207ab93d7beSBarry Smith if (!skipallocation) { 32082ee49352SLisandro Dalcin if (!b->imax) { 32099566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(mbs, &b->imax, mbs, &b->ilen)); 32109566063dSJacob Faibussowitsch PetscCall(PetscLogObjectMemory((PetscObject)B, 2 * mbs * sizeof(PetscInt))); 321126fbe8dcSKarl Rupp 32124fd072dbSBarry Smith b->free_imax_ilen = PETSC_TRUE; 32132ee49352SLisandro Dalcin } 3214ab93d7beSBarry Smith /* b->ilen will count nonzeros in each block row so far. */ 321526fbe8dcSKarl Rupp for (i = 0; i < mbs; i++) b->ilen[i] = 0; 3216a23d5eceSKris Buschelman if (!nnz) { 3217a23d5eceSKris Buschelman if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 3218c62bd62aSJed Brown else if (nz < 0) nz = 1; 32195d2a9ed1SStefano Zampini nz = PetscMin(nz, nbs); 3220a23d5eceSKris Buschelman for (i = 0; i < mbs; i++) b->imax[i] = nz; 32219566063dSJacob Faibussowitsch PetscCall(PetscIntMultError(nz, mbs, &nz)); 3222a23d5eceSKris Buschelman } else { 3223c73702f5SBarry Smith PetscInt64 nz64 = 0; 32249371c9d4SSatish Balay for (i = 0; i < mbs; i++) { 32259371c9d4SSatish Balay b->imax[i] = nnz[i]; 32269371c9d4SSatish Balay nz64 += nnz[i]; 32279371c9d4SSatish Balay } 32289566063dSJacob Faibussowitsch PetscCall(PetscIntCast(nz64, &nz)); 3229a23d5eceSKris Buschelman } 3230a23d5eceSKris Buschelman 3231a23d5eceSKris Buschelman /* allocate the matrix space */ 32329566063dSJacob Faibussowitsch PetscCall(MatSeqXAIJFreeAIJ(B, &b->a, &b->j, &b->i)); 3233672ba085SHong Zhang if (B->structure_only) { 32349566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(nz, &b->j)); 32359566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(B->rmap->N + 1, &b->i)); 32369566063dSJacob Faibussowitsch PetscCall(PetscLogObjectMemory((PetscObject)B, (B->rmap->N + 1) * sizeof(PetscInt) + nz * sizeof(PetscInt))); 3237672ba085SHong Zhang } else { 32386679dcc1SBarry Smith PetscInt nzbs2 = 0; 32399566063dSJacob Faibussowitsch PetscCall(PetscIntMultError(nz, bs2, &nzbs2)); 32409566063dSJacob Faibussowitsch PetscCall(PetscMalloc3(nzbs2, &b->a, nz, &b->j, B->rmap->N + 1, &b->i)); 32419566063dSJacob Faibussowitsch PetscCall(PetscLogObjectMemory((PetscObject)B, (B->rmap->N + 1) * sizeof(PetscInt) + nz * (bs2 * sizeof(PetscScalar) + sizeof(PetscInt)))); 32429566063dSJacob Faibussowitsch PetscCall(PetscArrayzero(b->a, nz * bs2)); 3243672ba085SHong Zhang } 32449566063dSJacob Faibussowitsch PetscCall(PetscArrayzero(b->j, nz)); 324526fbe8dcSKarl Rupp 3246672ba085SHong Zhang if (B->structure_only) { 3247672ba085SHong Zhang b->singlemalloc = PETSC_FALSE; 3248672ba085SHong Zhang b->free_a = PETSC_FALSE; 3249672ba085SHong Zhang } else { 3250a23d5eceSKris Buschelman b->singlemalloc = PETSC_TRUE; 3251672ba085SHong Zhang b->free_a = PETSC_TRUE; 3252672ba085SHong Zhang } 3253672ba085SHong Zhang b->free_ij = PETSC_TRUE; 3254672ba085SHong Zhang 3255a23d5eceSKris Buschelman b->i[0] = 0; 32569371c9d4SSatish Balay for (i = 1; i < mbs + 1; i++) { b->i[i] = b->i[i - 1] + b->imax[i - 1]; } 3257672ba085SHong Zhang 3258e811da20SHong Zhang } else { 3259e6b907acSBarry Smith b->free_a = PETSC_FALSE; 3260e6b907acSBarry Smith b->free_ij = PETSC_FALSE; 3261ab93d7beSBarry Smith } 3262a23d5eceSKris Buschelman 3263a23d5eceSKris Buschelman b->bs2 = bs2; 3264a23d5eceSKris Buschelman b->mbs = mbs; 3265a23d5eceSKris Buschelman b->nz = 0; 3266b32cb4a7SJed Brown b->maxnz = nz; 3267b32cb4a7SJed Brown B->info.nz_unneeded = (PetscReal)b->maxnz * bs2; 3268cb7b82ddSBarry Smith B->was_assembled = PETSC_FALSE; 3269cb7b82ddSBarry Smith B->assembled = PETSC_FALSE; 32709566063dSJacob Faibussowitsch if (realalloc) PetscCall(MatSetOption(B, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_TRUE)); 3271a23d5eceSKris Buschelman PetscFunctionReturn(0); 3272a23d5eceSKris Buschelman } 3273a23d5eceSKris Buschelman 32749371c9d4SSatish Balay PetscErrorCode MatSeqBAIJSetPreallocationCSR_SeqBAIJ(Mat B, PetscInt bs, const PetscInt ii[], const PetscInt jj[], const PetscScalar V[]) { 3275725b52f3SLisandro Dalcin PetscInt i, m, nz, nz_max = 0, *nnz; 3276f4259b30SLisandro Dalcin PetscScalar *values = NULL; 3277d47bf9aaSJed Brown PetscBool roworiented = ((Mat_SeqBAIJ *)B->data)->roworiented; 3278725b52f3SLisandro Dalcin 3279725b52f3SLisandro Dalcin PetscFunctionBegin; 32805f80ce2aSJacob Faibussowitsch PetscCheck(bs >= 1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Invalid block size specified, must be positive but it is %" PetscInt_FMT, bs); 32819566063dSJacob Faibussowitsch PetscCall(PetscLayoutSetBlockSize(B->rmap, bs)); 32829566063dSJacob Faibussowitsch PetscCall(PetscLayoutSetBlockSize(B->cmap, bs)); 32839566063dSJacob Faibussowitsch PetscCall(PetscLayoutSetUp(B->rmap)); 32849566063dSJacob Faibussowitsch PetscCall(PetscLayoutSetUp(B->cmap)); 32859566063dSJacob Faibussowitsch PetscCall(PetscLayoutGetBlockSize(B->rmap, &bs)); 3286d0f46423SBarry Smith m = B->rmap->n / bs; 3287725b52f3SLisandro Dalcin 32885f80ce2aSJacob Faibussowitsch PetscCheck(ii[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %" PetscInt_FMT, ii[0]); 32899566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(m + 1, &nnz)); 3290725b52f3SLisandro Dalcin for (i = 0; i < m; i++) { 3291cf12db73SBarry Smith nz = ii[i + 1] - ii[i]; 32925f80ce2aSJacob Faibussowitsch PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Local row %" PetscInt_FMT " has a negative number of columns %" PetscInt_FMT, i, nz); 3293725b52f3SLisandro Dalcin nz_max = PetscMax(nz_max, nz); 3294725b52f3SLisandro Dalcin nnz[i] = nz; 3295725b52f3SLisandro Dalcin } 32969566063dSJacob Faibussowitsch PetscCall(MatSeqBAIJSetPreallocation(B, bs, 0, nnz)); 32979566063dSJacob Faibussowitsch PetscCall(PetscFree(nnz)); 3298725b52f3SLisandro Dalcin 3299725b52f3SLisandro Dalcin values = (PetscScalar *)V; 3300*48a46eb9SPierre Jolivet if (!values) PetscCall(PetscCalloc1(bs * bs * (nz_max + 1), &values)); 3301725b52f3SLisandro Dalcin for (i = 0; i < m; i++) { 3302cf12db73SBarry Smith PetscInt ncols = ii[i + 1] - ii[i]; 3303cf12db73SBarry Smith const PetscInt *icols = jj + ii[i]; 3304bb80cfbbSStefano Zampini if (bs == 1 || !roworiented) { 3305cf12db73SBarry Smith const PetscScalar *svals = values + (V ? (bs * bs * ii[i]) : 0); 33069566063dSJacob Faibussowitsch PetscCall(MatSetValuesBlocked_SeqBAIJ(B, 1, &i, ncols, icols, svals, INSERT_VALUES)); 33073adadaf3SJed Brown } else { 33083adadaf3SJed Brown PetscInt j; 33093adadaf3SJed Brown for (j = 0; j < ncols; j++) { 33103adadaf3SJed Brown const PetscScalar *svals = values + (V ? (bs * bs * (ii[i] + j)) : 0); 33119566063dSJacob Faibussowitsch PetscCall(MatSetValuesBlocked_SeqBAIJ(B, 1, &i, 1, &icols[j], svals, INSERT_VALUES)); 33123adadaf3SJed Brown } 33133adadaf3SJed Brown } 3314725b52f3SLisandro Dalcin } 33159566063dSJacob Faibussowitsch if (!V) PetscCall(PetscFree(values)); 33169566063dSJacob Faibussowitsch PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 33179566063dSJacob Faibussowitsch PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 33189566063dSJacob Faibussowitsch PetscCall(MatSetOption(B, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE)); 3319725b52f3SLisandro Dalcin PetscFunctionReturn(0); 3320725b52f3SLisandro Dalcin } 3321725b52f3SLisandro Dalcin 3322cda14afcSprj- /*@C 3323cda14afcSprj- MatSeqBAIJGetArray - gives access to the array where the data for a MATSEQBAIJ matrix is stored 3324cda14afcSprj- 3325cda14afcSprj- Not Collective 3326cda14afcSprj- 3327cda14afcSprj- Input Parameter: 3328cda14afcSprj- . mat - a MATSEQBAIJ matrix 3329cda14afcSprj- 3330cda14afcSprj- Output Parameter: 3331cda14afcSprj- . array - pointer to the data 3332cda14afcSprj- 3333cda14afcSprj- Level: intermediate 3334cda14afcSprj- 3335db781477SPatrick Sanan .seealso: `MatSeqBAIJRestoreArray()`, `MatSeqAIJGetArray()`, `MatSeqAIJRestoreArray()` 3336cda14afcSprj- @*/ 33379371c9d4SSatish Balay PetscErrorCode MatSeqBAIJGetArray(Mat A, PetscScalar **array) { 3338cda14afcSprj- PetscFunctionBegin; 3339cac4c232SBarry Smith PetscUseMethod(A, "MatSeqBAIJGetArray_C", (Mat, PetscScalar **), (A, array)); 3340cda14afcSprj- PetscFunctionReturn(0); 3341cda14afcSprj- } 3342cda14afcSprj- 3343cda14afcSprj- /*@C 3344cda14afcSprj- MatSeqBAIJRestoreArray - returns access to the array where the data for a MATSEQBAIJ matrix is stored obtained by MatSeqBAIJGetArray() 3345cda14afcSprj- 3346cda14afcSprj- Not Collective 3347cda14afcSprj- 3348cda14afcSprj- Input Parameters: 3349cda14afcSprj- + mat - a MATSEQBAIJ matrix 3350cda14afcSprj- - array - pointer to the data 3351cda14afcSprj- 3352cda14afcSprj- Level: intermediate 3353cda14afcSprj- 3354db781477SPatrick Sanan .seealso: `MatSeqBAIJGetArray()`, `MatSeqAIJGetArray()`, `MatSeqAIJRestoreArray()` 3355cda14afcSprj- @*/ 33569371c9d4SSatish Balay PetscErrorCode MatSeqBAIJRestoreArray(Mat A, PetscScalar **array) { 3357cda14afcSprj- PetscFunctionBegin; 3358cac4c232SBarry Smith PetscUseMethod(A, "MatSeqBAIJRestoreArray_C", (Mat, PetscScalar **), (A, array)); 3359cda14afcSprj- PetscFunctionReturn(0); 3360cda14afcSprj- } 3361cda14afcSprj- 33620bad9183SKris Buschelman /*MC 3363fafad747SKris Buschelman MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on 33640bad9183SKris Buschelman block sparse compressed row format. 33650bad9183SKris Buschelman 33660bad9183SKris Buschelman Options Database Keys: 33676679dcc1SBarry Smith + -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions() 33686679dcc1SBarry Smith - -mat_baij_mult_version version - indicate the version of the matrix-vector product to use (0 often indicates using BLAS) 33690bad9183SKris Buschelman 33700bad9183SKris Buschelman Level: beginner 33710cd7f59aSBarry Smith 33720cd7f59aSBarry Smith Notes: 33730cd7f59aSBarry Smith MatSetOptions(,MAT_STRUCTURE_ONLY,PETSC_TRUE) may be called for this matrix type. In this no 33740cd7f59aSBarry Smith space is allocated for the nonzero entries and any entries passed with MatSetValues() are ignored 33750bad9183SKris Buschelman 33766679dcc1SBarry Smith Run with -info to see what version of the matrix-vector product is being used 33776679dcc1SBarry Smith 3378db781477SPatrick Sanan .seealso: `MatCreateSeqBAIJ()` 33790bad9183SKris Buschelman M*/ 33800bad9183SKris Buschelman 3381cc2e6a90SBarry Smith PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBSTRM(Mat, MatType, MatReuse, Mat *); 3382b24902e0SBarry Smith 33839371c9d4SSatish Balay PETSC_EXTERN PetscErrorCode MatCreate_SeqBAIJ(Mat B) { 3384c1ac3661SBarry Smith PetscMPIInt size; 3385b6490206SBarry Smith Mat_SeqBAIJ *b; 33863b2fbd54SBarry Smith 33873a40ed3dSBarry Smith PetscFunctionBegin; 33889566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size)); 33895f80ce2aSJacob Faibussowitsch PetscCheck(size == 1, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Comm must be of size 1"); 3390b6490206SBarry Smith 33919566063dSJacob Faibussowitsch PetscCall(PetscNewLog(B, &b)); 3392b0a32e0cSBarry Smith B->data = (void *)b; 33939566063dSJacob Faibussowitsch PetscCall(PetscMemcpy(B->ops, &MatOps_Values, sizeof(struct _MatOps))); 339426fbe8dcSKarl Rupp 3395f4259b30SLisandro Dalcin b->row = NULL; 3396f4259b30SLisandro Dalcin b->col = NULL; 3397f4259b30SLisandro Dalcin b->icol = NULL; 33982593348eSBarry Smith b->reallocs = 0; 3399f4259b30SLisandro Dalcin b->saved_values = NULL; 34002593348eSBarry Smith 3401c4992f7dSBarry Smith b->roworiented = PETSC_TRUE; 34022593348eSBarry Smith b->nonew = 0; 3403f4259b30SLisandro Dalcin b->diag = NULL; 3404f4259b30SLisandro Dalcin B->spptr = NULL; 3405b32cb4a7SJed Brown B->info.nz_unneeded = (PetscReal)b->maxnz * b->bs2; 3406a9817697SBarry Smith b->keepnonzeropattern = PETSC_FALSE; 34074e220ebcSLois Curfman McInnes 34089566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSeqBAIJGetArray_C", MatSeqBAIJGetArray_SeqBAIJ)); 34099566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSeqBAIJRestoreArray_C", MatSeqBAIJRestoreArray_SeqBAIJ)); 34109566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatStoreValues_C", MatStoreValues_SeqBAIJ)); 34119566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatRetrieveValues_C", MatRetrieveValues_SeqBAIJ)); 34129566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSeqBAIJSetColumnIndices_C", MatSeqBAIJSetColumnIndices_SeqBAIJ)); 34139566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqbaij_seqaij_C", MatConvert_SeqBAIJ_SeqAIJ)); 34149566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqbaij_seqsbaij_C", MatConvert_SeqBAIJ_SeqSBAIJ)); 34159566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSeqBAIJSetPreallocation_C", MatSeqBAIJSetPreallocation_SeqBAIJ)); 34169566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSeqBAIJSetPreallocationCSR_C", MatSeqBAIJSetPreallocationCSR_SeqBAIJ)); 34179566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatIsTranspose_C", MatIsTranspose_SeqBAIJ)); 34187ea3e4caSstefano_zampini #if defined(PETSC_HAVE_HYPRE) 34199566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqbaij_hypre_C", MatConvert_AIJ_HYPRE)); 34207ea3e4caSstefano_zampini #endif 34219566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqbaij_is_C", MatConvert_XAIJ_IS)); 34229566063dSJacob Faibussowitsch PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATSEQBAIJ)); 34233a40ed3dSBarry Smith PetscFunctionReturn(0); 34242593348eSBarry Smith } 34252593348eSBarry Smith 34269371c9d4SSatish Balay PetscErrorCode MatDuplicateNoCreate_SeqBAIJ(Mat C, Mat A, MatDuplicateOption cpvalues, PetscBool mallocmatspace) { 3427b24902e0SBarry Smith Mat_SeqBAIJ *c = (Mat_SeqBAIJ *)C->data, *a = (Mat_SeqBAIJ *)A->data; 3428a96a251dSBarry Smith PetscInt i, mbs = a->mbs, nz = a->nz, bs2 = a->bs2; 3429de6a44a3SBarry Smith 34303a40ed3dSBarry Smith PetscFunctionBegin; 34315f80ce2aSJacob Faibussowitsch PetscCheck(a->i[mbs] == nz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Corrupt matrix"); 34322593348eSBarry Smith 34334fd072dbSBarry Smith if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { 34344fd072dbSBarry Smith c->imax = a->imax; 34354fd072dbSBarry Smith c->ilen = a->ilen; 34364fd072dbSBarry Smith c->free_imax_ilen = PETSC_FALSE; 34374fd072dbSBarry Smith } else { 34389566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(mbs, &c->imax, mbs, &c->ilen)); 34399566063dSJacob Faibussowitsch PetscCall(PetscLogObjectMemory((PetscObject)C, 2 * mbs * sizeof(PetscInt))); 3440b6490206SBarry Smith for (i = 0; i < mbs; i++) { 34412593348eSBarry Smith c->imax[i] = a->imax[i]; 34422593348eSBarry Smith c->ilen[i] = a->ilen[i]; 34432593348eSBarry Smith } 34444fd072dbSBarry Smith c->free_imax_ilen = PETSC_TRUE; 34454fd072dbSBarry Smith } 34462593348eSBarry Smith 34472593348eSBarry Smith /* allocate the matrix space */ 344816a2bf60SHong Zhang if (mallocmatspace) { 34494fd072dbSBarry Smith if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { 34509566063dSJacob Faibussowitsch PetscCall(PetscCalloc1(bs2 * nz, &c->a)); 34519566063dSJacob Faibussowitsch PetscCall(PetscLogObjectMemory((PetscObject)C, a->i[mbs] * bs2 * sizeof(PetscScalar))); 345226fbe8dcSKarl Rupp 34534fd072dbSBarry Smith c->i = a->i; 34544fd072dbSBarry Smith c->j = a->j; 3455379be0ddSLisandro Dalcin c->singlemalloc = PETSC_FALSE; 3456379be0ddSLisandro Dalcin c->free_a = PETSC_TRUE; 3457379be0ddSLisandro Dalcin c->free_ij = PETSC_FALSE; 34584fd072dbSBarry Smith c->parent = A; 34591e40a84eSLisandro Dalcin C->preallocated = PETSC_TRUE; 34601e40a84eSLisandro Dalcin C->assembled = PETSC_TRUE; 346126fbe8dcSKarl Rupp 34629566063dSJacob Faibussowitsch PetscCall(PetscObjectReference((PetscObject)A)); 34639566063dSJacob Faibussowitsch PetscCall(MatSetOption(A, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE)); 34649566063dSJacob Faibussowitsch PetscCall(MatSetOption(C, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE)); 34654fd072dbSBarry Smith } else { 34669566063dSJacob Faibussowitsch PetscCall(PetscMalloc3(bs2 * nz, &c->a, nz, &c->j, mbs + 1, &c->i)); 34679566063dSJacob Faibussowitsch PetscCall(PetscLogObjectMemory((PetscObject)C, a->i[mbs] * (bs2 * sizeof(PetscScalar) + sizeof(PetscInt)) + (mbs + 1) * sizeof(PetscInt))); 346826fbe8dcSKarl Rupp 3469c4992f7dSBarry Smith c->singlemalloc = PETSC_TRUE; 3470379be0ddSLisandro Dalcin c->free_a = PETSC_TRUE; 34714fd072dbSBarry Smith c->free_ij = PETSC_TRUE; 347226fbe8dcSKarl Rupp 34739566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(c->i, a->i, mbs + 1)); 3474b6490206SBarry Smith if (mbs > 0) { 34759566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(c->j, a->j, nz)); 34762e8a6d31SBarry Smith if (cpvalues == MAT_COPY_VALUES) { 34779566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(c->a, a->a, bs2 * nz)); 34782e8a6d31SBarry Smith } else { 34799566063dSJacob Faibussowitsch PetscCall(PetscArrayzero(c->a, bs2 * nz)); 34802593348eSBarry Smith } 34812593348eSBarry Smith } 34821e40a84eSLisandro Dalcin C->preallocated = PETSC_TRUE; 34831e40a84eSLisandro Dalcin C->assembled = PETSC_TRUE; 348416a2bf60SHong Zhang } 34854fd072dbSBarry Smith } 348616a2bf60SHong Zhang 34872593348eSBarry Smith c->roworiented = a->roworiented; 34882593348eSBarry Smith c->nonew = a->nonew; 348926fbe8dcSKarl Rupp 34909566063dSJacob Faibussowitsch PetscCall(PetscLayoutReference(A->rmap, &C->rmap)); 34919566063dSJacob Faibussowitsch PetscCall(PetscLayoutReference(A->cmap, &C->cmap)); 349226fbe8dcSKarl Rupp 34935c9eb25fSBarry Smith c->bs2 = a->bs2; 34945c9eb25fSBarry Smith c->mbs = a->mbs; 34955c9eb25fSBarry Smith c->nbs = a->nbs; 34962593348eSBarry Smith 34972593348eSBarry Smith if (a->diag) { 34984fd072dbSBarry Smith if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { 34994fd072dbSBarry Smith c->diag = a->diag; 35004fd072dbSBarry Smith c->free_diag = PETSC_FALSE; 35014fd072dbSBarry Smith } else { 35029566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(mbs + 1, &c->diag)); 35039566063dSJacob Faibussowitsch PetscCall(PetscLogObjectMemory((PetscObject)C, (mbs + 1) * sizeof(PetscInt))); 350426fbe8dcSKarl Rupp for (i = 0; i < mbs; i++) c->diag[i] = a->diag[i]; 35054fd072dbSBarry Smith c->free_diag = PETSC_TRUE; 35064fd072dbSBarry Smith } 3507f4259b30SLisandro Dalcin } else c->diag = NULL; 350826fbe8dcSKarl Rupp 35092593348eSBarry Smith c->nz = a->nz; 3510f2cbd3d5SJed Brown c->maxnz = a->nz; /* Since we allocate exactly the right amount */ 3511f361c04dSBarry Smith c->solve_work = NULL; 3512f361c04dSBarry Smith c->mult_work = NULL; 3513f361c04dSBarry Smith c->sor_workt = NULL; 3514f361c04dSBarry Smith c->sor_work = NULL; 351588e51ccdSHong Zhang 351688e51ccdSHong Zhang c->compressedrow.use = a->compressedrow.use; 351788e51ccdSHong Zhang c->compressedrow.nrows = a->compressedrow.nrows; 3518cd6b891eSBarry Smith if (a->compressedrow.use) { 351988e51ccdSHong Zhang i = a->compressedrow.nrows; 35209566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(i + 1, &c->compressedrow.i, i + 1, &c->compressedrow.rindex)); 35219566063dSJacob Faibussowitsch PetscCall(PetscLogObjectMemory((PetscObject)C, (2 * i + 1) * sizeof(PetscInt))); 35229566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(c->compressedrow.i, a->compressedrow.i, i + 1)); 35239566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(c->compressedrow.rindex, a->compressedrow.rindex, i)); 352488e51ccdSHong Zhang } else { 352588e51ccdSHong Zhang c->compressedrow.use = PETSC_FALSE; 35260298fd71SBarry Smith c->compressedrow.i = NULL; 35270298fd71SBarry Smith c->compressedrow.rindex = NULL; 352888e51ccdSHong Zhang } 3529e56f5c9eSBarry Smith C->nonzerostate = A->nonzerostate; 353026fbe8dcSKarl Rupp 35319566063dSJacob Faibussowitsch PetscCall(PetscFunctionListDuplicate(((PetscObject)A)->qlist, &((PetscObject)C)->qlist)); 35323a40ed3dSBarry Smith PetscFunctionReturn(0); 35332593348eSBarry Smith } 35342593348eSBarry Smith 35359371c9d4SSatish Balay PetscErrorCode MatDuplicate_SeqBAIJ(Mat A, MatDuplicateOption cpvalues, Mat *B) { 3536b24902e0SBarry Smith PetscFunctionBegin; 35379566063dSJacob Faibussowitsch PetscCall(MatCreate(PetscObjectComm((PetscObject)A), B)); 35389566063dSJacob Faibussowitsch PetscCall(MatSetSizes(*B, A->rmap->N, A->cmap->n, A->rmap->N, A->cmap->n)); 35399566063dSJacob Faibussowitsch PetscCall(MatSetType(*B, MATSEQBAIJ)); 35409566063dSJacob Faibussowitsch PetscCall(MatDuplicateNoCreate_SeqBAIJ(*B, A, cpvalues, PETSC_TRUE)); 3541b24902e0SBarry Smith PetscFunctionReturn(0); 3542b24902e0SBarry Smith } 3543b24902e0SBarry Smith 3544618cc2edSLisandro Dalcin /* Used for both SeqBAIJ and SeqSBAIJ matrices */ 35459371c9d4SSatish Balay PetscErrorCode MatLoad_SeqBAIJ_Binary(Mat mat, PetscViewer viewer) { 3546b51a4376SLisandro Dalcin PetscInt header[4], M, N, nz, bs, m, n, mbs, nbs, rows, cols, sum, i, j, k; 3547b51a4376SLisandro Dalcin PetscInt *rowidxs, *colidxs; 3548b51a4376SLisandro Dalcin PetscScalar *matvals; 3549b51a4376SLisandro Dalcin 3550b51a4376SLisandro Dalcin PetscFunctionBegin; 35519566063dSJacob Faibussowitsch PetscCall(PetscViewerSetUp(viewer)); 3552b51a4376SLisandro Dalcin 3553b51a4376SLisandro Dalcin /* read matrix header */ 35549566063dSJacob Faibussowitsch PetscCall(PetscViewerBinaryRead(viewer, header, 4, NULL, PETSC_INT)); 35555f80ce2aSJacob Faibussowitsch PetscCheck(header[0] == MAT_FILE_CLASSID, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Not a matrix object in file"); 35569371c9d4SSatish Balay M = header[1]; 35579371c9d4SSatish Balay N = header[2]; 35589371c9d4SSatish Balay nz = header[3]; 35595f80ce2aSJacob Faibussowitsch PetscCheck(M >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix row size (%" PetscInt_FMT ") in file is negative", M); 35605f80ce2aSJacob Faibussowitsch PetscCheck(N >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix column size (%" PetscInt_FMT ") in file is negative", N); 35615f80ce2aSJacob Faibussowitsch PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix stored in special format on disk, cannot load as SeqBAIJ"); 3562b51a4376SLisandro Dalcin 3563b51a4376SLisandro Dalcin /* set block sizes from the viewer's .info file */ 35649566063dSJacob Faibussowitsch PetscCall(MatLoad_Binary_BlockSizes(mat, viewer)); 3565b51a4376SLisandro Dalcin /* set local and global sizes if not set already */ 3566b51a4376SLisandro Dalcin if (mat->rmap->n < 0) mat->rmap->n = M; 3567b51a4376SLisandro Dalcin if (mat->cmap->n < 0) mat->cmap->n = N; 3568b51a4376SLisandro Dalcin if (mat->rmap->N < 0) mat->rmap->N = M; 3569b51a4376SLisandro Dalcin if (mat->cmap->N < 0) mat->cmap->N = N; 35709566063dSJacob Faibussowitsch PetscCall(PetscLayoutSetUp(mat->rmap)); 35719566063dSJacob Faibussowitsch PetscCall(PetscLayoutSetUp(mat->cmap)); 3572b51a4376SLisandro Dalcin 3573b51a4376SLisandro Dalcin /* check if the matrix sizes are correct */ 35749566063dSJacob Faibussowitsch PetscCall(MatGetSize(mat, &rows, &cols)); 35755f80ce2aSJacob Faibussowitsch PetscCheck(M == rows && N == cols, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%" PetscInt_FMT ", %" PetscInt_FMT ") than the input matrix (%" PetscInt_FMT ", %" PetscInt_FMT ")", M, N, rows, cols); 35769566063dSJacob Faibussowitsch PetscCall(MatGetBlockSize(mat, &bs)); 35779566063dSJacob Faibussowitsch PetscCall(MatGetLocalSize(mat, &m, &n)); 35789371c9d4SSatish Balay mbs = m / bs; 35799371c9d4SSatish Balay nbs = n / bs; 3580b51a4376SLisandro Dalcin 3581b51a4376SLisandro Dalcin /* read in row lengths, column indices and nonzero values */ 35829566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(m + 1, &rowidxs)); 35839566063dSJacob Faibussowitsch PetscCall(PetscViewerBinaryRead(viewer, rowidxs + 1, m, NULL, PETSC_INT)); 35849371c9d4SSatish Balay rowidxs[0] = 0; 35859371c9d4SSatish Balay for (i = 0; i < m; i++) rowidxs[i + 1] += rowidxs[i]; 3586b51a4376SLisandro Dalcin sum = rowidxs[m]; 35875f80ce2aSJacob Faibussowitsch PetscCheck(sum == nz, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Inconsistent matrix data in file: nonzeros = %" PetscInt_FMT ", sum-row-lengths = %" PetscInt_FMT, nz, sum); 3588b51a4376SLisandro Dalcin 3589b51a4376SLisandro Dalcin /* read in column indices and nonzero values */ 35909566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(rowidxs[m], &colidxs, nz, &matvals)); 35919566063dSJacob Faibussowitsch PetscCall(PetscViewerBinaryRead(viewer, colidxs, rowidxs[m], NULL, PETSC_INT)); 35929566063dSJacob Faibussowitsch PetscCall(PetscViewerBinaryRead(viewer, matvals, rowidxs[m], NULL, PETSC_SCALAR)); 3593b51a4376SLisandro Dalcin 3594b51a4376SLisandro Dalcin { /* preallocate matrix storage */ 3595b51a4376SLisandro Dalcin PetscBT bt; /* helper bit set to count nonzeros */ 3596b51a4376SLisandro Dalcin PetscInt *nnz; 3597618cc2edSLisandro Dalcin PetscBool sbaij; 3598b51a4376SLisandro Dalcin 35999566063dSJacob Faibussowitsch PetscCall(PetscBTCreate(nbs, &bt)); 36009566063dSJacob Faibussowitsch PetscCall(PetscCalloc1(mbs, &nnz)); 36019566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATSEQSBAIJ, &sbaij)); 3602b51a4376SLisandro Dalcin for (i = 0; i < mbs; i++) { 36039566063dSJacob Faibussowitsch PetscCall(PetscBTMemzero(nbs, bt)); 3604618cc2edSLisandro Dalcin for (k = 0; k < bs; k++) { 3605618cc2edSLisandro Dalcin PetscInt row = bs * i + k; 3606618cc2edSLisandro Dalcin for (j = rowidxs[row]; j < rowidxs[row + 1]; j++) { 3607618cc2edSLisandro Dalcin PetscInt col = colidxs[j]; 3608618cc2edSLisandro Dalcin if (!sbaij || col >= row) 3609618cc2edSLisandro Dalcin if (!PetscBTLookupSet(bt, col / bs)) nnz[i]++; 3610618cc2edSLisandro Dalcin } 3611618cc2edSLisandro Dalcin } 3612b51a4376SLisandro Dalcin } 36139566063dSJacob Faibussowitsch PetscCall(PetscBTDestroy(&bt)); 36149566063dSJacob Faibussowitsch PetscCall(MatSeqBAIJSetPreallocation(mat, bs, 0, nnz)); 36159566063dSJacob Faibussowitsch PetscCall(MatSeqSBAIJSetPreallocation(mat, bs, 0, nnz)); 36169566063dSJacob Faibussowitsch PetscCall(PetscFree(nnz)); 3617b51a4376SLisandro Dalcin } 3618b51a4376SLisandro Dalcin 3619b51a4376SLisandro Dalcin /* store matrix values */ 3620b51a4376SLisandro Dalcin for (i = 0; i < m; i++) { 3621b51a4376SLisandro Dalcin PetscInt row = i, s = rowidxs[i], e = rowidxs[i + 1]; 36229566063dSJacob Faibussowitsch PetscCall((*mat->ops->setvalues)(mat, 1, &row, e - s, colidxs + s, matvals + s, INSERT_VALUES)); 3623b51a4376SLisandro Dalcin } 3624b51a4376SLisandro Dalcin 36259566063dSJacob Faibussowitsch PetscCall(PetscFree(rowidxs)); 36269566063dSJacob Faibussowitsch PetscCall(PetscFree2(colidxs, matvals)); 36279566063dSJacob Faibussowitsch PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY)); 36289566063dSJacob Faibussowitsch PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY)); 3629b51a4376SLisandro Dalcin PetscFunctionReturn(0); 3630b51a4376SLisandro Dalcin } 3631b51a4376SLisandro Dalcin 36329371c9d4SSatish Balay PetscErrorCode MatLoad_SeqBAIJ(Mat mat, PetscViewer viewer) { 36337f489da9SVaclav Hapla PetscBool isbinary; 3634f501eaabSShri Abhyankar 3635f501eaabSShri Abhyankar PetscFunctionBegin; 36369566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary)); 36375f80ce2aSJacob Faibussowitsch PetscCheck(isbinary, PetscObjectComm((PetscObject)viewer), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)mat)->type_name); 36389566063dSJacob Faibussowitsch PetscCall(MatLoad_SeqBAIJ_Binary(mat, viewer)); 3639f501eaabSShri Abhyankar PetscFunctionReturn(0); 3640f501eaabSShri Abhyankar } 3641f501eaabSShri Abhyankar 3642273d9f13SBarry Smith /*@C 3643273d9f13SBarry Smith MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block 3644273d9f13SBarry Smith compressed row) format. For good matrix assembly performance the 3645273d9f13SBarry Smith user should preallocate the matrix storage by setting the parameter nz 3646273d9f13SBarry Smith (or the array nnz). By setting these parameters accurately, performance 3647273d9f13SBarry Smith during matrix assembly can be increased by more than a factor of 50. 36482593348eSBarry Smith 3649d083f849SBarry Smith Collective 3650273d9f13SBarry Smith 3651273d9f13SBarry Smith Input Parameters: 3652273d9f13SBarry Smith + comm - MPI communicator, set to PETSC_COMM_SELF 3653bb7ae925SBarry Smith . bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row 3654bb7ae925SBarry Smith blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs() 3655273d9f13SBarry Smith . m - number of rows 3656273d9f13SBarry Smith . n - number of columns 365735d8aa7fSBarry Smith . nz - number of nonzero blocks per block row (same for all rows) 365835d8aa7fSBarry Smith - nnz - array containing the number of nonzero blocks in the various block rows 36590298fd71SBarry Smith (possibly different for each block row) or NULL 3660273d9f13SBarry Smith 3661273d9f13SBarry Smith Output Parameter: 3662273d9f13SBarry Smith . A - the matrix 3663273d9f13SBarry Smith 3664175b88e8SBarry Smith It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 3665f6f02116SRichard Tran Mills MatXXXXSetPreallocation() paradigm instead of this routine directly. 3666175b88e8SBarry Smith [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 3667175b88e8SBarry Smith 3668273d9f13SBarry Smith Options Database Keys: 3669a2b725a8SWilliam Gropp + -mat_no_unroll - uses code that does not unroll the loops in the 3670273d9f13SBarry Smith block calculations (much slower) 3671a2b725a8SWilliam Gropp - -mat_block_size - size of the blocks to use 3672273d9f13SBarry Smith 3673273d9f13SBarry Smith Level: intermediate 3674273d9f13SBarry Smith 3675273d9f13SBarry Smith Notes: 3676d1be2dadSMatthew Knepley The number of rows and columns must be divisible by blocksize. 3677d1be2dadSMatthew Knepley 367849a6f317SBarry Smith If the nnz parameter is given then the nz parameter is ignored 367949a6f317SBarry Smith 368035d8aa7fSBarry Smith A nonzero block is any block that as 1 or more nonzeros in it 368135d8aa7fSBarry Smith 3682273d9f13SBarry Smith The block AIJ format is fully compatible with standard Fortran 77 3683273d9f13SBarry Smith storage. That is, the stored row and column indices can begin at 3684273d9f13SBarry Smith either one (as in Fortran) or zero. See the users' manual for details. 3685273d9f13SBarry Smith 3686273d9f13SBarry Smith Specify the preallocated storage with either nz or nnz (not both). 36870298fd71SBarry Smith Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory 3688a7f22e61SSatish Balay allocation. See Users-Manual: ch_mat for details. 3689273d9f13SBarry Smith matrices. 3690273d9f13SBarry Smith 3691db781477SPatrick Sanan .seealso: `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatCreateBAIJ()` 3692273d9f13SBarry Smith @*/ 36939371c9d4SSatish Balay PetscErrorCode MatCreateSeqBAIJ(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt nz, const PetscInt nnz[], Mat *A) { 3694273d9f13SBarry Smith PetscFunctionBegin; 36959566063dSJacob Faibussowitsch PetscCall(MatCreate(comm, A)); 36969566063dSJacob Faibussowitsch PetscCall(MatSetSizes(*A, m, n, m, n)); 36979566063dSJacob Faibussowitsch PetscCall(MatSetType(*A, MATSEQBAIJ)); 36989566063dSJacob Faibussowitsch PetscCall(MatSeqBAIJSetPreallocation(*A, bs, nz, (PetscInt *)nnz)); 3699273d9f13SBarry Smith PetscFunctionReturn(0); 3700273d9f13SBarry Smith } 3701273d9f13SBarry Smith 3702273d9f13SBarry Smith /*@C 3703273d9f13SBarry Smith MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros 3704273d9f13SBarry Smith per row in the matrix. For good matrix assembly performance the 3705273d9f13SBarry Smith user should preallocate the matrix storage by setting the parameter nz 3706273d9f13SBarry Smith (or the array nnz). By setting these parameters accurately, performance 3707273d9f13SBarry Smith during matrix assembly can be increased by more than a factor of 50. 3708273d9f13SBarry Smith 3709d083f849SBarry Smith Collective 3710273d9f13SBarry Smith 3711273d9f13SBarry Smith Input Parameters: 37121c4f3114SJed Brown + B - the matrix 3713bb7ae925SBarry Smith . bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row 3714bb7ae925SBarry Smith blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs() 3715273d9f13SBarry Smith . nz - number of block nonzeros per block row (same for all rows) 3716273d9f13SBarry Smith - nnz - array containing the number of block nonzeros in the various block rows 37170298fd71SBarry Smith (possibly different for each block row) or NULL 3718273d9f13SBarry Smith 3719273d9f13SBarry Smith Options Database Keys: 3720a2b725a8SWilliam Gropp + -mat_no_unroll - uses code that does not unroll the loops in the 3721273d9f13SBarry Smith block calculations (much slower) 3722a2b725a8SWilliam Gropp - -mat_block_size - size of the blocks to use 3723273d9f13SBarry Smith 3724273d9f13SBarry Smith Level: intermediate 3725273d9f13SBarry Smith 3726273d9f13SBarry Smith Notes: 372749a6f317SBarry Smith If the nnz parameter is given then the nz parameter is ignored 372849a6f317SBarry Smith 3729aa95bbe8SBarry Smith You can call MatGetInfo() to get information on how effective the preallocation was; 3730aa95bbe8SBarry Smith for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 3731aa95bbe8SBarry Smith You can also run with the option -info and look for messages with the string 3732aa95bbe8SBarry Smith malloc in them to see if additional memory allocation was needed. 3733aa95bbe8SBarry Smith 3734273d9f13SBarry Smith The block AIJ format is fully compatible with standard Fortran 77 3735273d9f13SBarry Smith storage. That is, the stored row and column indices can begin at 3736273d9f13SBarry Smith either one (as in Fortran) or zero. See the users' manual for details. 3737273d9f13SBarry Smith 3738273d9f13SBarry Smith Specify the preallocated storage with either nz or nnz (not both). 37390298fd71SBarry Smith Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory 3740a7f22e61SSatish Balay allocation. See Users-Manual: ch_mat for details. 3741273d9f13SBarry Smith 3742db781477SPatrick Sanan .seealso: `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatCreateBAIJ()`, `MatGetInfo()` 3743273d9f13SBarry Smith @*/ 37449371c9d4SSatish Balay PetscErrorCode MatSeqBAIJSetPreallocation(Mat B, PetscInt bs, PetscInt nz, const PetscInt nnz[]) { 3745273d9f13SBarry Smith PetscFunctionBegin; 37466ba663aaSJed Brown PetscValidHeaderSpecific(B, MAT_CLASSID, 1); 37476ba663aaSJed Brown PetscValidType(B, 1); 37486ba663aaSJed Brown PetscValidLogicalCollectiveInt(B, bs, 2); 3749cac4c232SBarry Smith PetscTryMethod(B, "MatSeqBAIJSetPreallocation_C", (Mat, PetscInt, PetscInt, const PetscInt[]), (B, bs, nz, nnz)); 3750273d9f13SBarry Smith PetscFunctionReturn(0); 3751273d9f13SBarry Smith } 3752a1d92eedSBarry Smith 3753725b52f3SLisandro Dalcin /*@C 3754664954b6SBarry Smith MatSeqBAIJSetPreallocationCSR - Creates a sparse parallel matrix in BAIJ format using the given nonzero structure and (optional) numerical values 3755725b52f3SLisandro Dalcin 3756d083f849SBarry Smith Collective 3757725b52f3SLisandro Dalcin 3758725b52f3SLisandro Dalcin Input Parameters: 37591c4f3114SJed Brown + B - the matrix 3760725b52f3SLisandro Dalcin . i - the indices into j for the start of each local row (starts with zero) 3761725b52f3SLisandro Dalcin . j - the column indices for each local row (starts with zero) these must be sorted for each row 3762725b52f3SLisandro Dalcin - v - optional values in the matrix 3763725b52f3SLisandro Dalcin 3764664954b6SBarry Smith Level: advanced 3765725b52f3SLisandro Dalcin 37663adadaf3SJed Brown Notes: 37673adadaf3SJed Brown The order of the entries in values is specified by the MatOption MAT_ROW_ORIENTED. For example, C programs 37683adadaf3SJed Brown may want to use the default MAT_ROW_ORIENTED=PETSC_TRUE and use an array v[nnz][bs][bs] where the second index is 37693adadaf3SJed Brown over rows within a block and the last index is over columns within a block row. Fortran programs will likely set 37703adadaf3SJed Brown MAT_ROW_ORIENTED=PETSC_FALSE and use a Fortran array v(bs,bs,nnz) in which the first index is over rows within a 37713adadaf3SJed Brown block column and the second index is over columns within a block. 37723adadaf3SJed Brown 3773664954b6SBarry Smith Though this routine has Preallocation() in the name it also sets the exact nonzero locations of the matrix entries and usually the numerical values as well 3774664954b6SBarry Smith 3775db781477SPatrick Sanan .seealso: `MatCreate()`, `MatCreateSeqBAIJ()`, `MatSetValues()`, `MatSeqBAIJSetPreallocation()`, `MATSEQBAIJ` 3776725b52f3SLisandro Dalcin @*/ 37779371c9d4SSatish Balay PetscErrorCode MatSeqBAIJSetPreallocationCSR(Mat B, PetscInt bs, const PetscInt i[], const PetscInt j[], const PetscScalar v[]) { 3778725b52f3SLisandro Dalcin PetscFunctionBegin; 37796ba663aaSJed Brown PetscValidHeaderSpecific(B, MAT_CLASSID, 1); 37806ba663aaSJed Brown PetscValidType(B, 1); 37816ba663aaSJed Brown PetscValidLogicalCollectiveInt(B, bs, 2); 3782cac4c232SBarry Smith PetscTryMethod(B, "MatSeqBAIJSetPreallocationCSR_C", (Mat, PetscInt, const PetscInt[], const PetscInt[], const PetscScalar[]), (B, bs, i, j, v)); 3783725b52f3SLisandro Dalcin PetscFunctionReturn(0); 3784725b52f3SLisandro Dalcin } 3785725b52f3SLisandro Dalcin 3786c75a6043SHong Zhang /*@ 3787dfb205c3SBarry Smith MatCreateSeqBAIJWithArrays - Creates an sequential BAIJ matrix using matrix elements provided by the user. 3788c75a6043SHong Zhang 3789d083f849SBarry Smith Collective 3790c75a6043SHong Zhang 3791c75a6043SHong Zhang Input Parameters: 3792c75a6043SHong Zhang + comm - must be an MPI communicator of size 1 3793c75a6043SHong Zhang . bs - size of block 3794c75a6043SHong Zhang . m - number of rows 3795c75a6043SHong Zhang . n - number of columns 3796483a2f95SBarry Smith . i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row block row of the matrix 3797c75a6043SHong Zhang . j - column indices 3798c75a6043SHong Zhang - a - matrix values 3799c75a6043SHong Zhang 3800c75a6043SHong Zhang Output Parameter: 3801c75a6043SHong Zhang . mat - the matrix 3802c75a6043SHong Zhang 3803dfb205c3SBarry Smith Level: advanced 3804c75a6043SHong Zhang 3805c75a6043SHong Zhang Notes: 3806c75a6043SHong Zhang The i, j, and a arrays are not copied by this routine, the user must free these arrays 3807c75a6043SHong Zhang once the matrix is destroyed 3808c75a6043SHong Zhang 3809c75a6043SHong Zhang You cannot set new nonzero locations into this matrix, that will generate an error. 3810c75a6043SHong Zhang 3811c75a6043SHong Zhang The i and j indices are 0 based 3812c75a6043SHong Zhang 3813dfb205c3SBarry Smith When block size is greater than 1 the matrix values must be stored using the BAIJ storage format (see the BAIJ code to determine this). 3814dfb205c3SBarry Smith 38153adadaf3SJed Brown The order of the entries in values is the same as the block compressed sparse row storage format; that is, it is 38163adadaf3SJed Brown the same as a three dimensional array in Fortran values(bs,bs,nnz) that contains the first column of the first 38173adadaf3SJed Brown block, followed by the second column of the first block etc etc. That is, the blocks are contiguous in memory 38183adadaf3SJed Brown with column-major ordering within blocks. 3819dfb205c3SBarry Smith 3820db781477SPatrick Sanan .seealso: `MatCreate()`, `MatCreateBAIJ()`, `MatCreateSeqBAIJ()` 3821c75a6043SHong Zhang 3822c75a6043SHong Zhang @*/ 38239371c9d4SSatish Balay PetscErrorCode MatCreateSeqBAIJWithArrays(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt i[], PetscInt j[], PetscScalar a[], Mat *mat) { 3824c75a6043SHong Zhang PetscInt ii; 3825c75a6043SHong Zhang Mat_SeqBAIJ *baij; 3826c75a6043SHong Zhang 3827c75a6043SHong Zhang PetscFunctionBegin; 38285f80ce2aSJacob Faibussowitsch PetscCheck(bs == 1, PETSC_COMM_SELF, PETSC_ERR_SUP, "block size %" PetscInt_FMT " > 1 is not supported yet", bs); 38295f80ce2aSJacob Faibussowitsch if (m > 0) PetscCheck(i[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0"); 3830c75a6043SHong Zhang 38319566063dSJacob Faibussowitsch PetscCall(MatCreate(comm, mat)); 38329566063dSJacob Faibussowitsch PetscCall(MatSetSizes(*mat, m, n, m, n)); 38339566063dSJacob Faibussowitsch PetscCall(MatSetType(*mat, MATSEQBAIJ)); 38349566063dSJacob Faibussowitsch PetscCall(MatSeqBAIJSetPreallocation(*mat, bs, MAT_SKIP_ALLOCATION, NULL)); 3835c75a6043SHong Zhang baij = (Mat_SeqBAIJ *)(*mat)->data; 38369566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(m, &baij->imax, m, &baij->ilen)); 38379566063dSJacob Faibussowitsch PetscCall(PetscLogObjectMemory((PetscObject)*mat, 2 * m * sizeof(PetscInt))); 3838c75a6043SHong Zhang 3839c75a6043SHong Zhang baij->i = i; 3840c75a6043SHong Zhang baij->j = j; 3841c75a6043SHong Zhang baij->a = a; 384226fbe8dcSKarl Rupp 3843c75a6043SHong Zhang baij->singlemalloc = PETSC_FALSE; 3844c75a6043SHong Zhang baij->nonew = -1; /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/ 3845e6b907acSBarry Smith baij->free_a = PETSC_FALSE; 3846e6b907acSBarry Smith baij->free_ij = PETSC_FALSE; 3847c75a6043SHong Zhang 3848c75a6043SHong Zhang for (ii = 0; ii < m; ii++) { 3849c75a6043SHong Zhang baij->ilen[ii] = baij->imax[ii] = i[ii + 1] - i[ii]; 38506bdcaf15SBarry Smith PetscCheck(i[ii + 1] - i[ii] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Negative row length in i (row indices) row = %" PetscInt_FMT " length = %" PetscInt_FMT, ii, i[ii + 1] - i[ii]); 3851c75a6043SHong Zhang } 385276bd3646SJed Brown if (PetscDefined(USE_DEBUG)) { 3853c75a6043SHong Zhang for (ii = 0; ii < baij->i[m]; ii++) { 38546bdcaf15SBarry Smith PetscCheck(j[ii] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Negative column index at location = %" PetscInt_FMT " index = %" PetscInt_FMT, ii, j[ii]); 38556bdcaf15SBarry Smith PetscCheck(j[ii] <= n - 1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column index to large at location = %" PetscInt_FMT " index = %" PetscInt_FMT, ii, j[ii]); 3856c75a6043SHong Zhang } 385776bd3646SJed Brown } 3858c75a6043SHong Zhang 38599566063dSJacob Faibussowitsch PetscCall(MatAssemblyBegin(*mat, MAT_FINAL_ASSEMBLY)); 38609566063dSJacob Faibussowitsch PetscCall(MatAssemblyEnd(*mat, MAT_FINAL_ASSEMBLY)); 3861c75a6043SHong Zhang PetscFunctionReturn(0); 3862c75a6043SHong Zhang } 3863bdf6f3fcSHong Zhang 38649371c9d4SSatish Balay PetscErrorCode MatCreateMPIMatConcatenateSeqMat_SeqBAIJ(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat) { 3865bdf6f3fcSHong Zhang PetscFunctionBegin; 38669566063dSJacob Faibussowitsch PetscCall(MatCreateMPIMatConcatenateSeqMat_MPIBAIJ(comm, inmat, n, scall, outmat)); 3867bdf6f3fcSHong Zhang PetscFunctionReturn(0); 3868bdf6f3fcSHong Zhang } 3869