1be1d678aSKris Buschelman 22593348eSBarry Smith /* 3b6490206SBarry Smith Defines the basic matrix operations for the BAIJ (compressed row) 42593348eSBarry Smith matrix storage format. 52593348eSBarry Smith */ 6c6db04a5SJed Brown #include <../src/mat/impls/baij/seq/baij.h> /*I "petscmat.h" I*/ 7c6db04a5SJed Brown #include <petscblaslapack.h> 8af0996ceSBarry Smith #include <petsc/private/kernels/blockinvert.h> 9af0996ceSBarry Smith #include <petsc/private/kernels/blockmatmult.h> 1043516a2dSKris Buschelman 117ea3e4caSstefano_zampini #if defined(PETSC_HAVE_HYPRE) 127ea3e4caSstefano_zampini PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat, MatType, MatReuse, Mat *); 137ea3e4caSstefano_zampini #endif 147ea3e4caSstefano_zampini 15b5b72c8aSIrina Sokolova #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 16fd9d3c67SJed Brown PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBAIJMKL(Mat, MatType, MatReuse, Mat *); 17b5b72c8aSIrina Sokolova #endif 18c9225affSStefano Zampini PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat, MatType, MatReuse, Mat *); 19b5b72c8aSIrina Sokolova 209371c9d4SSatish Balay PetscErrorCode MatGetColumnReductions_SeqBAIJ(Mat A, PetscInt type, PetscReal *reductions) { 219463ebdaSPierre Jolivet Mat_SeqBAIJ *a_aij = (Mat_SeqBAIJ *)A->data; 22857cbf51SRichard Tran Mills PetscInt m, n, i; 239463ebdaSPierre Jolivet PetscInt ib, jb, bs = A->rmap->bs; 249463ebdaSPierre Jolivet MatScalar *a_val = a_aij->a; 259463ebdaSPierre Jolivet 269463ebdaSPierre Jolivet PetscFunctionBegin; 279566063dSJacob Faibussowitsch PetscCall(MatGetSize(A, &m, &n)); 28857cbf51SRichard Tran Mills for (i = 0; i < n; i++) reductions[i] = 0.0; 299463ebdaSPierre Jolivet if (type == NORM_2) { 309463ebdaSPierre Jolivet for (i = a_aij->i[0]; i < a_aij->i[A->rmap->n / bs]; i++) { 319463ebdaSPierre Jolivet for (jb = 0; jb < bs; jb++) { 329463ebdaSPierre Jolivet for (ib = 0; ib < bs; ib++) { 33857cbf51SRichard Tran Mills reductions[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val 
* *a_val); 349463ebdaSPierre Jolivet a_val++; 359463ebdaSPierre Jolivet } 369463ebdaSPierre Jolivet } 379463ebdaSPierre Jolivet } 389463ebdaSPierre Jolivet } else if (type == NORM_1) { 399463ebdaSPierre Jolivet for (i = a_aij->i[0]; i < a_aij->i[A->rmap->n / bs]; i++) { 409463ebdaSPierre Jolivet for (jb = 0; jb < bs; jb++) { 419463ebdaSPierre Jolivet for (ib = 0; ib < bs; ib++) { 42857cbf51SRichard Tran Mills reductions[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val); 439463ebdaSPierre Jolivet a_val++; 449463ebdaSPierre Jolivet } 459463ebdaSPierre Jolivet } 469463ebdaSPierre Jolivet } 479463ebdaSPierre Jolivet } else if (type == NORM_INFINITY) { 489463ebdaSPierre Jolivet for (i = a_aij->i[0]; i < a_aij->i[A->rmap->n / bs]; i++) { 499463ebdaSPierre Jolivet for (jb = 0; jb < bs; jb++) { 509463ebdaSPierre Jolivet for (ib = 0; ib < bs; ib++) { 519463ebdaSPierre Jolivet int col = A->cmap->rstart + a_aij->j[i] * bs + jb; 52857cbf51SRichard Tran Mills reductions[col] = PetscMax(PetscAbsScalar(*a_val), reductions[col]); 539463ebdaSPierre Jolivet a_val++; 549463ebdaSPierre Jolivet } 559463ebdaSPierre Jolivet } 569463ebdaSPierre Jolivet } 57857cbf51SRichard Tran Mills } else if (type == REDUCTION_SUM_REALPART || type == REDUCTION_MEAN_REALPART) { 58857cbf51SRichard Tran Mills for (i = a_aij->i[0]; i < a_aij->i[A->rmap->n / bs]; i++) { 59857cbf51SRichard Tran Mills for (jb = 0; jb < bs; jb++) { 60857cbf51SRichard Tran Mills for (ib = 0; ib < bs; ib++) { 61857cbf51SRichard Tran Mills reductions[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscRealPart(*a_val); 62857cbf51SRichard Tran Mills a_val++; 63857cbf51SRichard Tran Mills } 64857cbf51SRichard Tran Mills } 65857cbf51SRichard Tran Mills } 66857cbf51SRichard Tran Mills } else if (type == REDUCTION_SUM_IMAGINARYPART || type == REDUCTION_MEAN_IMAGINARYPART) { 67857cbf51SRichard Tran Mills for (i = a_aij->i[0]; i < a_aij->i[A->rmap->n / bs]; i++) { 68857cbf51SRichard Tran Mills for (jb = 0; jb < bs; jb++) { 
69857cbf51SRichard Tran Mills for (ib = 0; ib < bs; ib++) { 70857cbf51SRichard Tran Mills reductions[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscImaginaryPart(*a_val); 71857cbf51SRichard Tran Mills a_val++; 72857cbf51SRichard Tran Mills } 73857cbf51SRichard Tran Mills } 74857cbf51SRichard Tran Mills } 75857cbf51SRichard Tran Mills } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Unknown reduction type"); 769463ebdaSPierre Jolivet if (type == NORM_2) { 77857cbf51SRichard Tran Mills for (i = 0; i < n; i++) reductions[i] = PetscSqrtReal(reductions[i]); 78857cbf51SRichard Tran Mills } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) { 79857cbf51SRichard Tran Mills for (i = 0; i < n; i++) reductions[i] /= m; 809463ebdaSPierre Jolivet } 819463ebdaSPierre Jolivet PetscFunctionReturn(0); 829463ebdaSPierre Jolivet } 839463ebdaSPierre Jolivet 849371c9d4SSatish Balay PetscErrorCode MatInvertBlockDiagonal_SeqBAIJ(Mat A, const PetscScalar **values) { 85b01c7715SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 86de80f912SBarry Smith PetscInt *diag_offset, i, bs = A->rmap->bs, mbs = a->mbs, ipvt[5], bs2 = bs * bs, *v_pivots; 877f0c90edSBarry Smith MatScalar *v = a->a, *odiag, *diag, work[25], *v_work; 8862bba022SBarry Smith PetscReal shift = 0.0; 891a9391e3SHong Zhang PetscBool allowzeropivot, zeropivotdetected = PETSC_FALSE; 90b01c7715SBarry Smith 91b01c7715SBarry Smith PetscFunctionBegin; 92a455e926SHong Zhang allowzeropivot = PetscNot(A->erroriffailure); 93a455e926SHong Zhang 949797317bSBarry Smith if (a->idiagvalid) { 959797317bSBarry Smith if (values) *values = a->idiag; 969797317bSBarry Smith PetscFunctionReturn(0); 979797317bSBarry Smith } 989566063dSJacob Faibussowitsch PetscCall(MatMarkDiagonal_SeqBAIJ(A)); 99b01c7715SBarry Smith diag_offset = a->diag; 100*4dfa11a4SJacob Faibussowitsch if (!a->idiag) { PetscCall(PetscMalloc1(bs2 * mbs, &a->idiag)); } 101b01c7715SBarry Smith diag = a->idiag; 102bbead8a2SBarry 
Smith if (values) *values = a->idiag; 103b01c7715SBarry Smith /* factor and invert each block */ 104521d7252SBarry Smith switch (bs) { 105ab040260SJed Brown case 1: 106ab040260SJed Brown for (i = 0; i < mbs; i++) { 107ab040260SJed Brown odiag = v + 1 * diag_offset[i]; 108ab040260SJed Brown diag[0] = odiag[0]; 109ec1892c8SHong Zhang 110ec1892c8SHong Zhang if (PetscAbsScalar(diag[0] + shift) < PETSC_MACHINE_EPSILON) { 111ec1892c8SHong Zhang if (allowzeropivot) { 1127b6c816cSBarry Smith A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 1137b6c816cSBarry Smith A->factorerror_zeropivot_value = PetscAbsScalar(diag[0]); 1147b6c816cSBarry Smith A->factorerror_zeropivot_row = i; 1159566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "Zero pivot, row %" PetscInt_FMT "\n", i)); 11698921bdaSJacob Faibussowitsch } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Zero pivot, row %" PetscInt_FMT " pivot value %g tolerance %g", i, (double)PetscAbsScalar(diag[0]), (double)PETSC_MACHINE_EPSILON); 117ec1892c8SHong Zhang } 118ec1892c8SHong Zhang 119d4a378daSJed Brown diag[0] = (PetscScalar)1.0 / (diag[0] + shift); 120ab040260SJed Brown diag += 1; 121ab040260SJed Brown } 122ab040260SJed Brown break; 123b01c7715SBarry Smith case 2: 124b01c7715SBarry Smith for (i = 0; i < mbs; i++) { 125b01c7715SBarry Smith odiag = v + 4 * diag_offset[i]; 1269371c9d4SSatish Balay diag[0] = odiag[0]; 1279371c9d4SSatish Balay diag[1] = odiag[1]; 1289371c9d4SSatish Balay diag[2] = odiag[2]; 1299371c9d4SSatish Balay diag[3] = odiag[3]; 1309566063dSJacob Faibussowitsch PetscCall(PetscKernel_A_gets_inverse_A_2(diag, shift, allowzeropivot, &zeropivotdetected)); 1317b6c816cSBarry Smith if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 132b01c7715SBarry Smith diag += 4; 133b01c7715SBarry Smith } 134b01c7715SBarry Smith break; 135b01c7715SBarry Smith case 3: 136b01c7715SBarry Smith for (i = 0; i < mbs; i++) { 137b01c7715SBarry Smith odiag = v + 9 * diag_offset[i]; 1389371c9d4SSatish 
Balay diag[0] = odiag[0]; 1399371c9d4SSatish Balay diag[1] = odiag[1]; 1409371c9d4SSatish Balay diag[2] = odiag[2]; 1419371c9d4SSatish Balay diag[3] = odiag[3]; 1429371c9d4SSatish Balay diag[4] = odiag[4]; 1439371c9d4SSatish Balay diag[5] = odiag[5]; 1449371c9d4SSatish Balay diag[6] = odiag[6]; 1459371c9d4SSatish Balay diag[7] = odiag[7]; 146b01c7715SBarry Smith diag[8] = odiag[8]; 1479566063dSJacob Faibussowitsch PetscCall(PetscKernel_A_gets_inverse_A_3(diag, shift, allowzeropivot, &zeropivotdetected)); 1487b6c816cSBarry Smith if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 149b01c7715SBarry Smith diag += 9; 150b01c7715SBarry Smith } 151b01c7715SBarry Smith break; 152b01c7715SBarry Smith case 4: 153b01c7715SBarry Smith for (i = 0; i < mbs; i++) { 154b01c7715SBarry Smith odiag = v + 16 * diag_offset[i]; 1559566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(diag, odiag, 16)); 1569566063dSJacob Faibussowitsch PetscCall(PetscKernel_A_gets_inverse_A_4(diag, shift, allowzeropivot, &zeropivotdetected)); 1577b6c816cSBarry Smith if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 158b01c7715SBarry Smith diag += 16; 159b01c7715SBarry Smith } 160b01c7715SBarry Smith break; 161b01c7715SBarry Smith case 5: 162b01c7715SBarry Smith for (i = 0; i < mbs; i++) { 163b01c7715SBarry Smith odiag = v + 25 * diag_offset[i]; 1649566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(diag, odiag, 25)); 1659566063dSJacob Faibussowitsch PetscCall(PetscKernel_A_gets_inverse_A_5(diag, ipvt, work, shift, allowzeropivot, &zeropivotdetected)); 1667b6c816cSBarry Smith if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 167b01c7715SBarry Smith diag += 25; 168b01c7715SBarry Smith } 169b01c7715SBarry Smith break; 170d49b2adcSBarry Smith case 6: 171d49b2adcSBarry Smith for (i = 0; i < mbs; i++) { 172d49b2adcSBarry Smith odiag = v + 36 * diag_offset[i]; 1739566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(diag, odiag, 36)); 
1749566063dSJacob Faibussowitsch PetscCall(PetscKernel_A_gets_inverse_A_6(diag, shift, allowzeropivot, &zeropivotdetected)); 1757b6c816cSBarry Smith if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 176d49b2adcSBarry Smith diag += 36; 177d49b2adcSBarry Smith } 178d49b2adcSBarry Smith break; 179de80f912SBarry Smith case 7: 180de80f912SBarry Smith for (i = 0; i < mbs; i++) { 181de80f912SBarry Smith odiag = v + 49 * diag_offset[i]; 1829566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(diag, odiag, 49)); 1839566063dSJacob Faibussowitsch PetscCall(PetscKernel_A_gets_inverse_A_7(diag, shift, allowzeropivot, &zeropivotdetected)); 1847b6c816cSBarry Smith if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 185de80f912SBarry Smith diag += 49; 186de80f912SBarry Smith } 187de80f912SBarry Smith break; 188b01c7715SBarry Smith default: 1899566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(bs, &v_work, bs, &v_pivots)); 190de80f912SBarry Smith for (i = 0; i < mbs; i++) { 191de80f912SBarry Smith odiag = v + bs2 * diag_offset[i]; 1929566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(diag, odiag, bs2)); 1939566063dSJacob Faibussowitsch PetscCall(PetscKernel_A_gets_inverse_A(bs, diag, v_pivots, v_work, allowzeropivot, &zeropivotdetected)); 1947b6c816cSBarry Smith if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 195de80f912SBarry Smith diag += bs2; 196de80f912SBarry Smith } 1979566063dSJacob Faibussowitsch PetscCall(PetscFree2(v_work, v_pivots)); 198b01c7715SBarry Smith } 199b01c7715SBarry Smith a->idiagvalid = PETSC_TRUE; 200b01c7715SBarry Smith PetscFunctionReturn(0); 201b01c7715SBarry Smith } 202b01c7715SBarry Smith 2039371c9d4SSatish Balay PetscErrorCode MatSOR_SeqBAIJ(Mat A, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx) { 2046d3beeddSMatthew Knepley Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 205e48d15efSToby Isaac PetscScalar *x, *work, *w, *workt, *t; 
206e48d15efSToby Isaac const MatScalar *v, *aa = a->a, *idiag; 207e48d15efSToby Isaac const PetscScalar *b, *xb; 2085455b99fSToby Isaac PetscScalar s[7], xw[7] = {0}; /* avoid some compilers thinking xw is uninitialized */ 209e48d15efSToby Isaac PetscInt m = a->mbs, i, i2, nz, bs = A->rmap->bs, bs2 = bs * bs, k, j, idx, it; 210c1ac3661SBarry Smith const PetscInt *diag, *ai = a->i, *aj = a->j, *vi; 211b01c7715SBarry Smith 212b01c7715SBarry Smith PetscFunctionBegin; 213b01c7715SBarry Smith its = its * lits; 2145f80ce2aSJacob Faibussowitsch PetscCheck(!(flag & SOR_EISENSTAT), PETSC_COMM_SELF, PETSC_ERR_SUP, "No support yet for Eisenstat"); 2155f80ce2aSJacob Faibussowitsch PetscCheck(its > 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Relaxation requires global its %" PetscInt_FMT " and local its %" PetscInt_FMT " both positive", its, lits); 2165f80ce2aSJacob Faibussowitsch PetscCheck(!fshift, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for diagonal shift"); 2175f80ce2aSJacob Faibussowitsch PetscCheck(omega == 1.0, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for non-trivial relaxation factor"); 2185f80ce2aSJacob Faibussowitsch PetscCheck(!(flag & SOR_APPLY_UPPER) && !(flag & SOR_APPLY_LOWER), PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for applying upper or lower triangular parts"); 219b01c7715SBarry Smith 2209566063dSJacob Faibussowitsch if (!a->idiagvalid) PetscCall(MatInvertBlockDiagonal(A, NULL)); 221b01c7715SBarry Smith 222b2ec919aSToby Isaac if (!m) PetscFunctionReturn(0); 223b01c7715SBarry Smith diag = a->diag; 224b01c7715SBarry Smith idiag = a->idiag; 225de80f912SBarry Smith k = PetscMax(A->rmap->n, A->cmap->n); 22648a46eb9SPierre Jolivet if (!a->mult_work) PetscCall(PetscMalloc1(k + 1, &a->mult_work)); 22748a46eb9SPierre Jolivet if (!a->sor_workt) PetscCall(PetscMalloc1(k, &a->sor_workt)); 22848a46eb9SPierre Jolivet if (!a->sor_work) PetscCall(PetscMalloc1(bs, &a->sor_work)); 2293475c22fSBarry Smith work = a->mult_work; 2303475c22fSBarry Smith t = a->sor_workt; 
231de80f912SBarry Smith w = a->sor_work; 232de80f912SBarry Smith 2339566063dSJacob Faibussowitsch PetscCall(VecGetArray(xx, &x)); 2349566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(bb, &b)); 235de80f912SBarry Smith 236de80f912SBarry Smith if (flag & SOR_ZERO_INITIAL_GUESS) { 237de80f912SBarry Smith if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) { 238e48d15efSToby Isaac switch (bs) { 239e48d15efSToby Isaac case 1: 240e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_1(x, idiag, b); 241e48d15efSToby Isaac t[0] = b[0]; 242e48d15efSToby Isaac i2 = 1; 243e48d15efSToby Isaac idiag += 1; 244e48d15efSToby Isaac for (i = 1; i < m; i++) { 245e48d15efSToby Isaac v = aa + ai[i]; 246e48d15efSToby Isaac vi = aj + ai[i]; 247e48d15efSToby Isaac nz = diag[i] - ai[i]; 248e48d15efSToby Isaac s[0] = b[i2]; 249e48d15efSToby Isaac for (j = 0; j < nz; j++) { 250e48d15efSToby Isaac xw[0] = x[vi[j]]; 251e48d15efSToby Isaac PetscKernel_v_gets_v_minus_A_times_w_1(s, (v + j), xw); 252e48d15efSToby Isaac } 253e48d15efSToby Isaac t[i2] = s[0]; 254e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_1(xw, idiag, s); 255e48d15efSToby Isaac x[i2] = xw[0]; 256e48d15efSToby Isaac idiag += 1; 257e48d15efSToby Isaac i2 += 1; 258e48d15efSToby Isaac } 259e48d15efSToby Isaac break; 260e48d15efSToby Isaac case 2: 261e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_2(x, idiag, b); 2629371c9d4SSatish Balay t[0] = b[0]; 2639371c9d4SSatish Balay t[1] = b[1]; 264e48d15efSToby Isaac i2 = 2; 265e48d15efSToby Isaac idiag += 4; 266e48d15efSToby Isaac for (i = 1; i < m; i++) { 267e48d15efSToby Isaac v = aa + 4 * ai[i]; 268e48d15efSToby Isaac vi = aj + ai[i]; 269e48d15efSToby Isaac nz = diag[i] - ai[i]; 2709371c9d4SSatish Balay s[0] = b[i2]; 2719371c9d4SSatish Balay s[1] = b[i2 + 1]; 272e48d15efSToby Isaac for (j = 0; j < nz; j++) { 273e48d15efSToby Isaac idx = 2 * vi[j]; 274e48d15efSToby Isaac it = 4 * j; 2759371c9d4SSatish Balay xw[0] = x[idx]; 2769371c9d4SSatish Balay xw[1] = x[1 + idx]; 
277e48d15efSToby Isaac PetscKernel_v_gets_v_minus_A_times_w_2(s, (v + it), xw); 278e48d15efSToby Isaac } 2799371c9d4SSatish Balay t[i2] = s[0]; 2809371c9d4SSatish Balay t[i2 + 1] = s[1]; 281e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_2(xw, idiag, s); 2829371c9d4SSatish Balay x[i2] = xw[0]; 2839371c9d4SSatish Balay x[i2 + 1] = xw[1]; 284e48d15efSToby Isaac idiag += 4; 285e48d15efSToby Isaac i2 += 2; 286e48d15efSToby Isaac } 287e48d15efSToby Isaac break; 288e48d15efSToby Isaac case 3: 289e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_3(x, idiag, b); 2909371c9d4SSatish Balay t[0] = b[0]; 2919371c9d4SSatish Balay t[1] = b[1]; 2929371c9d4SSatish Balay t[2] = b[2]; 293e48d15efSToby Isaac i2 = 3; 294e48d15efSToby Isaac idiag += 9; 295e48d15efSToby Isaac for (i = 1; i < m; i++) { 296e48d15efSToby Isaac v = aa + 9 * ai[i]; 297e48d15efSToby Isaac vi = aj + ai[i]; 298e48d15efSToby Isaac nz = diag[i] - ai[i]; 2999371c9d4SSatish Balay s[0] = b[i2]; 3009371c9d4SSatish Balay s[1] = b[i2 + 1]; 3019371c9d4SSatish Balay s[2] = b[i2 + 2]; 302e48d15efSToby Isaac while (nz--) { 303e48d15efSToby Isaac idx = 3 * (*vi++); 3049371c9d4SSatish Balay xw[0] = x[idx]; 3059371c9d4SSatish Balay xw[1] = x[1 + idx]; 3069371c9d4SSatish Balay xw[2] = x[2 + idx]; 307e48d15efSToby Isaac PetscKernel_v_gets_v_minus_A_times_w_3(s, v, xw); 308e48d15efSToby Isaac v += 9; 309e48d15efSToby Isaac } 3109371c9d4SSatish Balay t[i2] = s[0]; 3119371c9d4SSatish Balay t[i2 + 1] = s[1]; 3129371c9d4SSatish Balay t[i2 + 2] = s[2]; 313e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_3(xw, idiag, s); 3149371c9d4SSatish Balay x[i2] = xw[0]; 3159371c9d4SSatish Balay x[i2 + 1] = xw[1]; 3169371c9d4SSatish Balay x[i2 + 2] = xw[2]; 317e48d15efSToby Isaac idiag += 9; 318e48d15efSToby Isaac i2 += 3; 319e48d15efSToby Isaac } 320e48d15efSToby Isaac break; 321e48d15efSToby Isaac case 4: 322e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_4(x, idiag, b); 3239371c9d4SSatish Balay t[0] = b[0]; 3249371c9d4SSatish Balay t[1] = 
b[1]; 3259371c9d4SSatish Balay t[2] = b[2]; 3269371c9d4SSatish Balay t[3] = b[3]; 327e48d15efSToby Isaac i2 = 4; 328e48d15efSToby Isaac idiag += 16; 329e48d15efSToby Isaac for (i = 1; i < m; i++) { 330e48d15efSToby Isaac v = aa + 16 * ai[i]; 331e48d15efSToby Isaac vi = aj + ai[i]; 332e48d15efSToby Isaac nz = diag[i] - ai[i]; 3339371c9d4SSatish Balay s[0] = b[i2]; 3349371c9d4SSatish Balay s[1] = b[i2 + 1]; 3359371c9d4SSatish Balay s[2] = b[i2 + 2]; 3369371c9d4SSatish Balay s[3] = b[i2 + 3]; 337e48d15efSToby Isaac while (nz--) { 338e48d15efSToby Isaac idx = 4 * (*vi++); 3399371c9d4SSatish Balay xw[0] = x[idx]; 3409371c9d4SSatish Balay xw[1] = x[1 + idx]; 3419371c9d4SSatish Balay xw[2] = x[2 + idx]; 3429371c9d4SSatish Balay xw[3] = x[3 + idx]; 343e48d15efSToby Isaac PetscKernel_v_gets_v_minus_A_times_w_4(s, v, xw); 344e48d15efSToby Isaac v += 16; 345e48d15efSToby Isaac } 3469371c9d4SSatish Balay t[i2] = s[0]; 3479371c9d4SSatish Balay t[i2 + 1] = s[1]; 3489371c9d4SSatish Balay t[i2 + 2] = s[2]; 3499371c9d4SSatish Balay t[i2 + 3] = s[3]; 350e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_4(xw, idiag, s); 3519371c9d4SSatish Balay x[i2] = xw[0]; 3529371c9d4SSatish Balay x[i2 + 1] = xw[1]; 3539371c9d4SSatish Balay x[i2 + 2] = xw[2]; 3549371c9d4SSatish Balay x[i2 + 3] = xw[3]; 355e48d15efSToby Isaac idiag += 16; 356e48d15efSToby Isaac i2 += 4; 357e48d15efSToby Isaac } 358e48d15efSToby Isaac break; 359e48d15efSToby Isaac case 5: 360e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_5(x, idiag, b); 3619371c9d4SSatish Balay t[0] = b[0]; 3629371c9d4SSatish Balay t[1] = b[1]; 3639371c9d4SSatish Balay t[2] = b[2]; 3649371c9d4SSatish Balay t[3] = b[3]; 3659371c9d4SSatish Balay t[4] = b[4]; 366e48d15efSToby Isaac i2 = 5; 367e48d15efSToby Isaac idiag += 25; 368e48d15efSToby Isaac for (i = 1; i < m; i++) { 369e48d15efSToby Isaac v = aa + 25 * ai[i]; 370e48d15efSToby Isaac vi = aj + ai[i]; 371e48d15efSToby Isaac nz = diag[i] - ai[i]; 3729371c9d4SSatish Balay s[0] = b[i2]; 
3739371c9d4SSatish Balay s[1] = b[i2 + 1]; 3749371c9d4SSatish Balay s[2] = b[i2 + 2]; 3759371c9d4SSatish Balay s[3] = b[i2 + 3]; 3769371c9d4SSatish Balay s[4] = b[i2 + 4]; 377e48d15efSToby Isaac while (nz--) { 378e48d15efSToby Isaac idx = 5 * (*vi++); 3799371c9d4SSatish Balay xw[0] = x[idx]; 3809371c9d4SSatish Balay xw[1] = x[1 + idx]; 3819371c9d4SSatish Balay xw[2] = x[2 + idx]; 3829371c9d4SSatish Balay xw[3] = x[3 + idx]; 3839371c9d4SSatish Balay xw[4] = x[4 + idx]; 384e48d15efSToby Isaac PetscKernel_v_gets_v_minus_A_times_w_5(s, v, xw); 385e48d15efSToby Isaac v += 25; 386e48d15efSToby Isaac } 3879371c9d4SSatish Balay t[i2] = s[0]; 3889371c9d4SSatish Balay t[i2 + 1] = s[1]; 3899371c9d4SSatish Balay t[i2 + 2] = s[2]; 3909371c9d4SSatish Balay t[i2 + 3] = s[3]; 3919371c9d4SSatish Balay t[i2 + 4] = s[4]; 392e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_5(xw, idiag, s); 3939371c9d4SSatish Balay x[i2] = xw[0]; 3949371c9d4SSatish Balay x[i2 + 1] = xw[1]; 3959371c9d4SSatish Balay x[i2 + 2] = xw[2]; 3969371c9d4SSatish Balay x[i2 + 3] = xw[3]; 3979371c9d4SSatish Balay x[i2 + 4] = xw[4]; 398e48d15efSToby Isaac idiag += 25; 399e48d15efSToby Isaac i2 += 5; 400e48d15efSToby Isaac } 401e48d15efSToby Isaac break; 402e48d15efSToby Isaac case 6: 403e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_6(x, idiag, b); 4049371c9d4SSatish Balay t[0] = b[0]; 4059371c9d4SSatish Balay t[1] = b[1]; 4069371c9d4SSatish Balay t[2] = b[2]; 4079371c9d4SSatish Balay t[3] = b[3]; 4089371c9d4SSatish Balay t[4] = b[4]; 4099371c9d4SSatish Balay t[5] = b[5]; 410e48d15efSToby Isaac i2 = 6; 411e48d15efSToby Isaac idiag += 36; 412e48d15efSToby Isaac for (i = 1; i < m; i++) { 413e48d15efSToby Isaac v = aa + 36 * ai[i]; 414e48d15efSToby Isaac vi = aj + ai[i]; 415e48d15efSToby Isaac nz = diag[i] - ai[i]; 4169371c9d4SSatish Balay s[0] = b[i2]; 4179371c9d4SSatish Balay s[1] = b[i2 + 1]; 4189371c9d4SSatish Balay s[2] = b[i2 + 2]; 4199371c9d4SSatish Balay s[3] = b[i2 + 3]; 4209371c9d4SSatish Balay s[4] = b[i2 
+ 4]; 4219371c9d4SSatish Balay s[5] = b[i2 + 5]; 422e48d15efSToby Isaac while (nz--) { 423e48d15efSToby Isaac idx = 6 * (*vi++); 4249371c9d4SSatish Balay xw[0] = x[idx]; 4259371c9d4SSatish Balay xw[1] = x[1 + idx]; 4269371c9d4SSatish Balay xw[2] = x[2 + idx]; 4279371c9d4SSatish Balay xw[3] = x[3 + idx]; 4289371c9d4SSatish Balay xw[4] = x[4 + idx]; 4299371c9d4SSatish Balay xw[5] = x[5 + idx]; 430e48d15efSToby Isaac PetscKernel_v_gets_v_minus_A_times_w_6(s, v, xw); 431e48d15efSToby Isaac v += 36; 432e48d15efSToby Isaac } 4339371c9d4SSatish Balay t[i2] = s[0]; 4349371c9d4SSatish Balay t[i2 + 1] = s[1]; 4359371c9d4SSatish Balay t[i2 + 2] = s[2]; 4369371c9d4SSatish Balay t[i2 + 3] = s[3]; 4379371c9d4SSatish Balay t[i2 + 4] = s[4]; 4389371c9d4SSatish Balay t[i2 + 5] = s[5]; 439e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_6(xw, idiag, s); 4409371c9d4SSatish Balay x[i2] = xw[0]; 4419371c9d4SSatish Balay x[i2 + 1] = xw[1]; 4429371c9d4SSatish Balay x[i2 + 2] = xw[2]; 4439371c9d4SSatish Balay x[i2 + 3] = xw[3]; 4449371c9d4SSatish Balay x[i2 + 4] = xw[4]; 4459371c9d4SSatish Balay x[i2 + 5] = xw[5]; 446e48d15efSToby Isaac idiag += 36; 447e48d15efSToby Isaac i2 += 6; 448e48d15efSToby Isaac } 449e48d15efSToby Isaac break; 450e48d15efSToby Isaac case 7: 451e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_7(x, idiag, b); 4529371c9d4SSatish Balay t[0] = b[0]; 4539371c9d4SSatish Balay t[1] = b[1]; 4549371c9d4SSatish Balay t[2] = b[2]; 4559371c9d4SSatish Balay t[3] = b[3]; 4569371c9d4SSatish Balay t[4] = b[4]; 4579371c9d4SSatish Balay t[5] = b[5]; 4589371c9d4SSatish Balay t[6] = b[6]; 459e48d15efSToby Isaac i2 = 7; 460e48d15efSToby Isaac idiag += 49; 461e48d15efSToby Isaac for (i = 1; i < m; i++) { 462e48d15efSToby Isaac v = aa + 49 * ai[i]; 463e48d15efSToby Isaac vi = aj + ai[i]; 464e48d15efSToby Isaac nz = diag[i] - ai[i]; 4659371c9d4SSatish Balay s[0] = b[i2]; 4669371c9d4SSatish Balay s[1] = b[i2 + 1]; 4679371c9d4SSatish Balay s[2] = b[i2 + 2]; 4689371c9d4SSatish Balay s[3] = 
b[i2 + 3]; 4699371c9d4SSatish Balay s[4] = b[i2 + 4]; 4709371c9d4SSatish Balay s[5] = b[i2 + 5]; 4719371c9d4SSatish Balay s[6] = b[i2 + 6]; 472e48d15efSToby Isaac while (nz--) { 473e48d15efSToby Isaac idx = 7 * (*vi++); 4749371c9d4SSatish Balay xw[0] = x[idx]; 4759371c9d4SSatish Balay xw[1] = x[1 + idx]; 4769371c9d4SSatish Balay xw[2] = x[2 + idx]; 4779371c9d4SSatish Balay xw[3] = x[3 + idx]; 4789371c9d4SSatish Balay xw[4] = x[4 + idx]; 4799371c9d4SSatish Balay xw[5] = x[5 + idx]; 4809371c9d4SSatish Balay xw[6] = x[6 + idx]; 481e48d15efSToby Isaac PetscKernel_v_gets_v_minus_A_times_w_7(s, v, xw); 482e48d15efSToby Isaac v += 49; 483e48d15efSToby Isaac } 4849371c9d4SSatish Balay t[i2] = s[0]; 4859371c9d4SSatish Balay t[i2 + 1] = s[1]; 4869371c9d4SSatish Balay t[i2 + 2] = s[2]; 4879371c9d4SSatish Balay t[i2 + 3] = s[3]; 4889371c9d4SSatish Balay t[i2 + 4] = s[4]; 4899371c9d4SSatish Balay t[i2 + 5] = s[5]; 4909371c9d4SSatish Balay t[i2 + 6] = s[6]; 491e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_7(xw, idiag, s); 4929371c9d4SSatish Balay x[i2] = xw[0]; 4939371c9d4SSatish Balay x[i2 + 1] = xw[1]; 4949371c9d4SSatish Balay x[i2 + 2] = xw[2]; 4959371c9d4SSatish Balay x[i2 + 3] = xw[3]; 4969371c9d4SSatish Balay x[i2 + 4] = xw[4]; 4979371c9d4SSatish Balay x[i2 + 5] = xw[5]; 4989371c9d4SSatish Balay x[i2 + 6] = xw[6]; 499e48d15efSToby Isaac idiag += 49; 500e48d15efSToby Isaac i2 += 7; 501e48d15efSToby Isaac } 502e48d15efSToby Isaac break; 503e48d15efSToby Isaac default: 50496b95a6bSBarry Smith PetscKernel_w_gets_Ar_times_v(bs, bs, b, idiag, x); 5059566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(t, b, bs)); 506de80f912SBarry Smith i2 = bs; 507de80f912SBarry Smith idiag += bs2; 508de80f912SBarry Smith for (i = 1; i < m; i++) { 509de80f912SBarry Smith v = aa + bs2 * ai[i]; 510de80f912SBarry Smith vi = aj + ai[i]; 511de80f912SBarry Smith nz = diag[i] - ai[i]; 512de80f912SBarry Smith 5139566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(w, b + i2, bs)); 
514de80f912SBarry Smith /* copy all rows of x that are needed into contiguous space */ 515de80f912SBarry Smith workt = work; 516de80f912SBarry Smith for (j = 0; j < nz; j++) { 5179566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(workt, x + bs * (*vi++), bs)); 518de80f912SBarry Smith workt += bs; 519de80f912SBarry Smith } 52096b95a6bSBarry Smith PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, v, work); 5219566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(t + i2, w, bs)); 52296b95a6bSBarry Smith PetscKernel_w_gets_Ar_times_v(bs, bs, w, idiag, x + i2); 523de80f912SBarry Smith 524de80f912SBarry Smith idiag += bs2; 525de80f912SBarry Smith i2 += bs; 526de80f912SBarry Smith } 527e48d15efSToby Isaac break; 528e48d15efSToby Isaac } 529de80f912SBarry Smith /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 5309566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(1.0 * bs2 * a->nz)); 531e48d15efSToby Isaac xb = t; 5329371c9d4SSatish Balay } else xb = b; 533de80f912SBarry Smith if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 534e48d15efSToby Isaac idiag = a->idiag + bs2 * (a->mbs - 1); 535e48d15efSToby Isaac i2 = bs * (m - 1); 536e48d15efSToby Isaac switch (bs) { 537e48d15efSToby Isaac case 1: 538e48d15efSToby Isaac s[0] = xb[i2]; 539e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_1(xw, idiag, s); 540e48d15efSToby Isaac x[i2] = xw[0]; 541e48d15efSToby Isaac i2 -= 1; 542e48d15efSToby Isaac for (i = m - 2; i >= 0; i--) { 543e48d15efSToby Isaac v = aa + (diag[i] + 1); 544e48d15efSToby Isaac vi = aj + diag[i] + 1; 545e48d15efSToby Isaac nz = ai[i + 1] - diag[i] - 1; 546e48d15efSToby Isaac s[0] = xb[i2]; 547e48d15efSToby Isaac for (j = 0; j < nz; j++) { 548e48d15efSToby Isaac xw[0] = x[vi[j]]; 549e48d15efSToby Isaac PetscKernel_v_gets_v_minus_A_times_w_1(s, (v + j), xw); 550e48d15efSToby Isaac } 551e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_1(xw, idiag, s); 552e48d15efSToby Isaac x[i2] = xw[0]; 553e48d15efSToby 
Isaac idiag -= 1; 554e48d15efSToby Isaac i2 -= 1; 555e48d15efSToby Isaac } 556e48d15efSToby Isaac break; 557e48d15efSToby Isaac case 2: 5589371c9d4SSatish Balay s[0] = xb[i2]; 5599371c9d4SSatish Balay s[1] = xb[i2 + 1]; 560e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_2(xw, idiag, s); 5619371c9d4SSatish Balay x[i2] = xw[0]; 5629371c9d4SSatish Balay x[i2 + 1] = xw[1]; 563e48d15efSToby Isaac i2 -= 2; 564e48d15efSToby Isaac idiag -= 4; 565e48d15efSToby Isaac for (i = m - 2; i >= 0; i--) { 566e48d15efSToby Isaac v = aa + 4 * (diag[i] + 1); 567e48d15efSToby Isaac vi = aj + diag[i] + 1; 568e48d15efSToby Isaac nz = ai[i + 1] - diag[i] - 1; 5699371c9d4SSatish Balay s[0] = xb[i2]; 5709371c9d4SSatish Balay s[1] = xb[i2 + 1]; 571e48d15efSToby Isaac for (j = 0; j < nz; j++) { 572e48d15efSToby Isaac idx = 2 * vi[j]; 573e48d15efSToby Isaac it = 4 * j; 5749371c9d4SSatish Balay xw[0] = x[idx]; 5759371c9d4SSatish Balay xw[1] = x[1 + idx]; 576e48d15efSToby Isaac PetscKernel_v_gets_v_minus_A_times_w_2(s, (v + it), xw); 577e48d15efSToby Isaac } 578e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_2(xw, idiag, s); 5799371c9d4SSatish Balay x[i2] = xw[0]; 5809371c9d4SSatish Balay x[i2 + 1] = xw[1]; 581e48d15efSToby Isaac idiag -= 4; 582e48d15efSToby Isaac i2 -= 2; 583e48d15efSToby Isaac } 584e48d15efSToby Isaac break; 585e48d15efSToby Isaac case 3: 5869371c9d4SSatish Balay s[0] = xb[i2]; 5879371c9d4SSatish Balay s[1] = xb[i2 + 1]; 5889371c9d4SSatish Balay s[2] = xb[i2 + 2]; 589e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_3(xw, idiag, s); 5909371c9d4SSatish Balay x[i2] = xw[0]; 5919371c9d4SSatish Balay x[i2 + 1] = xw[1]; 5929371c9d4SSatish Balay x[i2 + 2] = xw[2]; 593e48d15efSToby Isaac i2 -= 3; 594e48d15efSToby Isaac idiag -= 9; 595e48d15efSToby Isaac for (i = m - 2; i >= 0; i--) { 596e48d15efSToby Isaac v = aa + 9 * (diag[i] + 1); 597e48d15efSToby Isaac vi = aj + diag[i] + 1; 598e48d15efSToby Isaac nz = ai[i + 1] - diag[i] - 1; 5999371c9d4SSatish Balay s[0] = xb[i2]; 
6009371c9d4SSatish Balay s[1] = xb[i2 + 1]; 6019371c9d4SSatish Balay s[2] = xb[i2 + 2]; 602e48d15efSToby Isaac while (nz--) { 603e48d15efSToby Isaac idx = 3 * (*vi++); 6049371c9d4SSatish Balay xw[0] = x[idx]; 6059371c9d4SSatish Balay xw[1] = x[1 + idx]; 6069371c9d4SSatish Balay xw[2] = x[2 + idx]; 607e48d15efSToby Isaac PetscKernel_v_gets_v_minus_A_times_w_3(s, v, xw); 608e48d15efSToby Isaac v += 9; 609e48d15efSToby Isaac } 610e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_3(xw, idiag, s); 6119371c9d4SSatish Balay x[i2] = xw[0]; 6129371c9d4SSatish Balay x[i2 + 1] = xw[1]; 6139371c9d4SSatish Balay x[i2 + 2] = xw[2]; 614e48d15efSToby Isaac idiag -= 9; 615e48d15efSToby Isaac i2 -= 3; 616e48d15efSToby Isaac } 617e48d15efSToby Isaac break; 618e48d15efSToby Isaac case 4: 6199371c9d4SSatish Balay s[0] = xb[i2]; 6209371c9d4SSatish Balay s[1] = xb[i2 + 1]; 6219371c9d4SSatish Balay s[2] = xb[i2 + 2]; 6229371c9d4SSatish Balay s[3] = xb[i2 + 3]; 623e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_4(xw, idiag, s); 6249371c9d4SSatish Balay x[i2] = xw[0]; 6259371c9d4SSatish Balay x[i2 + 1] = xw[1]; 6269371c9d4SSatish Balay x[i2 + 2] = xw[2]; 6279371c9d4SSatish Balay x[i2 + 3] = xw[3]; 628e48d15efSToby Isaac i2 -= 4; 629e48d15efSToby Isaac idiag -= 16; 630e48d15efSToby Isaac for (i = m - 2; i >= 0; i--) { 631e48d15efSToby Isaac v = aa + 16 * (diag[i] + 1); 632e48d15efSToby Isaac vi = aj + diag[i] + 1; 633e48d15efSToby Isaac nz = ai[i + 1] - diag[i] - 1; 6349371c9d4SSatish Balay s[0] = xb[i2]; 6359371c9d4SSatish Balay s[1] = xb[i2 + 1]; 6369371c9d4SSatish Balay s[2] = xb[i2 + 2]; 6379371c9d4SSatish Balay s[3] = xb[i2 + 3]; 638e48d15efSToby Isaac while (nz--) { 639e48d15efSToby Isaac idx = 4 * (*vi++); 6409371c9d4SSatish Balay xw[0] = x[idx]; 6419371c9d4SSatish Balay xw[1] = x[1 + idx]; 6429371c9d4SSatish Balay xw[2] = x[2 + idx]; 6439371c9d4SSatish Balay xw[3] = x[3 + idx]; 644e48d15efSToby Isaac PetscKernel_v_gets_v_minus_A_times_w_4(s, v, xw); 645e48d15efSToby Isaac v += 16; 
646e48d15efSToby Isaac } 647e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_4(xw, idiag, s); 6489371c9d4SSatish Balay x[i2] = xw[0]; 6499371c9d4SSatish Balay x[i2 + 1] = xw[1]; 6509371c9d4SSatish Balay x[i2 + 2] = xw[2]; 6519371c9d4SSatish Balay x[i2 + 3] = xw[3]; 652e48d15efSToby Isaac idiag -= 16; 653e48d15efSToby Isaac i2 -= 4; 654e48d15efSToby Isaac } 655e48d15efSToby Isaac break; 656e48d15efSToby Isaac case 5: 6579371c9d4SSatish Balay s[0] = xb[i2]; 6589371c9d4SSatish Balay s[1] = xb[i2 + 1]; 6599371c9d4SSatish Balay s[2] = xb[i2 + 2]; 6609371c9d4SSatish Balay s[3] = xb[i2 + 3]; 6619371c9d4SSatish Balay s[4] = xb[i2 + 4]; 662e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_5(xw, idiag, s); 6639371c9d4SSatish Balay x[i2] = xw[0]; 6649371c9d4SSatish Balay x[i2 + 1] = xw[1]; 6659371c9d4SSatish Balay x[i2 + 2] = xw[2]; 6669371c9d4SSatish Balay x[i2 + 3] = xw[3]; 6679371c9d4SSatish Balay x[i2 + 4] = xw[4]; 668e48d15efSToby Isaac i2 -= 5; 669e48d15efSToby Isaac idiag -= 25; 670e48d15efSToby Isaac for (i = m - 2; i >= 0; i--) { 671e48d15efSToby Isaac v = aa + 25 * (diag[i] + 1); 672e48d15efSToby Isaac vi = aj + diag[i] + 1; 673e48d15efSToby Isaac nz = ai[i + 1] - diag[i] - 1; 6749371c9d4SSatish Balay s[0] = xb[i2]; 6759371c9d4SSatish Balay s[1] = xb[i2 + 1]; 6769371c9d4SSatish Balay s[2] = xb[i2 + 2]; 6779371c9d4SSatish Balay s[3] = xb[i2 + 3]; 6789371c9d4SSatish Balay s[4] = xb[i2 + 4]; 679e48d15efSToby Isaac while (nz--) { 680e48d15efSToby Isaac idx = 5 * (*vi++); 6819371c9d4SSatish Balay xw[0] = x[idx]; 6829371c9d4SSatish Balay xw[1] = x[1 + idx]; 6839371c9d4SSatish Balay xw[2] = x[2 + idx]; 6849371c9d4SSatish Balay xw[3] = x[3 + idx]; 6859371c9d4SSatish Balay xw[4] = x[4 + idx]; 686e48d15efSToby Isaac PetscKernel_v_gets_v_minus_A_times_w_5(s, v, xw); 687e48d15efSToby Isaac v += 25; 688e48d15efSToby Isaac } 689e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_5(xw, idiag, s); 6909371c9d4SSatish Balay x[i2] = xw[0]; 6919371c9d4SSatish Balay x[i2 + 1] = xw[1]; 
6929371c9d4SSatish Balay x[i2 + 2] = xw[2]; 6939371c9d4SSatish Balay x[i2 + 3] = xw[3]; 6949371c9d4SSatish Balay x[i2 + 4] = xw[4]; 695e48d15efSToby Isaac idiag -= 25; 696e48d15efSToby Isaac i2 -= 5; 697e48d15efSToby Isaac } 698e48d15efSToby Isaac break; 699e48d15efSToby Isaac case 6: 7009371c9d4SSatish Balay s[0] = xb[i2]; 7019371c9d4SSatish Balay s[1] = xb[i2 + 1]; 7029371c9d4SSatish Balay s[2] = xb[i2 + 2]; 7039371c9d4SSatish Balay s[3] = xb[i2 + 3]; 7049371c9d4SSatish Balay s[4] = xb[i2 + 4]; 7059371c9d4SSatish Balay s[5] = xb[i2 + 5]; 706e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_6(xw, idiag, s); 7079371c9d4SSatish Balay x[i2] = xw[0]; 7089371c9d4SSatish Balay x[i2 + 1] = xw[1]; 7099371c9d4SSatish Balay x[i2 + 2] = xw[2]; 7109371c9d4SSatish Balay x[i2 + 3] = xw[3]; 7119371c9d4SSatish Balay x[i2 + 4] = xw[4]; 7129371c9d4SSatish Balay x[i2 + 5] = xw[5]; 713e48d15efSToby Isaac i2 -= 6; 714e48d15efSToby Isaac idiag -= 36; 715e48d15efSToby Isaac for (i = m - 2; i >= 0; i--) { 716e48d15efSToby Isaac v = aa + 36 * (diag[i] + 1); 717e48d15efSToby Isaac vi = aj + diag[i] + 1; 718e48d15efSToby Isaac nz = ai[i + 1] - diag[i] - 1; 7199371c9d4SSatish Balay s[0] = xb[i2]; 7209371c9d4SSatish Balay s[1] = xb[i2 + 1]; 7219371c9d4SSatish Balay s[2] = xb[i2 + 2]; 7229371c9d4SSatish Balay s[3] = xb[i2 + 3]; 7239371c9d4SSatish Balay s[4] = xb[i2 + 4]; 7249371c9d4SSatish Balay s[5] = xb[i2 + 5]; 725e48d15efSToby Isaac while (nz--) { 726e48d15efSToby Isaac idx = 6 * (*vi++); 7279371c9d4SSatish Balay xw[0] = x[idx]; 7289371c9d4SSatish Balay xw[1] = x[1 + idx]; 7299371c9d4SSatish Balay xw[2] = x[2 + idx]; 7309371c9d4SSatish Balay xw[3] = x[3 + idx]; 7319371c9d4SSatish Balay xw[4] = x[4 + idx]; 7329371c9d4SSatish Balay xw[5] = x[5 + idx]; 733e48d15efSToby Isaac PetscKernel_v_gets_v_minus_A_times_w_6(s, v, xw); 734e48d15efSToby Isaac v += 36; 735e48d15efSToby Isaac } 736e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_6(xw, idiag, s); 7379371c9d4SSatish Balay x[i2] = xw[0]; 
7389371c9d4SSatish Balay x[i2 + 1] = xw[1]; 7399371c9d4SSatish Balay x[i2 + 2] = xw[2]; 7409371c9d4SSatish Balay x[i2 + 3] = xw[3]; 7419371c9d4SSatish Balay x[i2 + 4] = xw[4]; 7429371c9d4SSatish Balay x[i2 + 5] = xw[5]; 743e48d15efSToby Isaac idiag -= 36; 744e48d15efSToby Isaac i2 -= 6; 745e48d15efSToby Isaac } 746e48d15efSToby Isaac break; 747e48d15efSToby Isaac case 7: 7489371c9d4SSatish Balay s[0] = xb[i2]; 7499371c9d4SSatish Balay s[1] = xb[i2 + 1]; 7509371c9d4SSatish Balay s[2] = xb[i2 + 2]; 7519371c9d4SSatish Balay s[3] = xb[i2 + 3]; 7529371c9d4SSatish Balay s[4] = xb[i2 + 4]; 7539371c9d4SSatish Balay s[5] = xb[i2 + 5]; 7549371c9d4SSatish Balay s[6] = xb[i2 + 6]; 755e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_7(x, idiag, b); 7569371c9d4SSatish Balay x[i2] = xw[0]; 7579371c9d4SSatish Balay x[i2 + 1] = xw[1]; 7589371c9d4SSatish Balay x[i2 + 2] = xw[2]; 7599371c9d4SSatish Balay x[i2 + 3] = xw[3]; 7609371c9d4SSatish Balay x[i2 + 4] = xw[4]; 7619371c9d4SSatish Balay x[i2 + 5] = xw[5]; 7629371c9d4SSatish Balay x[i2 + 6] = xw[6]; 763e48d15efSToby Isaac i2 -= 7; 764e48d15efSToby Isaac idiag -= 49; 765e48d15efSToby Isaac for (i = m - 2; i >= 0; i--) { 766e48d15efSToby Isaac v = aa + 49 * (diag[i] + 1); 767e48d15efSToby Isaac vi = aj + diag[i] + 1; 768e48d15efSToby Isaac nz = ai[i + 1] - diag[i] - 1; 7699371c9d4SSatish Balay s[0] = xb[i2]; 7709371c9d4SSatish Balay s[1] = xb[i2 + 1]; 7719371c9d4SSatish Balay s[2] = xb[i2 + 2]; 7729371c9d4SSatish Balay s[3] = xb[i2 + 3]; 7739371c9d4SSatish Balay s[4] = xb[i2 + 4]; 7749371c9d4SSatish Balay s[5] = xb[i2 + 5]; 7759371c9d4SSatish Balay s[6] = xb[i2 + 6]; 776e48d15efSToby Isaac while (nz--) { 777e48d15efSToby Isaac idx = 7 * (*vi++); 7789371c9d4SSatish Balay xw[0] = x[idx]; 7799371c9d4SSatish Balay xw[1] = x[1 + idx]; 7809371c9d4SSatish Balay xw[2] = x[2 + idx]; 7819371c9d4SSatish Balay xw[3] = x[3 + idx]; 7829371c9d4SSatish Balay xw[4] = x[4 + idx]; 7839371c9d4SSatish Balay xw[5] = x[5 + idx]; 7849371c9d4SSatish Balay 
xw[6] = x[6 + idx]; 785e48d15efSToby Isaac PetscKernel_v_gets_v_minus_A_times_w_7(s, v, xw); 786e48d15efSToby Isaac v += 49; 787e48d15efSToby Isaac } 788e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_7(xw, idiag, s); 7899371c9d4SSatish Balay x[i2] = xw[0]; 7909371c9d4SSatish Balay x[i2 + 1] = xw[1]; 7919371c9d4SSatish Balay x[i2 + 2] = xw[2]; 7929371c9d4SSatish Balay x[i2 + 3] = xw[3]; 7939371c9d4SSatish Balay x[i2 + 4] = xw[4]; 7949371c9d4SSatish Balay x[i2 + 5] = xw[5]; 7959371c9d4SSatish Balay x[i2 + 6] = xw[6]; 796e48d15efSToby Isaac idiag -= 49; 797e48d15efSToby Isaac i2 -= 7; 798e48d15efSToby Isaac } 799e48d15efSToby Isaac break; 800e48d15efSToby Isaac default: 8019566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(w, xb + i2, bs)); 80296b95a6bSBarry Smith PetscKernel_w_gets_Ar_times_v(bs, bs, w, idiag, x + i2); 803de80f912SBarry Smith i2 -= bs; 804e48d15efSToby Isaac idiag -= bs2; 805de80f912SBarry Smith for (i = m - 2; i >= 0; i--) { 806de80f912SBarry Smith v = aa + bs2 * (diag[i] + 1); 807de80f912SBarry Smith vi = aj + diag[i] + 1; 808de80f912SBarry Smith nz = ai[i + 1] - diag[i] - 1; 809de80f912SBarry Smith 8109566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(w, xb + i2, bs)); 811de80f912SBarry Smith /* copy all rows of x that are needed into contiguous space */ 812de80f912SBarry Smith workt = work; 813de80f912SBarry Smith for (j = 0; j < nz; j++) { 8149566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(workt, x + bs * (*vi++), bs)); 815de80f912SBarry Smith workt += bs; 816de80f912SBarry Smith } 81796b95a6bSBarry Smith PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, v, work); 81896b95a6bSBarry Smith PetscKernel_w_gets_Ar_times_v(bs, bs, w, idiag, x + i2); 819e48d15efSToby Isaac 820de80f912SBarry Smith idiag -= bs2; 821de80f912SBarry Smith i2 -= bs; 822de80f912SBarry Smith } 823e48d15efSToby Isaac break; 824e48d15efSToby Isaac } 8259566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(1.0 * bs2 * (a->nz))); 826de80f912SBarry Smith } 
827e48d15efSToby Isaac its--; 828e48d15efSToby Isaac } 829e48d15efSToby Isaac while (its--) { 830e48d15efSToby Isaac if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) { 831e48d15efSToby Isaac idiag = a->idiag; 832e48d15efSToby Isaac i2 = 0; 833e48d15efSToby Isaac switch (bs) { 834e48d15efSToby Isaac case 1: 835e48d15efSToby Isaac for (i = 0; i < m; i++) { 836e48d15efSToby Isaac v = aa + ai[i]; 837e48d15efSToby Isaac vi = aj + ai[i]; 838e48d15efSToby Isaac nz = ai[i + 1] - ai[i]; 839e48d15efSToby Isaac s[0] = b[i2]; 840e48d15efSToby Isaac for (j = 0; j < nz; j++) { 841e48d15efSToby Isaac xw[0] = x[vi[j]]; 842e48d15efSToby Isaac PetscKernel_v_gets_v_minus_A_times_w_1(s, (v + j), xw); 843e48d15efSToby Isaac } 844e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_1(xw, idiag, s); 845e48d15efSToby Isaac x[i2] += xw[0]; 846e48d15efSToby Isaac idiag += 1; 847e48d15efSToby Isaac i2 += 1; 848e48d15efSToby Isaac } 849e48d15efSToby Isaac break; 850e48d15efSToby Isaac case 2: 851e48d15efSToby Isaac for (i = 0; i < m; i++) { 852e48d15efSToby Isaac v = aa + 4 * ai[i]; 853e48d15efSToby Isaac vi = aj + ai[i]; 854e48d15efSToby Isaac nz = ai[i + 1] - ai[i]; 8559371c9d4SSatish Balay s[0] = b[i2]; 8569371c9d4SSatish Balay s[1] = b[i2 + 1]; 857e48d15efSToby Isaac for (j = 0; j < nz; j++) { 858e48d15efSToby Isaac idx = 2 * vi[j]; 859e48d15efSToby Isaac it = 4 * j; 8609371c9d4SSatish Balay xw[0] = x[idx]; 8619371c9d4SSatish Balay xw[1] = x[1 + idx]; 862e48d15efSToby Isaac PetscKernel_v_gets_v_minus_A_times_w_2(s, (v + it), xw); 863e48d15efSToby Isaac } 864e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_2(xw, idiag, s); 8659371c9d4SSatish Balay x[i2] += xw[0]; 8669371c9d4SSatish Balay x[i2 + 1] += xw[1]; 867e48d15efSToby Isaac idiag += 4; 868e48d15efSToby Isaac i2 += 2; 869e48d15efSToby Isaac } 870e48d15efSToby Isaac break; 871e48d15efSToby Isaac case 3: 872e48d15efSToby Isaac for (i = 0; i < m; i++) { 873e48d15efSToby Isaac v = aa + 9 * ai[i]; 874e48d15efSToby Isaac vi = aj 
+ ai[i]; 875e48d15efSToby Isaac nz = ai[i + 1] - ai[i]; 8769371c9d4SSatish Balay s[0] = b[i2]; 8779371c9d4SSatish Balay s[1] = b[i2 + 1]; 8789371c9d4SSatish Balay s[2] = b[i2 + 2]; 879e48d15efSToby Isaac while (nz--) { 880e48d15efSToby Isaac idx = 3 * (*vi++); 8819371c9d4SSatish Balay xw[0] = x[idx]; 8829371c9d4SSatish Balay xw[1] = x[1 + idx]; 8839371c9d4SSatish Balay xw[2] = x[2 + idx]; 884e48d15efSToby Isaac PetscKernel_v_gets_v_minus_A_times_w_3(s, v, xw); 885e48d15efSToby Isaac v += 9; 886e48d15efSToby Isaac } 887e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_3(xw, idiag, s); 8889371c9d4SSatish Balay x[i2] += xw[0]; 8899371c9d4SSatish Balay x[i2 + 1] += xw[1]; 8909371c9d4SSatish Balay x[i2 + 2] += xw[2]; 891e48d15efSToby Isaac idiag += 9; 892e48d15efSToby Isaac i2 += 3; 893e48d15efSToby Isaac } 894e48d15efSToby Isaac break; 895e48d15efSToby Isaac case 4: 896e48d15efSToby Isaac for (i = 0; i < m; i++) { 897e48d15efSToby Isaac v = aa + 16 * ai[i]; 898e48d15efSToby Isaac vi = aj + ai[i]; 899e48d15efSToby Isaac nz = ai[i + 1] - ai[i]; 9009371c9d4SSatish Balay s[0] = b[i2]; 9019371c9d4SSatish Balay s[1] = b[i2 + 1]; 9029371c9d4SSatish Balay s[2] = b[i2 + 2]; 9039371c9d4SSatish Balay s[3] = b[i2 + 3]; 904e48d15efSToby Isaac while (nz--) { 905e48d15efSToby Isaac idx = 4 * (*vi++); 9069371c9d4SSatish Balay xw[0] = x[idx]; 9079371c9d4SSatish Balay xw[1] = x[1 + idx]; 9089371c9d4SSatish Balay xw[2] = x[2 + idx]; 9099371c9d4SSatish Balay xw[3] = x[3 + idx]; 910e48d15efSToby Isaac PetscKernel_v_gets_v_minus_A_times_w_4(s, v, xw); 911e48d15efSToby Isaac v += 16; 912e48d15efSToby Isaac } 913e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_4(xw, idiag, s); 9149371c9d4SSatish Balay x[i2] += xw[0]; 9159371c9d4SSatish Balay x[i2 + 1] += xw[1]; 9169371c9d4SSatish Balay x[i2 + 2] += xw[2]; 9179371c9d4SSatish Balay x[i2 + 3] += xw[3]; 918e48d15efSToby Isaac idiag += 16; 919e48d15efSToby Isaac i2 += 4; 920e48d15efSToby Isaac } 921e48d15efSToby Isaac break; 922e48d15efSToby Isaac 
case 5: 923e48d15efSToby Isaac for (i = 0; i < m; i++) { 924e48d15efSToby Isaac v = aa + 25 * ai[i]; 925e48d15efSToby Isaac vi = aj + ai[i]; 926e48d15efSToby Isaac nz = ai[i + 1] - ai[i]; 9279371c9d4SSatish Balay s[0] = b[i2]; 9289371c9d4SSatish Balay s[1] = b[i2 + 1]; 9299371c9d4SSatish Balay s[2] = b[i2 + 2]; 9309371c9d4SSatish Balay s[3] = b[i2 + 3]; 9319371c9d4SSatish Balay s[4] = b[i2 + 4]; 932e48d15efSToby Isaac while (nz--) { 933e48d15efSToby Isaac idx = 5 * (*vi++); 9349371c9d4SSatish Balay xw[0] = x[idx]; 9359371c9d4SSatish Balay xw[1] = x[1 + idx]; 9369371c9d4SSatish Balay xw[2] = x[2 + idx]; 9379371c9d4SSatish Balay xw[3] = x[3 + idx]; 9389371c9d4SSatish Balay xw[4] = x[4 + idx]; 939e48d15efSToby Isaac PetscKernel_v_gets_v_minus_A_times_w_5(s, v, xw); 940e48d15efSToby Isaac v += 25; 941e48d15efSToby Isaac } 942e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_5(xw, idiag, s); 9439371c9d4SSatish Balay x[i2] += xw[0]; 9449371c9d4SSatish Balay x[i2 + 1] += xw[1]; 9459371c9d4SSatish Balay x[i2 + 2] += xw[2]; 9469371c9d4SSatish Balay x[i2 + 3] += xw[3]; 9479371c9d4SSatish Balay x[i2 + 4] += xw[4]; 948e48d15efSToby Isaac idiag += 25; 949e48d15efSToby Isaac i2 += 5; 950e48d15efSToby Isaac } 951e48d15efSToby Isaac break; 952e48d15efSToby Isaac case 6: 953e48d15efSToby Isaac for (i = 0; i < m; i++) { 954e48d15efSToby Isaac v = aa + 36 * ai[i]; 955e48d15efSToby Isaac vi = aj + ai[i]; 956e48d15efSToby Isaac nz = ai[i + 1] - ai[i]; 9579371c9d4SSatish Balay s[0] = b[i2]; 9589371c9d4SSatish Balay s[1] = b[i2 + 1]; 9599371c9d4SSatish Balay s[2] = b[i2 + 2]; 9609371c9d4SSatish Balay s[3] = b[i2 + 3]; 9619371c9d4SSatish Balay s[4] = b[i2 + 4]; 9629371c9d4SSatish Balay s[5] = b[i2 + 5]; 963e48d15efSToby Isaac while (nz--) { 964e48d15efSToby Isaac idx = 6 * (*vi++); 9659371c9d4SSatish Balay xw[0] = x[idx]; 9669371c9d4SSatish Balay xw[1] = x[1 + idx]; 9679371c9d4SSatish Balay xw[2] = x[2 + idx]; 9689371c9d4SSatish Balay xw[3] = x[3 + idx]; 9699371c9d4SSatish Balay xw[4] = 
x[4 + idx]; 9709371c9d4SSatish Balay xw[5] = x[5 + idx]; 971e48d15efSToby Isaac PetscKernel_v_gets_v_minus_A_times_w_6(s, v, xw); 972e48d15efSToby Isaac v += 36; 973e48d15efSToby Isaac } 974e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_6(xw, idiag, s); 9759371c9d4SSatish Balay x[i2] += xw[0]; 9769371c9d4SSatish Balay x[i2 + 1] += xw[1]; 9779371c9d4SSatish Balay x[i2 + 2] += xw[2]; 9789371c9d4SSatish Balay x[i2 + 3] += xw[3]; 9799371c9d4SSatish Balay x[i2 + 4] += xw[4]; 9809371c9d4SSatish Balay x[i2 + 5] += xw[5]; 981e48d15efSToby Isaac idiag += 36; 982e48d15efSToby Isaac i2 += 6; 983e48d15efSToby Isaac } 984e48d15efSToby Isaac break; 985e48d15efSToby Isaac case 7: 986e48d15efSToby Isaac for (i = 0; i < m; i++) { 987e48d15efSToby Isaac v = aa + 49 * ai[i]; 988e48d15efSToby Isaac vi = aj + ai[i]; 989e48d15efSToby Isaac nz = ai[i + 1] - ai[i]; 9909371c9d4SSatish Balay s[0] = b[i2]; 9919371c9d4SSatish Balay s[1] = b[i2 + 1]; 9929371c9d4SSatish Balay s[2] = b[i2 + 2]; 9939371c9d4SSatish Balay s[3] = b[i2 + 3]; 9949371c9d4SSatish Balay s[4] = b[i2 + 4]; 9959371c9d4SSatish Balay s[5] = b[i2 + 5]; 9969371c9d4SSatish Balay s[6] = b[i2 + 6]; 997e48d15efSToby Isaac while (nz--) { 998e48d15efSToby Isaac idx = 7 * (*vi++); 9999371c9d4SSatish Balay xw[0] = x[idx]; 10009371c9d4SSatish Balay xw[1] = x[1 + idx]; 10019371c9d4SSatish Balay xw[2] = x[2 + idx]; 10029371c9d4SSatish Balay xw[3] = x[3 + idx]; 10039371c9d4SSatish Balay xw[4] = x[4 + idx]; 10049371c9d4SSatish Balay xw[5] = x[5 + idx]; 10059371c9d4SSatish Balay xw[6] = x[6 + idx]; 1006e48d15efSToby Isaac PetscKernel_v_gets_v_minus_A_times_w_7(s, v, xw); 1007e48d15efSToby Isaac v += 49; 1008e48d15efSToby Isaac } 1009e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_7(xw, idiag, s); 10109371c9d4SSatish Balay x[i2] += xw[0]; 10119371c9d4SSatish Balay x[i2 + 1] += xw[1]; 10129371c9d4SSatish Balay x[i2 + 2] += xw[2]; 10139371c9d4SSatish Balay x[i2 + 3] += xw[3]; 10149371c9d4SSatish Balay x[i2 + 4] += xw[4]; 10159371c9d4SSatish 
Balay x[i2 + 5] += xw[5]; 10169371c9d4SSatish Balay x[i2 + 6] += xw[6]; 1017e48d15efSToby Isaac idiag += 49; 1018e48d15efSToby Isaac i2 += 7; 1019e48d15efSToby Isaac } 1020e48d15efSToby Isaac break; 1021e48d15efSToby Isaac default: 1022e48d15efSToby Isaac for (i = 0; i < m; i++) { 1023e48d15efSToby Isaac v = aa + bs2 * ai[i]; 1024e48d15efSToby Isaac vi = aj + ai[i]; 1025e48d15efSToby Isaac nz = ai[i + 1] - ai[i]; 1026e48d15efSToby Isaac 10279566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(w, b + i2, bs)); 1028e48d15efSToby Isaac /* copy all rows of x that are needed into contiguous space */ 1029e48d15efSToby Isaac workt = work; 1030e48d15efSToby Isaac for (j = 0; j < nz; j++) { 10319566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(workt, x + bs * (*vi++), bs)); 1032e48d15efSToby Isaac workt += bs; 1033e48d15efSToby Isaac } 1034e48d15efSToby Isaac PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, v, work); 1035e48d15efSToby Isaac PetscKernel_w_gets_w_plus_Ar_times_v(bs, bs, w, idiag, x + i2); 1036e48d15efSToby Isaac 1037e48d15efSToby Isaac idiag += bs2; 1038e48d15efSToby Isaac i2 += bs; 1039e48d15efSToby Isaac } 1040e48d15efSToby Isaac break; 1041e48d15efSToby Isaac } 10429566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(2.0 * bs2 * a->nz)); 1043e48d15efSToby Isaac } 1044e48d15efSToby Isaac if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 1045e48d15efSToby Isaac idiag = a->idiag + bs2 * (a->mbs - 1); 1046e48d15efSToby Isaac i2 = bs * (m - 1); 1047e48d15efSToby Isaac switch (bs) { 1048e48d15efSToby Isaac case 1: 1049e48d15efSToby Isaac for (i = m - 1; i >= 0; i--) { 1050e48d15efSToby Isaac v = aa + ai[i]; 1051e48d15efSToby Isaac vi = aj + ai[i]; 1052e48d15efSToby Isaac nz = ai[i + 1] - ai[i]; 1053e48d15efSToby Isaac s[0] = b[i2]; 1054e48d15efSToby Isaac for (j = 0; j < nz; j++) { 1055e48d15efSToby Isaac xw[0] = x[vi[j]]; 1056e48d15efSToby Isaac PetscKernel_v_gets_v_minus_A_times_w_1(s, (v + j), xw); 1057e48d15efSToby Isaac } 
1058e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_1(xw, idiag, s); 1059e48d15efSToby Isaac x[i2] += xw[0]; 1060e48d15efSToby Isaac idiag -= 1; 1061e48d15efSToby Isaac i2 -= 1; 1062e48d15efSToby Isaac } 1063e48d15efSToby Isaac break; 1064e48d15efSToby Isaac case 2: 1065e48d15efSToby Isaac for (i = m - 1; i >= 0; i--) { 1066e48d15efSToby Isaac v = aa + 4 * ai[i]; 1067e48d15efSToby Isaac vi = aj + ai[i]; 1068e48d15efSToby Isaac nz = ai[i + 1] - ai[i]; 10699371c9d4SSatish Balay s[0] = b[i2]; 10709371c9d4SSatish Balay s[1] = b[i2 + 1]; 1071e48d15efSToby Isaac for (j = 0; j < nz; j++) { 1072e48d15efSToby Isaac idx = 2 * vi[j]; 1073e48d15efSToby Isaac it = 4 * j; 10749371c9d4SSatish Balay xw[0] = x[idx]; 10759371c9d4SSatish Balay xw[1] = x[1 + idx]; 1076e48d15efSToby Isaac PetscKernel_v_gets_v_minus_A_times_w_2(s, (v + it), xw); 1077e48d15efSToby Isaac } 1078e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_2(xw, idiag, s); 10799371c9d4SSatish Balay x[i2] += xw[0]; 10809371c9d4SSatish Balay x[i2 + 1] += xw[1]; 1081e48d15efSToby Isaac idiag -= 4; 1082e48d15efSToby Isaac i2 -= 2; 1083e48d15efSToby Isaac } 1084e48d15efSToby Isaac break; 1085e48d15efSToby Isaac case 3: 1086e48d15efSToby Isaac for (i = m - 1; i >= 0; i--) { 1087e48d15efSToby Isaac v = aa + 9 * ai[i]; 1088e48d15efSToby Isaac vi = aj + ai[i]; 1089e48d15efSToby Isaac nz = ai[i + 1] - ai[i]; 10909371c9d4SSatish Balay s[0] = b[i2]; 10919371c9d4SSatish Balay s[1] = b[i2 + 1]; 10929371c9d4SSatish Balay s[2] = b[i2 + 2]; 1093e48d15efSToby Isaac while (nz--) { 1094e48d15efSToby Isaac idx = 3 * (*vi++); 10959371c9d4SSatish Balay xw[0] = x[idx]; 10969371c9d4SSatish Balay xw[1] = x[1 + idx]; 10979371c9d4SSatish Balay xw[2] = x[2 + idx]; 1098e48d15efSToby Isaac PetscKernel_v_gets_v_minus_A_times_w_3(s, v, xw); 1099e48d15efSToby Isaac v += 9; 1100e48d15efSToby Isaac } 1101e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_3(xw, idiag, s); 11029371c9d4SSatish Balay x[i2] += xw[0]; 11039371c9d4SSatish Balay x[i2 + 1] += 
xw[1]; 11049371c9d4SSatish Balay x[i2 + 2] += xw[2]; 1105e48d15efSToby Isaac idiag -= 9; 1106e48d15efSToby Isaac i2 -= 3; 1107e48d15efSToby Isaac } 1108e48d15efSToby Isaac break; 1109e48d15efSToby Isaac case 4: 1110e48d15efSToby Isaac for (i = m - 1; i >= 0; i--) { 1111e48d15efSToby Isaac v = aa + 16 * ai[i]; 1112e48d15efSToby Isaac vi = aj + ai[i]; 1113e48d15efSToby Isaac nz = ai[i + 1] - ai[i]; 11149371c9d4SSatish Balay s[0] = b[i2]; 11159371c9d4SSatish Balay s[1] = b[i2 + 1]; 11169371c9d4SSatish Balay s[2] = b[i2 + 2]; 11179371c9d4SSatish Balay s[3] = b[i2 + 3]; 1118e48d15efSToby Isaac while (nz--) { 1119e48d15efSToby Isaac idx = 4 * (*vi++); 11209371c9d4SSatish Balay xw[0] = x[idx]; 11219371c9d4SSatish Balay xw[1] = x[1 + idx]; 11229371c9d4SSatish Balay xw[2] = x[2 + idx]; 11239371c9d4SSatish Balay xw[3] = x[3 + idx]; 1124e48d15efSToby Isaac PetscKernel_v_gets_v_minus_A_times_w_4(s, v, xw); 1125e48d15efSToby Isaac v += 16; 1126e48d15efSToby Isaac } 1127e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_4(xw, idiag, s); 11289371c9d4SSatish Balay x[i2] += xw[0]; 11299371c9d4SSatish Balay x[i2 + 1] += xw[1]; 11309371c9d4SSatish Balay x[i2 + 2] += xw[2]; 11319371c9d4SSatish Balay x[i2 + 3] += xw[3]; 1132e48d15efSToby Isaac idiag -= 16; 1133e48d15efSToby Isaac i2 -= 4; 1134e48d15efSToby Isaac } 1135e48d15efSToby Isaac break; 1136e48d15efSToby Isaac case 5: 1137e48d15efSToby Isaac for (i = m - 1; i >= 0; i--) { 1138e48d15efSToby Isaac v = aa + 25 * ai[i]; 1139e48d15efSToby Isaac vi = aj + ai[i]; 1140e48d15efSToby Isaac nz = ai[i + 1] - ai[i]; 11419371c9d4SSatish Balay s[0] = b[i2]; 11429371c9d4SSatish Balay s[1] = b[i2 + 1]; 11439371c9d4SSatish Balay s[2] = b[i2 + 2]; 11449371c9d4SSatish Balay s[3] = b[i2 + 3]; 11459371c9d4SSatish Balay s[4] = b[i2 + 4]; 1146e48d15efSToby Isaac while (nz--) { 1147e48d15efSToby Isaac idx = 5 * (*vi++); 11489371c9d4SSatish Balay xw[0] = x[idx]; 11499371c9d4SSatish Balay xw[1] = x[1 + idx]; 11509371c9d4SSatish Balay xw[2] = x[2 + idx]; 
11519371c9d4SSatish Balay xw[3] = x[3 + idx]; 11529371c9d4SSatish Balay xw[4] = x[4 + idx]; 1153e48d15efSToby Isaac PetscKernel_v_gets_v_minus_A_times_w_5(s, v, xw); 1154e48d15efSToby Isaac v += 25; 1155e48d15efSToby Isaac } 1156e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_5(xw, idiag, s); 11579371c9d4SSatish Balay x[i2] += xw[0]; 11589371c9d4SSatish Balay x[i2 + 1] += xw[1]; 11599371c9d4SSatish Balay x[i2 + 2] += xw[2]; 11609371c9d4SSatish Balay x[i2 + 3] += xw[3]; 11619371c9d4SSatish Balay x[i2 + 4] += xw[4]; 1162e48d15efSToby Isaac idiag -= 25; 1163e48d15efSToby Isaac i2 -= 5; 1164e48d15efSToby Isaac } 1165e48d15efSToby Isaac break; 1166e48d15efSToby Isaac case 6: 1167e48d15efSToby Isaac for (i = m - 1; i >= 0; i--) { 1168e48d15efSToby Isaac v = aa + 36 * ai[i]; 1169e48d15efSToby Isaac vi = aj + ai[i]; 1170e48d15efSToby Isaac nz = ai[i + 1] - ai[i]; 11719371c9d4SSatish Balay s[0] = b[i2]; 11729371c9d4SSatish Balay s[1] = b[i2 + 1]; 11739371c9d4SSatish Balay s[2] = b[i2 + 2]; 11749371c9d4SSatish Balay s[3] = b[i2 + 3]; 11759371c9d4SSatish Balay s[4] = b[i2 + 4]; 11769371c9d4SSatish Balay s[5] = b[i2 + 5]; 1177e48d15efSToby Isaac while (nz--) { 1178e48d15efSToby Isaac idx = 6 * (*vi++); 11799371c9d4SSatish Balay xw[0] = x[idx]; 11809371c9d4SSatish Balay xw[1] = x[1 + idx]; 11819371c9d4SSatish Balay xw[2] = x[2 + idx]; 11829371c9d4SSatish Balay xw[3] = x[3 + idx]; 11839371c9d4SSatish Balay xw[4] = x[4 + idx]; 11849371c9d4SSatish Balay xw[5] = x[5 + idx]; 1185e48d15efSToby Isaac PetscKernel_v_gets_v_minus_A_times_w_6(s, v, xw); 1186e48d15efSToby Isaac v += 36; 1187e48d15efSToby Isaac } 1188e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_6(xw, idiag, s); 11899371c9d4SSatish Balay x[i2] += xw[0]; 11909371c9d4SSatish Balay x[i2 + 1] += xw[1]; 11919371c9d4SSatish Balay x[i2 + 2] += xw[2]; 11929371c9d4SSatish Balay x[i2 + 3] += xw[3]; 11939371c9d4SSatish Balay x[i2 + 4] += xw[4]; 11949371c9d4SSatish Balay x[i2 + 5] += xw[5]; 1195e48d15efSToby Isaac idiag -= 36; 
1196e48d15efSToby Isaac i2 -= 6; 1197e48d15efSToby Isaac } 1198e48d15efSToby Isaac break; 1199e48d15efSToby Isaac case 7: 1200e48d15efSToby Isaac for (i = m - 1; i >= 0; i--) { 1201e48d15efSToby Isaac v = aa + 49 * ai[i]; 1202e48d15efSToby Isaac vi = aj + ai[i]; 1203e48d15efSToby Isaac nz = ai[i + 1] - ai[i]; 12049371c9d4SSatish Balay s[0] = b[i2]; 12059371c9d4SSatish Balay s[1] = b[i2 + 1]; 12069371c9d4SSatish Balay s[2] = b[i2 + 2]; 12079371c9d4SSatish Balay s[3] = b[i2 + 3]; 12089371c9d4SSatish Balay s[4] = b[i2 + 4]; 12099371c9d4SSatish Balay s[5] = b[i2 + 5]; 12109371c9d4SSatish Balay s[6] = b[i2 + 6]; 1211e48d15efSToby Isaac while (nz--) { 1212e48d15efSToby Isaac idx = 7 * (*vi++); 12139371c9d4SSatish Balay xw[0] = x[idx]; 12149371c9d4SSatish Balay xw[1] = x[1 + idx]; 12159371c9d4SSatish Balay xw[2] = x[2 + idx]; 12169371c9d4SSatish Balay xw[3] = x[3 + idx]; 12179371c9d4SSatish Balay xw[4] = x[4 + idx]; 12189371c9d4SSatish Balay xw[5] = x[5 + idx]; 12199371c9d4SSatish Balay xw[6] = x[6 + idx]; 1220e48d15efSToby Isaac PetscKernel_v_gets_v_minus_A_times_w_7(s, v, xw); 1221e48d15efSToby Isaac v += 49; 1222e48d15efSToby Isaac } 1223e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_7(xw, idiag, s); 12249371c9d4SSatish Balay x[i2] += xw[0]; 12259371c9d4SSatish Balay x[i2 + 1] += xw[1]; 12269371c9d4SSatish Balay x[i2 + 2] += xw[2]; 12279371c9d4SSatish Balay x[i2 + 3] += xw[3]; 12289371c9d4SSatish Balay x[i2 + 4] += xw[4]; 12299371c9d4SSatish Balay x[i2 + 5] += xw[5]; 12309371c9d4SSatish Balay x[i2 + 6] += xw[6]; 1231e48d15efSToby Isaac idiag -= 49; 1232e48d15efSToby Isaac i2 -= 7; 1233e48d15efSToby Isaac } 1234e48d15efSToby Isaac break; 1235e48d15efSToby Isaac default: 1236e48d15efSToby Isaac for (i = m - 1; i >= 0; i--) { 1237e48d15efSToby Isaac v = aa + bs2 * ai[i]; 1238e48d15efSToby Isaac vi = aj + ai[i]; 1239e48d15efSToby Isaac nz = ai[i + 1] - ai[i]; 1240e48d15efSToby Isaac 12419566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(w, b + i2, bs)); 
1242e48d15efSToby Isaac /* copy all rows of x that are needed into contiguous space */ 1243e48d15efSToby Isaac workt = work; 1244e48d15efSToby Isaac for (j = 0; j < nz; j++) { 12459566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(workt, x + bs * (*vi++), bs)); 1246e48d15efSToby Isaac workt += bs; 1247e48d15efSToby Isaac } 1248e48d15efSToby Isaac PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, v, work); 1249e48d15efSToby Isaac PetscKernel_w_gets_w_plus_Ar_times_v(bs, bs, w, idiag, x + i2); 1250e48d15efSToby Isaac 1251e48d15efSToby Isaac idiag -= bs2; 1252e48d15efSToby Isaac i2 -= bs; 1253e48d15efSToby Isaac } 1254e48d15efSToby Isaac break; 1255e48d15efSToby Isaac } 12569566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(2.0 * bs2 * (a->nz))); 1257e48d15efSToby Isaac } 1258e48d15efSToby Isaac } 12599566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(xx, &x)); 12609566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(bb, &b)); 1261de80f912SBarry Smith PetscFunctionReturn(0); 1262de80f912SBarry Smith } 1263de80f912SBarry Smith 1264af674e45SBarry Smith /* 126581824310SBarry Smith Special version for direct calls from Fortran (Used in PETSc-fun3d) 1266af674e45SBarry Smith */ 1267af674e45SBarry Smith #if defined(PETSC_HAVE_FORTRAN_CAPS) 1268af674e45SBarry Smith #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4 1269af674e45SBarry Smith #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 1270af674e45SBarry Smith #define matsetvaluesblocked4_ matsetvaluesblocked4 1271af674e45SBarry Smith #endif 1272af674e45SBarry Smith 12739371c9d4SSatish Balay PETSC_EXTERN void matsetvaluesblocked4_(Mat *AA, PetscInt *mm, const PetscInt im[], PetscInt *nn, const PetscInt in[], const PetscScalar v[]) { 1274af674e45SBarry Smith Mat A = *AA; 1275af674e45SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 1276c1ac3661SBarry Smith PetscInt *rp, k, low, high, t, ii, jj, row, nrow, i, col, l, N, m = *mm, n = *nn; 1277c1ac3661SBarry Smith PetscInt *ai = a->i, *ailen = a->ilen; 
127817ec6a02SBarry Smith PetscInt *aj = a->j, stepval, lastcol = -1; 1279f15d580aSBarry Smith const PetscScalar *value = v; 12804bb09213Spetsc MatScalar *ap, *aa = a->a, *bap; 1281af674e45SBarry Smith 1282af674e45SBarry Smith PetscFunctionBegin; 1283ce94432eSBarry Smith if (A->rmap->bs != 4) SETERRABORT(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Can only be called with a block size of 4"); 1284af674e45SBarry Smith stepval = (n - 1) * 4; 1285af674e45SBarry Smith for (k = 0; k < m; k++) { /* loop over added rows */ 1286af674e45SBarry Smith row = im[k]; 1287af674e45SBarry Smith rp = aj + ai[row]; 1288af674e45SBarry Smith ap = aa + 16 * ai[row]; 1289af674e45SBarry Smith nrow = ailen[row]; 1290af674e45SBarry Smith low = 0; 129117ec6a02SBarry Smith high = nrow; 1292af674e45SBarry Smith for (l = 0; l < n; l++) { /* loop over added columns */ 1293af674e45SBarry Smith col = in[l]; 1294db4deed7SKarl Rupp if (col <= lastcol) low = 0; 1295db4deed7SKarl Rupp else high = nrow; 129617ec6a02SBarry Smith lastcol = col; 12971e3347e8SBarry Smith value = v + k * (stepval + 4 + l) * 4; 1298af674e45SBarry Smith while (high - low > 7) { 1299af674e45SBarry Smith t = (low + high) / 2; 1300af674e45SBarry Smith if (rp[t] > col) high = t; 1301af674e45SBarry Smith else low = t; 1302af674e45SBarry Smith } 1303af674e45SBarry Smith for (i = low; i < high; i++) { 1304af674e45SBarry Smith if (rp[i] > col) break; 1305af674e45SBarry Smith if (rp[i] == col) { 1306af674e45SBarry Smith bap = ap + 16 * i; 1307af674e45SBarry Smith for (ii = 0; ii < 4; ii++, value += stepval) { 1308ad540459SPierre Jolivet for (jj = ii; jj < 16; jj += 4) bap[jj] += *value++; 1309af674e45SBarry Smith } 1310af674e45SBarry Smith goto noinsert2; 1311af674e45SBarry Smith } 1312af674e45SBarry Smith } 1313af674e45SBarry Smith N = nrow++ - 1; 131417ec6a02SBarry Smith high++; /* added new column index thus must search to one higher than before */ 1315af674e45SBarry Smith /* shift up all the later entries in this row */ 
1316af674e45SBarry Smith for (ii = N; ii >= i; ii--) { 1317af674e45SBarry Smith rp[ii + 1] = rp[ii]; 13189566063dSJacob Faibussowitsch PetscCallVoid(PetscArraycpy(ap + 16 * (ii + 1), ap + 16 * (ii), 16)); 1319af674e45SBarry Smith } 132048a46eb9SPierre Jolivet if (N >= i) PetscCallVoid(PetscArrayzero(ap + 16 * i, 16)); 1321af674e45SBarry Smith rp[i] = col; 1322af674e45SBarry Smith bap = ap + 16 * i; 1323af674e45SBarry Smith for (ii = 0; ii < 4; ii++, value += stepval) { 1324ad540459SPierre Jolivet for (jj = ii; jj < 16; jj += 4) bap[jj] = *value++; 1325af674e45SBarry Smith } 1326af674e45SBarry Smith noinsert2:; 1327af674e45SBarry Smith low = i; 1328af674e45SBarry Smith } 1329af674e45SBarry Smith ailen[row] = nrow; 1330af674e45SBarry Smith } 1331be1d678aSKris Buschelman PetscFunctionReturnVoid(); 1332af674e45SBarry Smith } 1333af674e45SBarry Smith 1334af674e45SBarry Smith #if defined(PETSC_HAVE_FORTRAN_CAPS) 1335af674e45SBarry Smith #define matsetvalues4_ MATSETVALUES4 1336af674e45SBarry Smith #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 1337af674e45SBarry Smith #define matsetvalues4_ matsetvalues4 1338af674e45SBarry Smith #endif 1339af674e45SBarry Smith 13409371c9d4SSatish Balay PETSC_EXTERN void matsetvalues4_(Mat *AA, PetscInt *mm, PetscInt *im, PetscInt *nn, PetscInt *in, PetscScalar *v) { 1341af674e45SBarry Smith Mat A = *AA; 1342af674e45SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 1343580bdb30SBarry Smith PetscInt *rp, k, low, high, t, row, nrow, i, col, l, N, n = *nn, m = *mm; 1344c1ac3661SBarry Smith PetscInt *ai = a->i, *ailen = a->ilen; 1345c1ac3661SBarry Smith PetscInt *aj = a->j, brow, bcol; 134617ec6a02SBarry Smith PetscInt ridx, cidx, lastcol = -1; 1347af674e45SBarry Smith MatScalar *ap, value, *aa = a->a, *bap; 1348af674e45SBarry Smith 1349af674e45SBarry Smith PetscFunctionBegin; 1350af674e45SBarry Smith for (k = 0; k < m; k++) { /* loop over added rows */ 13519371c9d4SSatish Balay row = im[k]; 13529371c9d4SSatish Balay brow = row / 4; 
1353af674e45SBarry Smith rp = aj + ai[brow]; 1354af674e45SBarry Smith ap = aa + 16 * ai[brow]; 1355af674e45SBarry Smith nrow = ailen[brow]; 1356af674e45SBarry Smith low = 0; 135717ec6a02SBarry Smith high = nrow; 1358af674e45SBarry Smith for (l = 0; l < n; l++) { /* loop over added columns */ 13599371c9d4SSatish Balay col = in[l]; 13609371c9d4SSatish Balay bcol = col / 4; 13619371c9d4SSatish Balay ridx = row % 4; 13629371c9d4SSatish Balay cidx = col % 4; 1363af674e45SBarry Smith value = v[l + k * n]; 1364db4deed7SKarl Rupp if (col <= lastcol) low = 0; 1365db4deed7SKarl Rupp else high = nrow; 136617ec6a02SBarry Smith lastcol = col; 1367af674e45SBarry Smith while (high - low > 7) { 1368af674e45SBarry Smith t = (low + high) / 2; 1369af674e45SBarry Smith if (rp[t] > bcol) high = t; 1370af674e45SBarry Smith else low = t; 1371af674e45SBarry Smith } 1372af674e45SBarry Smith for (i = low; i < high; i++) { 1373af674e45SBarry Smith if (rp[i] > bcol) break; 1374af674e45SBarry Smith if (rp[i] == bcol) { 1375af674e45SBarry Smith bap = ap + 16 * i + 4 * cidx + ridx; 1376af674e45SBarry Smith *bap += value; 1377af674e45SBarry Smith goto noinsert1; 1378af674e45SBarry Smith } 1379af674e45SBarry Smith } 1380af674e45SBarry Smith N = nrow++ - 1; 138117ec6a02SBarry Smith high++; /* added new column thus must search to one higher than before */ 1382af674e45SBarry Smith /* shift up all the later entries in this row */ 13839566063dSJacob Faibussowitsch PetscCallVoid(PetscArraymove(rp + i + 1, rp + i, N - i + 1)); 13849566063dSJacob Faibussowitsch PetscCallVoid(PetscArraymove(ap + 16 * i + 16, ap + 16 * i, 16 * (N - i + 1))); 13859566063dSJacob Faibussowitsch PetscCallVoid(PetscArrayzero(ap + 16 * i, 16)); 1386af674e45SBarry Smith rp[i] = bcol; 1387af674e45SBarry Smith ap[16 * i + 4 * cidx + ridx] = value; 1388af674e45SBarry Smith noinsert1:; 1389af674e45SBarry Smith low = i; 1390af674e45SBarry Smith } 1391af674e45SBarry Smith ailen[brow] = nrow; 1392af674e45SBarry Smith } 1393be1d678aSKris 
Buschelman PetscFunctionReturnVoid(); 1394af674e45SBarry Smith } 1395af674e45SBarry Smith 1396be5855fcSBarry Smith /* 1397be5855fcSBarry Smith Checks for missing diagonals 1398be5855fcSBarry Smith */ 13999371c9d4SSatish Balay PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A, PetscBool *missing, PetscInt *d) { 1400be5855fcSBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 14017734d3b5SMatthew G. Knepley PetscInt *diag, *ii = a->i, i; 1402be5855fcSBarry Smith 1403be5855fcSBarry Smith PetscFunctionBegin; 14049566063dSJacob Faibussowitsch PetscCall(MatMarkDiagonal_SeqBAIJ(A)); 14052af78befSBarry Smith *missing = PETSC_FALSE; 14067734d3b5SMatthew G. Knepley if (A->rmap->n > 0 && !ii) { 14072efa7f71SHong Zhang *missing = PETSC_TRUE; 14082efa7f71SHong Zhang if (d) *d = 0; 14099566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "Matrix has no entries therefore is missing diagonal\n")); 14102efa7f71SHong Zhang } else { 141101445905SHong Zhang PetscInt n; 141201445905SHong Zhang n = PetscMin(a->mbs, a->nbs); 1413883fce79SBarry Smith diag = a->diag; 141401445905SHong Zhang for (i = 0; i < n; i++) { 14157734d3b5SMatthew G. 
Knepley if (diag[i] >= ii[i + 1]) { 14162af78befSBarry Smith *missing = PETSC_TRUE; 14172af78befSBarry Smith if (d) *d = i; 14189566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "Matrix is missing block diagonal number %" PetscInt_FMT "\n", i)); 1419358d2f5dSShri Abhyankar break; 14202efa7f71SHong Zhang } 1421be5855fcSBarry Smith } 1422be5855fcSBarry Smith } 1423be5855fcSBarry Smith PetscFunctionReturn(0); 1424be5855fcSBarry Smith } 1425be5855fcSBarry Smith 14269371c9d4SSatish Balay PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A) { 1427de6a44a3SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 142809f38230SBarry Smith PetscInt i, j, m = a->mbs; 1429de6a44a3SBarry Smith 14303a40ed3dSBarry Smith PetscFunctionBegin; 143109f38230SBarry Smith if (!a->diag) { 14329566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(m, &a->diag)); 14334fd072dbSBarry Smith a->free_diag = PETSC_TRUE; 143409f38230SBarry Smith } 14357fc0212eSBarry Smith for (i = 0; i < m; i++) { 143609f38230SBarry Smith a->diag[i] = a->i[i + 1]; 1437de6a44a3SBarry Smith for (j = a->i[i]; j < a->i[i + 1]; j++) { 1438de6a44a3SBarry Smith if (a->j[j] == i) { 143909f38230SBarry Smith a->diag[i] = j; 1440de6a44a3SBarry Smith break; 1441de6a44a3SBarry Smith } 1442de6a44a3SBarry Smith } 1443de6a44a3SBarry Smith } 14443a40ed3dSBarry Smith PetscFunctionReturn(0); 1445de6a44a3SBarry Smith } 14462593348eSBarry Smith 14479371c9d4SSatish Balay static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool blockcompressed, PetscInt *nn, const PetscInt *inia[], const PetscInt *inja[], PetscBool *done) { 14483b2fbd54SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 14491a83f524SJed Brown PetscInt i, j, n = a->mbs, nz = a->i[n], *tia, *tja, bs = A->rmap->bs, k, l, cnt; 14501a83f524SJed Brown PetscInt **ia = (PetscInt **)inia, **ja = (PetscInt **)inja; 14513b2fbd54SBarry Smith 14523a40ed3dSBarry Smith PetscFunctionBegin; 14533b2fbd54SBarry Smith *nn = n; 14543a40ed3dSBarry Smith if 
(!ia) PetscFunctionReturn(0); 14553b2fbd54SBarry Smith if (symmetric) { 14569566063dSJacob Faibussowitsch PetscCall(MatToSymmetricIJ_SeqAIJ(n, a->i, a->j, PETSC_TRUE, 0, 0, &tia, &tja)); 1457553b3c51SBarry Smith nz = tia[n]; 14583b2fbd54SBarry Smith } else { 14599371c9d4SSatish Balay tia = a->i; 14609371c9d4SSatish Balay tja = a->j; 14613b2fbd54SBarry Smith } 14623b2fbd54SBarry Smith 1463ecc77c7aSBarry Smith if (!blockcompressed && bs > 1) { 1464ecc77c7aSBarry Smith (*nn) *= bs; 14658f7157efSSatish Balay /* malloc & create the natural set of indices */ 14669566063dSJacob Faibussowitsch PetscCall(PetscMalloc1((n + 1) * bs, ia)); 14679985e31cSBarry Smith if (n) { 14682462f5fdSStefano Zampini (*ia)[0] = oshift; 1469ad540459SPierre Jolivet for (j = 1; j < bs; j++) (*ia)[j] = (tia[1] - tia[0]) * bs + (*ia)[j - 1]; 14709985e31cSBarry Smith } 1471ecc77c7aSBarry Smith 1472ecc77c7aSBarry Smith for (i = 1; i < n; i++) { 1473ecc77c7aSBarry Smith (*ia)[i * bs] = (tia[i] - tia[i - 1]) * bs + (*ia)[i * bs - 1]; 1474ad540459SPierre Jolivet for (j = 1; j < bs; j++) (*ia)[i * bs + j] = (tia[i + 1] - tia[i]) * bs + (*ia)[i * bs + j - 1]; 14758f7157efSSatish Balay } 1476ad540459SPierre Jolivet if (n) (*ia)[n * bs] = (tia[n] - tia[n - 1]) * bs + (*ia)[n * bs - 1]; 1477ecc77c7aSBarry Smith 14781a83f524SJed Brown if (inja) { 14799566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(nz * bs * bs, ja)); 14809985e31cSBarry Smith cnt = 0; 14819985e31cSBarry Smith for (i = 0; i < n; i++) { 14829985e31cSBarry Smith for (j = 0; j < bs; j++) { 14839985e31cSBarry Smith for (k = tia[i]; k < tia[i + 1]; k++) { 1484ad540459SPierre Jolivet for (l = 0; l < bs; l++) (*ja)[cnt++] = bs * tja[k] + l; 14859985e31cSBarry Smith } 14869985e31cSBarry Smith } 14879985e31cSBarry Smith } 14889985e31cSBarry Smith } 14899985e31cSBarry Smith 14908f7157efSSatish Balay if (symmetric) { /* deallocate memory allocated in MatToSymmetricIJ_SeqAIJ() */ 14919566063dSJacob Faibussowitsch PetscCall(PetscFree(tia)); 
14929566063dSJacob Faibussowitsch PetscCall(PetscFree(tja)); 14938f7157efSSatish Balay } 1494f6d58c54SBarry Smith } else if (oshift == 1) { 1495715a17b5SBarry Smith if (symmetric) { 1496a2ea699eSBarry Smith nz = tia[A->rmap->n / bs]; 1497715a17b5SBarry Smith /* add 1 to i and j indices */ 1498715a17b5SBarry Smith for (i = 0; i < A->rmap->n / bs + 1; i++) tia[i] = tia[i] + 1; 1499715a17b5SBarry Smith *ia = tia; 1500715a17b5SBarry Smith if (ja) { 1501715a17b5SBarry Smith for (i = 0; i < nz; i++) tja[i] = tja[i] + 1; 1502715a17b5SBarry Smith *ja = tja; 1503715a17b5SBarry Smith } 1504715a17b5SBarry Smith } else { 1505a2ea699eSBarry Smith nz = a->i[A->rmap->n / bs]; 1506f6d58c54SBarry Smith /* malloc space and add 1 to i and j indices */ 15079566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(A->rmap->n / bs + 1, ia)); 1508f6d58c54SBarry Smith for (i = 0; i < A->rmap->n / bs + 1; i++) (*ia)[i] = a->i[i] + 1; 1509f6d58c54SBarry Smith if (ja) { 15109566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(nz, ja)); 1511f6d58c54SBarry Smith for (i = 0; i < nz; i++) (*ja)[i] = a->j[i] + 1; 1512f6d58c54SBarry Smith } 1513715a17b5SBarry Smith } 15148f7157efSSatish Balay } else { 15158f7157efSSatish Balay *ia = tia; 1516ecc77c7aSBarry Smith if (ja) *ja = tja; 15178f7157efSSatish Balay } 15183a40ed3dSBarry Smith PetscFunctionReturn(0); 15193b2fbd54SBarry Smith } 15203b2fbd54SBarry Smith 15219371c9d4SSatish Balay static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool blockcompressed, PetscInt *nn, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done) { 15223a40ed3dSBarry Smith PetscFunctionBegin; 15233a40ed3dSBarry Smith if (!ia) PetscFunctionReturn(0); 1524715a17b5SBarry Smith if ((!blockcompressed && A->rmap->bs > 1) || (symmetric || oshift == 1)) { 15259566063dSJacob Faibussowitsch PetscCall(PetscFree(*ia)); 15269566063dSJacob Faibussowitsch if (ja) PetscCall(PetscFree(*ja)); 15273b2fbd54SBarry Smith } 15283a40ed3dSBarry Smith 
PetscFunctionReturn(0); 15293b2fbd54SBarry Smith } 15303b2fbd54SBarry Smith 15319371c9d4SSatish Balay PetscErrorCode MatDestroy_SeqBAIJ(Mat A) { 15322d61bbb3SSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 15332d61bbb3SSatish Balay 1534433994e6SBarry Smith PetscFunctionBegin; 1535aa482453SBarry Smith #if defined(PETSC_USE_LOG) 1536c0aa6a63SJacob Faibussowitsch PetscLogObjectState((PetscObject)A, "Rows=%" PetscInt_FMT ", Cols=%" PetscInt_FMT ", NZ=%" PetscInt_FMT, A->rmap->N, A->cmap->n, a->nz); 15372d61bbb3SSatish Balay #endif 15389566063dSJacob Faibussowitsch PetscCall(MatSeqXAIJFreeAIJ(A, &a->a, &a->j, &a->i)); 15399566063dSJacob Faibussowitsch PetscCall(ISDestroy(&a->row)); 15409566063dSJacob Faibussowitsch PetscCall(ISDestroy(&a->col)); 15419566063dSJacob Faibussowitsch if (a->free_diag) PetscCall(PetscFree(a->diag)); 15429566063dSJacob Faibussowitsch PetscCall(PetscFree(a->idiag)); 15439566063dSJacob Faibussowitsch if (a->free_imax_ilen) PetscCall(PetscFree2(a->imax, a->ilen)); 15449566063dSJacob Faibussowitsch PetscCall(PetscFree(a->solve_work)); 15459566063dSJacob Faibussowitsch PetscCall(PetscFree(a->mult_work)); 15469566063dSJacob Faibussowitsch PetscCall(PetscFree(a->sor_workt)); 15479566063dSJacob Faibussowitsch PetscCall(PetscFree(a->sor_work)); 15489566063dSJacob Faibussowitsch PetscCall(ISDestroy(&a->icol)); 15499566063dSJacob Faibussowitsch PetscCall(PetscFree(a->saved_values)); 15509566063dSJacob Faibussowitsch PetscCall(PetscFree2(a->compressedrow.i, a->compressedrow.rindex)); 1551c4319e64SHong Zhang 15529566063dSJacob Faibussowitsch PetscCall(MatDestroy(&a->sbaijMat)); 15539566063dSJacob Faibussowitsch PetscCall(MatDestroy(&a->parent)); 15549566063dSJacob Faibussowitsch PetscCall(PetscFree(A->data)); 1555901853e0SKris Buschelman 15569566063dSJacob Faibussowitsch PetscCall(PetscObjectChangeTypeName((PetscObject)A, NULL)); 15579566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatSeqBAIJGetArray_C", NULL)); 
15589566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatSeqBAIJRestoreArray_C", NULL)); 15599566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatStoreValues_C", NULL)); 15609566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatRetrieveValues_C", NULL)); 15619566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatSeqBAIJSetColumnIndices_C", NULL)); 15629566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqbaij_seqaij_C", NULL)); 15639566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqbaij_seqsbaij_C", NULL)); 15649566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatSeqBAIJSetPreallocation_C", NULL)); 15659566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatSeqBAIJSetPreallocationCSR_C", NULL)); 15669566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqbaij_seqbstrm_C", NULL)); 15679566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatIsTranspose_C", NULL)); 15687ea3e4caSstefano_zampini #if defined(PETSC_HAVE_HYPRE) 15699566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqbaij_hypre_C", NULL)); 15707ea3e4caSstefano_zampini #endif 15719566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqbaij_is_C", NULL)); 15722e956fe4SStefano Zampini PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatFactorGetSolverType_C", NULL)); 15732d61bbb3SSatish Balay PetscFunctionReturn(0); 15742d61bbb3SSatish Balay } 15752d61bbb3SSatish Balay 15769371c9d4SSatish Balay PetscErrorCode MatSetOption_SeqBAIJ(Mat A, MatOption op, PetscBool flg) { 15772d61bbb3SSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 15782d61bbb3SSatish Balay 
15792d61bbb3SSatish Balay PetscFunctionBegin; 1580aa275fccSKris Buschelman switch (op) { 15819371c9d4SSatish Balay case MAT_ROW_ORIENTED: a->roworiented = flg; break; 15829371c9d4SSatish Balay case MAT_KEEP_NONZERO_PATTERN: a->keepnonzeropattern = flg; break; 15839371c9d4SSatish Balay case MAT_NEW_NONZERO_LOCATIONS: a->nonew = (flg ? 0 : 1); break; 15849371c9d4SSatish Balay case MAT_NEW_NONZERO_LOCATION_ERR: a->nonew = (flg ? -1 : 0); break; 15859371c9d4SSatish Balay case MAT_NEW_NONZERO_ALLOCATION_ERR: a->nonew = (flg ? -2 : 0); break; 15869371c9d4SSatish Balay case MAT_UNUSED_NONZERO_LOCATION_ERR: a->nounused = (flg ? -1 : 0); break; 15878c78258cSHong Zhang case MAT_FORCE_DIAGONAL_ENTRIES: 1588aa275fccSKris Buschelman case MAT_IGNORE_OFF_PROC_ENTRIES: 1589aa275fccSKris Buschelman case MAT_USE_HASH_TABLE: 15909371c9d4SSatish Balay case MAT_SORTED_FULL: PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op])); break; 15915021d80fSJed Brown case MAT_SPD: 159277e54ba9SKris Buschelman case MAT_SYMMETRIC: 159377e54ba9SKris Buschelman case MAT_STRUCTURALLY_SYMMETRIC: 15949a4540c5SBarry Smith case MAT_HERMITIAN: 15959a4540c5SBarry Smith case MAT_SYMMETRY_ETERNAL: 1596b94d7dedSBarry Smith case MAT_STRUCTURAL_SYMMETRY_ETERNAL: 1597c10200c1SHong Zhang case MAT_SUBMAT_SINGLEIS: 1598672ba085SHong Zhang case MAT_STRUCTURE_ONLY: 1599b94d7dedSBarry Smith case MAT_SPD_ETERNAL: 1600b94d7dedSBarry Smith /* if the diagonal matrix is square it inherits some of the properties above */ 160177e54ba9SKris Buschelman break; 16029371c9d4SSatish Balay default: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "unknown option %d", op); 16032d61bbb3SSatish Balay } 16042d61bbb3SSatish Balay PetscFunctionReturn(0); 16052d61bbb3SSatish Balay } 16062d61bbb3SSatish Balay 160752768537SHong Zhang /* used for both SeqBAIJ and SeqSBAIJ matrices */ 16089371c9d4SSatish Balay PetscErrorCode MatGetRow_SeqBAIJ_private(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v, PetscInt *ai, PetscInt 
*aj, PetscScalar *aa) { 160952768537SHong Zhang PetscInt itmp, i, j, k, M, bn, bp, *idx_i, bs, bs2; 161052768537SHong Zhang MatScalar *aa_i; 161187828ca2SBarry Smith PetscScalar *v_i; 16122d61bbb3SSatish Balay 16132d61bbb3SSatish Balay PetscFunctionBegin; 1614d0f46423SBarry Smith bs = A->rmap->bs; 161552768537SHong Zhang bs2 = bs * bs; 16165f80ce2aSJacob Faibussowitsch PetscCheck(row >= 0 && row < A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " out of range", row); 16172d61bbb3SSatish Balay 16182d61bbb3SSatish Balay bn = row / bs; /* Block number */ 16192d61bbb3SSatish Balay bp = row % bs; /* Block Position */ 16202d61bbb3SSatish Balay M = ai[bn + 1] - ai[bn]; 16212d61bbb3SSatish Balay *nz = bs * M; 16222d61bbb3SSatish Balay 16232d61bbb3SSatish Balay if (v) { 1624f4259b30SLisandro Dalcin *v = NULL; 16252d61bbb3SSatish Balay if (*nz) { 16269566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(*nz, v)); 16272d61bbb3SSatish Balay for (i = 0; i < M; i++) { /* for each block in the block row */ 16282d61bbb3SSatish Balay v_i = *v + i * bs; 16292d61bbb3SSatish Balay aa_i = aa + bs2 * (ai[bn] + i); 163026fbe8dcSKarl Rupp for (j = bp, k = 0; j < bs2; j += bs, k++) v_i[k] = aa_i[j]; 16312d61bbb3SSatish Balay } 16322d61bbb3SSatish Balay } 16332d61bbb3SSatish Balay } 16342d61bbb3SSatish Balay 16352d61bbb3SSatish Balay if (idx) { 1636f4259b30SLisandro Dalcin *idx = NULL; 16372d61bbb3SSatish Balay if (*nz) { 16389566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(*nz, idx)); 16392d61bbb3SSatish Balay for (i = 0; i < M; i++) { /* for each block in the block row */ 16402d61bbb3SSatish Balay idx_i = *idx + i * bs; 16412d61bbb3SSatish Balay itmp = bs * aj[ai[bn] + i]; 164226fbe8dcSKarl Rupp for (j = 0; j < bs; j++) idx_i[j] = itmp++; 16432d61bbb3SSatish Balay } 16442d61bbb3SSatish Balay } 16452d61bbb3SSatish Balay } 16462d61bbb3SSatish Balay PetscFunctionReturn(0); 16472d61bbb3SSatish Balay } 16482d61bbb3SSatish Balay 16499371c9d4SSatish Balay 
PetscErrorCode MatGetRow_SeqBAIJ(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v) { 165052768537SHong Zhang Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 165152768537SHong Zhang 165252768537SHong Zhang PetscFunctionBegin; 16539566063dSJacob Faibussowitsch PetscCall(MatGetRow_SeqBAIJ_private(A, row, nz, idx, v, a->i, a->j, a->a)); 165452768537SHong Zhang PetscFunctionReturn(0); 165552768537SHong Zhang } 165652768537SHong Zhang 16579371c9d4SSatish Balay PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v) { 16582d61bbb3SSatish Balay PetscFunctionBegin; 1659cb4a9cd9SHong Zhang if (nz) *nz = 0; 16609566063dSJacob Faibussowitsch if (idx) PetscCall(PetscFree(*idx)); 16619566063dSJacob Faibussowitsch if (v) PetscCall(PetscFree(*v)); 16622d61bbb3SSatish Balay PetscFunctionReturn(0); 16632d61bbb3SSatish Balay } 16642d61bbb3SSatish Balay 16659371c9d4SSatish Balay PetscErrorCode MatTranspose_SeqBAIJ(Mat A, MatReuse reuse, Mat *B) { 166620e84f26SHong Zhang Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data, *at; 16672d61bbb3SSatish Balay Mat C; 166820e84f26SHong Zhang PetscInt i, j, k, *aj = a->j, *ai = a->i, bs = A->rmap->bs, mbs = a->mbs, nbs = a->nbs, *atfill; 166920e84f26SHong Zhang PetscInt bs2 = a->bs2, *ati, *atj, anzj, kr; 167020e84f26SHong Zhang MatScalar *ata, *aa = a->a; 16712d61bbb3SSatish Balay 16722d61bbb3SSatish Balay PetscFunctionBegin; 16737fb60732SBarry Smith if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *B)); 16749566063dSJacob Faibussowitsch PetscCall(PetscCalloc1(1 + nbs, &atfill)); 1675cf37664fSBarry Smith if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) { 167620e84f26SHong Zhang for (i = 0; i < ai[mbs]; i++) atfill[aj[i]] += 1; /* count num of non-zeros in row aj[i] */ 16772d61bbb3SSatish Balay 16789566063dSJacob Faibussowitsch PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &C)); 16799566063dSJacob Faibussowitsch PetscCall(MatSetSizes(C, 
A->cmap->n, A->rmap->N, A->cmap->n, A->rmap->N)); 16809566063dSJacob Faibussowitsch PetscCall(MatSetType(C, ((PetscObject)A)->type_name)); 16819566063dSJacob Faibussowitsch PetscCall(MatSeqBAIJSetPreallocation(C, bs, 0, atfill)); 168220e84f26SHong Zhang 168320e84f26SHong Zhang at = (Mat_SeqBAIJ *)C->data; 168420e84f26SHong Zhang ati = at->i; 168520e84f26SHong Zhang for (i = 0; i < nbs; i++) at->ilen[i] = at->imax[i] = ati[i + 1] - ati[i]; 1686fc4dec0aSBarry Smith } else { 1687fc4dec0aSBarry Smith C = *B; 168820e84f26SHong Zhang at = (Mat_SeqBAIJ *)C->data; 168920e84f26SHong Zhang ati = at->i; 1690fc4dec0aSBarry Smith } 1691fc4dec0aSBarry Smith 169220e84f26SHong Zhang atj = at->j; 169320e84f26SHong Zhang ata = at->a; 169420e84f26SHong Zhang 169520e84f26SHong Zhang /* Copy ati into atfill so we have locations of the next free space in atj */ 16969566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(atfill, ati, nbs)); 169720e84f26SHong Zhang 169820e84f26SHong Zhang /* Walk through A row-wise and mark nonzero entries of A^T. */ 16992d61bbb3SSatish Balay for (i = 0; i < mbs; i++) { 170020e84f26SHong Zhang anzj = ai[i + 1] - ai[i]; 170120e84f26SHong Zhang for (j = 0; j < anzj; j++) { 170220e84f26SHong Zhang atj[atfill[*aj]] = i; 170320e84f26SHong Zhang for (kr = 0; kr < bs; kr++) { 1704ad540459SPierre Jolivet for (k = 0; k < bs; k++) ata[bs2 * atfill[*aj] + k * bs + kr] = *aa++; 17052d61bbb3SSatish Balay } 170620e84f26SHong Zhang atfill[*aj++] += 1; 170720e84f26SHong Zhang } 170820e84f26SHong Zhang } 17099566063dSJacob Faibussowitsch PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY)); 17109566063dSJacob Faibussowitsch PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY)); 17112d61bbb3SSatish Balay 171220e84f26SHong Zhang /* Clean up temporary space and complete requests. 
*/ 17139566063dSJacob Faibussowitsch PetscCall(PetscFree(atfill)); 171420e84f26SHong Zhang 1715cf37664fSBarry Smith if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) { 17169566063dSJacob Faibussowitsch PetscCall(MatSetBlockSizes(C, PetscAbs(A->cmap->bs), PetscAbs(A->rmap->bs))); 17172d61bbb3SSatish Balay *B = C; 17182d61bbb3SSatish Balay } else { 17199566063dSJacob Faibussowitsch PetscCall(MatHeaderMerge(A, &C)); 17202d61bbb3SSatish Balay } 17212d61bbb3SSatish Balay PetscFunctionReturn(0); 17222d61bbb3SSatish Balay } 17232d61bbb3SSatish Balay 17249371c9d4SSatish Balay PetscErrorCode MatIsTranspose_SeqBAIJ(Mat A, Mat B, PetscReal tol, PetscBool *f) { 1725453d3561SHong Zhang Mat Btrans; 1726453d3561SHong Zhang 1727453d3561SHong Zhang PetscFunctionBegin; 1728453d3561SHong Zhang *f = PETSC_FALSE; 1729acd337a6SBarry Smith PetscCall(MatTranspose(A, MAT_INITIAL_MATRIX, &Btrans)); 17309566063dSJacob Faibussowitsch PetscCall(MatEqual_SeqBAIJ(B, Btrans, f)); 17319566063dSJacob Faibussowitsch PetscCall(MatDestroy(&Btrans)); 1732453d3561SHong Zhang PetscFunctionReturn(0); 1733453d3561SHong Zhang } 1734453d3561SHong Zhang 1735618cc2edSLisandro Dalcin /* Used for both SeqBAIJ and SeqSBAIJ matrices */ 17369371c9d4SSatish Balay PetscErrorCode MatView_SeqBAIJ_Binary(Mat mat, PetscViewer viewer) { 1737b51a4376SLisandro Dalcin Mat_SeqBAIJ *A = (Mat_SeqBAIJ *)mat->data; 1738b51a4376SLisandro Dalcin PetscInt header[4], M, N, m, bs, nz, cnt, i, j, k, l; 1739b51a4376SLisandro Dalcin PetscInt *rowlens, *colidxs; 1740b51a4376SLisandro Dalcin PetscScalar *matvals; 17412593348eSBarry Smith 17423a40ed3dSBarry Smith PetscFunctionBegin; 17439566063dSJacob Faibussowitsch PetscCall(PetscViewerSetUp(viewer)); 17443b2fbd54SBarry Smith 1745b51a4376SLisandro Dalcin M = mat->rmap->N; 1746b51a4376SLisandro Dalcin N = mat->cmap->N; 1747b51a4376SLisandro Dalcin m = mat->rmap->n; 1748b51a4376SLisandro Dalcin bs = mat->rmap->bs; 1749b51a4376SLisandro Dalcin nz = bs * bs * A->nz; 
17502593348eSBarry Smith 1751b51a4376SLisandro Dalcin /* write matrix header */ 1752b51a4376SLisandro Dalcin header[0] = MAT_FILE_CLASSID; 17539371c9d4SSatish Balay header[1] = M; 17549371c9d4SSatish Balay header[2] = N; 17559371c9d4SSatish Balay header[3] = nz; 17569566063dSJacob Faibussowitsch PetscCall(PetscViewerBinaryWrite(viewer, header, 4, PETSC_INT)); 17572593348eSBarry Smith 1758b51a4376SLisandro Dalcin /* store row lengths */ 17599566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(m, &rowlens)); 1760b51a4376SLisandro Dalcin for (cnt = 0, i = 0; i < A->mbs; i++) 17619371c9d4SSatish Balay for (j = 0; j < bs; j++) rowlens[cnt++] = bs * (A->i[i + 1] - A->i[i]); 17629566063dSJacob Faibussowitsch PetscCall(PetscViewerBinaryWrite(viewer, rowlens, m, PETSC_INT)); 17639566063dSJacob Faibussowitsch PetscCall(PetscFree(rowlens)); 1764b51a4376SLisandro Dalcin 1765b51a4376SLisandro Dalcin /* store column indices */ 17669566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(nz, &colidxs)); 1767b51a4376SLisandro Dalcin for (cnt = 0, i = 0; i < A->mbs; i++) 1768b51a4376SLisandro Dalcin for (k = 0; k < bs; k++) 1769b51a4376SLisandro Dalcin for (j = A->i[i]; j < A->i[i + 1]; j++) 17709371c9d4SSatish Balay for (l = 0; l < bs; l++) colidxs[cnt++] = bs * A->j[j] + l; 17715f80ce2aSJacob Faibussowitsch PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_LIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt_FMT, cnt, nz); 17729566063dSJacob Faibussowitsch PetscCall(PetscViewerBinaryWrite(viewer, colidxs, nz, PETSC_INT)); 17739566063dSJacob Faibussowitsch PetscCall(PetscFree(colidxs)); 17742593348eSBarry Smith 17752593348eSBarry Smith /* store nonzero values */ 17769566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(nz, &matvals)); 1777b51a4376SLisandro Dalcin for (cnt = 0, i = 0; i < A->mbs; i++) 1778b51a4376SLisandro Dalcin for (k = 0; k < bs; k++) 1779b51a4376SLisandro Dalcin for (j = A->i[i]; j < A->i[i + 1]; j++) 17809371c9d4SSatish Balay for (l = 0; l < bs; l++) 
matvals[cnt++] = A->a[bs * (bs * j + l) + k]; 17815f80ce2aSJacob Faibussowitsch PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_LIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt_FMT, cnt, nz); 17829566063dSJacob Faibussowitsch PetscCall(PetscViewerBinaryWrite(viewer, matvals, nz, PETSC_SCALAR)); 17839566063dSJacob Faibussowitsch PetscCall(PetscFree(matvals)); 1784ce6f0cecSBarry Smith 1785b51a4376SLisandro Dalcin /* write block size option to the viewer's .info file */ 17869566063dSJacob Faibussowitsch PetscCall(MatView_Binary_BlockSizes(mat, viewer)); 17873a40ed3dSBarry Smith PetscFunctionReturn(0); 17882593348eSBarry Smith } 17892593348eSBarry Smith 17909371c9d4SSatish Balay static PetscErrorCode MatView_SeqBAIJ_ASCII_structonly(Mat A, PetscViewer viewer) { 17917dc0baabSHong Zhang Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 17927dc0baabSHong Zhang PetscInt i, bs = A->rmap->bs, k; 17937dc0baabSHong Zhang 17947dc0baabSHong Zhang PetscFunctionBegin; 17959566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_FALSE)); 17967dc0baabSHong Zhang for (i = 0; i < a->mbs; i++) { 17979566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, "row %" PetscInt_FMT "-%" PetscInt_FMT ":", i * bs, i * bs + bs - 1)); 179848a46eb9SPierre Jolivet for (k = a->i[i]; k < a->i[i + 1]; k++) PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT "-%" PetscInt_FMT ") ", bs * a->j[k], bs * a->j[k] + bs - 1)); 17999566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, "\n")); 18007dc0baabSHong Zhang } 18019566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_TRUE)); 18027dc0baabSHong Zhang PetscFunctionReturn(0); 18037dc0baabSHong Zhang } 18047dc0baabSHong Zhang 18059371c9d4SSatish Balay static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A, PetscViewer viewer) { 1806b6490206SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 1807d0f46423SBarry Smith PetscInt i, j, bs = A->rmap->bs, k, l, bs2 = 
a->bs2; 1808f3ef73ceSBarry Smith PetscViewerFormat format; 18092593348eSBarry Smith 18103a40ed3dSBarry Smith PetscFunctionBegin; 18117dc0baabSHong Zhang if (A->structure_only) { 18129566063dSJacob Faibussowitsch PetscCall(MatView_SeqBAIJ_ASCII_structonly(A, viewer)); 18137dc0baabSHong Zhang PetscFunctionReturn(0); 18147dc0baabSHong Zhang } 18157dc0baabSHong Zhang 18169566063dSJacob Faibussowitsch PetscCall(PetscViewerGetFormat(viewer, &format)); 1817456192e2SBarry Smith if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 18189566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " block size is %" PetscInt_FMT "\n", bs)); 1819fb9695e5SSatish Balay } else if (format == PETSC_VIEWER_ASCII_MATLAB) { 1820ade3a672SBarry Smith const char *matname; 1821bcd9e38bSBarry Smith Mat aij; 18229566063dSJacob Faibussowitsch PetscCall(MatConvert(A, MATSEQAIJ, MAT_INITIAL_MATRIX, &aij)); 18239566063dSJacob Faibussowitsch PetscCall(PetscObjectGetName((PetscObject)A, &matname)); 18249566063dSJacob Faibussowitsch PetscCall(PetscObjectSetName((PetscObject)aij, matname)); 18259566063dSJacob Faibussowitsch PetscCall(MatView(aij, viewer)); 18269566063dSJacob Faibussowitsch PetscCall(MatDestroy(&aij)); 182704929863SHong Zhang } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 182804929863SHong Zhang PetscFunctionReturn(0); 1829fb9695e5SSatish Balay } else if (format == PETSC_VIEWER_ASCII_COMMON) { 18309566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_FALSE)); 183144cd7ae7SLois Curfman McInnes for (i = 0; i < a->mbs; i++) { 183244cd7ae7SLois Curfman McInnes for (j = 0; j < bs; j++) { 18339566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, "row %" PetscInt_FMT ":", i * bs + j)); 183444cd7ae7SLois Curfman McInnes for (k = a->i[i]; k < a->i[i + 1]; k++) { 183544cd7ae7SLois Curfman McInnes for (l = 0; l < bs; l++) { 1836aa482453SBarry Smith #if defined(PETSC_USE_COMPLEX) 18370e6d2581SBarry Smith 
if (PetscImaginaryPart(a->a[bs2 * k + l * bs + j]) > 0.0 && PetscRealPart(a->a[bs2 * k + l * bs + j]) != 0.0) { 18389371c9d4SSatish Balay PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g + %gi) ", bs * a->j[k] + l, (double)PetscRealPart(a->a[bs2 * k + l * bs + j]), (double)PetscImaginaryPart(a->a[bs2 * k + l * bs + j]))); 18390e6d2581SBarry Smith } else if (PetscImaginaryPart(a->a[bs2 * k + l * bs + j]) < 0.0 && PetscRealPart(a->a[bs2 * k + l * bs + j]) != 0.0) { 18409371c9d4SSatish Balay PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g - %gi) ", bs * a->j[k] + l, (double)PetscRealPart(a->a[bs2 * k + l * bs + j]), -(double)PetscImaginaryPart(a->a[bs2 * k + l * bs + j]))); 18410e6d2581SBarry Smith } else if (PetscRealPart(a->a[bs2 * k + l * bs + j]) != 0.0) { 18429566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g) ", bs * a->j[k] + l, (double)PetscRealPart(a->a[bs2 * k + l * bs + j]))); 18430ef38995SBarry Smith } 184444cd7ae7SLois Curfman McInnes #else 184548a46eb9SPierre Jolivet if (a->a[bs2 * k + l * bs + j] != 0.0) PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g) ", bs * a->j[k] + l, (double)a->a[bs2 * k + l * bs + j])); 184644cd7ae7SLois Curfman McInnes #endif 184744cd7ae7SLois Curfman McInnes } 184844cd7ae7SLois Curfman McInnes } 18499566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, "\n")); 185044cd7ae7SLois Curfman McInnes } 185144cd7ae7SLois Curfman McInnes } 18529566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_TRUE)); 18530ef38995SBarry Smith } else { 18549566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_FALSE)); 1855b6490206SBarry Smith for (i = 0; i < a->mbs; i++) { 1856b6490206SBarry Smith for (j = 0; j < bs; j++) { 18579566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, "row %" PetscInt_FMT ":", i * bs + j)); 1858b6490206SBarry Smith for (k = a->i[i]; k < a->i[i + 
1]; k++) { 1859b6490206SBarry Smith for (l = 0; l < bs; l++) { 1860aa482453SBarry Smith #if defined(PETSC_USE_COMPLEX) 18610e6d2581SBarry Smith if (PetscImaginaryPart(a->a[bs2 * k + l * bs + j]) > 0.0) { 18629371c9d4SSatish Balay PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g + %g i) ", bs * a->j[k] + l, (double)PetscRealPart(a->a[bs2 * k + l * bs + j]), (double)PetscImaginaryPart(a->a[bs2 * k + l * bs + j]))); 18630e6d2581SBarry Smith } else if (PetscImaginaryPart(a->a[bs2 * k + l * bs + j]) < 0.0) { 18649371c9d4SSatish Balay PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g - %g i) ", bs * a->j[k] + l, (double)PetscRealPart(a->a[bs2 * k + l * bs + j]), -(double)PetscImaginaryPart(a->a[bs2 * k + l * bs + j]))); 18650ef38995SBarry Smith } else { 18669566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g) ", bs * a->j[k] + l, (double)PetscRealPart(a->a[bs2 * k + l * bs + j]))); 186788685aaeSLois Curfman McInnes } 186888685aaeSLois Curfman McInnes #else 18699566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g) ", bs * a->j[k] + l, (double)a->a[bs2 * k + l * bs + j])); 187088685aaeSLois Curfman McInnes #endif 18712593348eSBarry Smith } 18722593348eSBarry Smith } 18739566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, "\n")); 18742593348eSBarry Smith } 18752593348eSBarry Smith } 18769566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_TRUE)); 1877b6490206SBarry Smith } 18789566063dSJacob Faibussowitsch PetscCall(PetscViewerFlush(viewer)); 18793a40ed3dSBarry Smith PetscFunctionReturn(0); 18802593348eSBarry Smith } 18812593348eSBarry Smith 18829804daf3SBarry Smith #include <petscdraw.h> 18839371c9d4SSatish Balay static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw, void *Aa) { 188477ed5343SBarry Smith Mat A = (Mat)Aa; 18853270192aSSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 
1886d0f46423SBarry Smith PetscInt row, i, j, k, l, mbs = a->mbs, color, bs = A->rmap->bs, bs2 = a->bs2; 18870e6d2581SBarry Smith PetscReal xl, yl, xr, yr, x_l, x_r, y_l, y_r; 18883f1db9ecSBarry Smith MatScalar *aa; 1889b0a32e0cSBarry Smith PetscViewer viewer; 1890b3e7f47fSJed Brown PetscViewerFormat format; 18913270192aSSatish Balay 18923a40ed3dSBarry Smith PetscFunctionBegin; 18939566063dSJacob Faibussowitsch PetscCall(PetscObjectQuery((PetscObject)A, "Zoomviewer", (PetscObject *)&viewer)); 18949566063dSJacob Faibussowitsch PetscCall(PetscViewerGetFormat(viewer, &format)); 18959566063dSJacob Faibussowitsch PetscCall(PetscDrawGetCoordinates(draw, &xl, &yl, &xr, &yr)); 189677ed5343SBarry Smith 18973270192aSSatish Balay /* loop over matrix elements drawing boxes */ 1898b3e7f47fSJed Brown 1899b3e7f47fSJed Brown if (format != PETSC_VIEWER_DRAW_CONTOUR) { 1900d0609cedSBarry Smith PetscDrawCollectiveBegin(draw); 1901383922c3SLisandro Dalcin /* Blue for negative, Cyan for zero and Red for positive */ 1902b0a32e0cSBarry Smith color = PETSC_DRAW_BLUE; 19033270192aSSatish Balay for (i = 0, row = 0; i < mbs; i++, row += bs) { 19043270192aSSatish Balay for (j = a->i[i]; j < a->i[i + 1]; j++) { 19059371c9d4SSatish Balay y_l = A->rmap->N - row - 1.0; 19069371c9d4SSatish Balay y_r = y_l + 1.0; 19079371c9d4SSatish Balay x_l = a->j[j] * bs; 19089371c9d4SSatish Balay x_r = x_l + 1.0; 19093270192aSSatish Balay aa = a->a + j * bs2; 19103270192aSSatish Balay for (k = 0; k < bs; k++) { 19113270192aSSatish Balay for (l = 0; l < bs; l++) { 19120e6d2581SBarry Smith if (PetscRealPart(*aa++) >= 0.) 
continue; 19139566063dSJacob Faibussowitsch PetscCall(PetscDrawRectangle(draw, x_l + k, y_l - l, x_r + k, y_r - l, color, color, color, color)); 19143270192aSSatish Balay } 19153270192aSSatish Balay } 19163270192aSSatish Balay } 19173270192aSSatish Balay } 1918b0a32e0cSBarry Smith color = PETSC_DRAW_CYAN; 19193270192aSSatish Balay for (i = 0, row = 0; i < mbs; i++, row += bs) { 19203270192aSSatish Balay for (j = a->i[i]; j < a->i[i + 1]; j++) { 19219371c9d4SSatish Balay y_l = A->rmap->N - row - 1.0; 19229371c9d4SSatish Balay y_r = y_l + 1.0; 19239371c9d4SSatish Balay x_l = a->j[j] * bs; 19249371c9d4SSatish Balay x_r = x_l + 1.0; 19253270192aSSatish Balay aa = a->a + j * bs2; 19263270192aSSatish Balay for (k = 0; k < bs; k++) { 19273270192aSSatish Balay for (l = 0; l < bs; l++) { 19280e6d2581SBarry Smith if (PetscRealPart(*aa++) != 0.) continue; 19299566063dSJacob Faibussowitsch PetscCall(PetscDrawRectangle(draw, x_l + k, y_l - l, x_r + k, y_r - l, color, color, color, color)); 19303270192aSSatish Balay } 19313270192aSSatish Balay } 19323270192aSSatish Balay } 19333270192aSSatish Balay } 1934b0a32e0cSBarry Smith color = PETSC_DRAW_RED; 19353270192aSSatish Balay for (i = 0, row = 0; i < mbs; i++, row += bs) { 19363270192aSSatish Balay for (j = a->i[i]; j < a->i[i + 1]; j++) { 19379371c9d4SSatish Balay y_l = A->rmap->N - row - 1.0; 19389371c9d4SSatish Balay y_r = y_l + 1.0; 19399371c9d4SSatish Balay x_l = a->j[j] * bs; 19409371c9d4SSatish Balay x_r = x_l + 1.0; 19413270192aSSatish Balay aa = a->a + j * bs2; 19423270192aSSatish Balay for (k = 0; k < bs; k++) { 19433270192aSSatish Balay for (l = 0; l < bs; l++) { 19440e6d2581SBarry Smith if (PetscRealPart(*aa++) <= 0.) 
continue; 19459566063dSJacob Faibussowitsch PetscCall(PetscDrawRectangle(draw, x_l + k, y_l - l, x_r + k, y_r - l, color, color, color, color)); 19463270192aSSatish Balay } 19473270192aSSatish Balay } 19483270192aSSatish Balay } 19493270192aSSatish Balay } 1950d0609cedSBarry Smith PetscDrawCollectiveEnd(draw); 1951b3e7f47fSJed Brown } else { 1952b3e7f47fSJed Brown /* use contour shading to indicate magnitude of values */ 1953b3e7f47fSJed Brown /* first determine max of all nonzero values */ 1954b05fc000SLisandro Dalcin PetscReal minv = 0.0, maxv = 0.0; 1955b3e7f47fSJed Brown PetscDraw popup; 1956b3e7f47fSJed Brown 1957b3e7f47fSJed Brown for (i = 0; i < a->nz * a->bs2; i++) { 1958b3e7f47fSJed Brown if (PetscAbsScalar(a->a[i]) > maxv) maxv = PetscAbsScalar(a->a[i]); 1959b3e7f47fSJed Brown } 1960383922c3SLisandro Dalcin if (minv >= maxv) maxv = minv + PETSC_SMALL; 19619566063dSJacob Faibussowitsch PetscCall(PetscDrawGetPopup(draw, &popup)); 19629566063dSJacob Faibussowitsch PetscCall(PetscDrawScalePopup(popup, 0.0, maxv)); 1963383922c3SLisandro Dalcin 1964d0609cedSBarry Smith PetscDrawCollectiveBegin(draw); 1965b3e7f47fSJed Brown for (i = 0, row = 0; i < mbs; i++, row += bs) { 1966b3e7f47fSJed Brown for (j = a->i[i]; j < a->i[i + 1]; j++) { 19679371c9d4SSatish Balay y_l = A->rmap->N - row - 1.0; 19689371c9d4SSatish Balay y_r = y_l + 1.0; 19699371c9d4SSatish Balay x_l = a->j[j] * bs; 19709371c9d4SSatish Balay x_r = x_l + 1.0; 1971b3e7f47fSJed Brown aa = a->a + j * bs2; 1972b3e7f47fSJed Brown for (k = 0; k < bs; k++) { 1973b3e7f47fSJed Brown for (l = 0; l < bs; l++) { 1974383922c3SLisandro Dalcin MatScalar v = *aa++; 1975383922c3SLisandro Dalcin color = PetscDrawRealToColor(PetscAbsScalar(v), minv, maxv); 19769566063dSJacob Faibussowitsch PetscCall(PetscDrawRectangle(draw, x_l + k, y_l - l, x_r + k, y_r - l, color, color, color, color)); 1977b3e7f47fSJed Brown } 1978b3e7f47fSJed Brown } 1979b3e7f47fSJed Brown } 1980b3e7f47fSJed Brown } 1981d0609cedSBarry Smith 
PetscDrawCollectiveEnd(draw); 1982b3e7f47fSJed Brown } 198377ed5343SBarry Smith PetscFunctionReturn(0); 198477ed5343SBarry Smith } 19853270192aSSatish Balay 19869371c9d4SSatish Balay static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A, PetscViewer viewer) { 19870e6d2581SBarry Smith PetscReal xl, yl, xr, yr, w, h; 1988b0a32e0cSBarry Smith PetscDraw draw; 1989ace3abfcSBarry Smith PetscBool isnull; 19903270192aSSatish Balay 199177ed5343SBarry Smith PetscFunctionBegin; 19929566063dSJacob Faibussowitsch PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw)); 19939566063dSJacob Faibussowitsch PetscCall(PetscDrawIsNull(draw, &isnull)); 199445f3bb6eSLisandro Dalcin if (isnull) PetscFunctionReturn(0); 199577ed5343SBarry Smith 19969371c9d4SSatish Balay xr = A->cmap->n; 19979371c9d4SSatish Balay yr = A->rmap->N; 19989371c9d4SSatish Balay h = yr / 10.0; 19999371c9d4SSatish Balay w = xr / 10.0; 20009371c9d4SSatish Balay xr += w; 20019371c9d4SSatish Balay yr += h; 20029371c9d4SSatish Balay xl = -w; 20039371c9d4SSatish Balay yl = -h; 20049566063dSJacob Faibussowitsch PetscCall(PetscDrawSetCoordinates(draw, xl, yl, xr, yr)); 20059566063dSJacob Faibussowitsch PetscCall(PetscObjectCompose((PetscObject)A, "Zoomviewer", (PetscObject)viewer)); 20069566063dSJacob Faibussowitsch PetscCall(PetscDrawZoom(draw, MatView_SeqBAIJ_Draw_Zoom, A)); 20079566063dSJacob Faibussowitsch PetscCall(PetscObjectCompose((PetscObject)A, "Zoomviewer", NULL)); 20089566063dSJacob Faibussowitsch PetscCall(PetscDrawSave(draw)); 20093a40ed3dSBarry Smith PetscFunctionReturn(0); 20103270192aSSatish Balay } 20113270192aSSatish Balay 20129371c9d4SSatish Balay PetscErrorCode MatView_SeqBAIJ(Mat A, PetscViewer viewer) { 2013ace3abfcSBarry Smith PetscBool iascii, isbinary, isdraw; 20142593348eSBarry Smith 20153a40ed3dSBarry Smith PetscFunctionBegin; 20169566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii)); 20179566063dSJacob Faibussowitsch 
PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary)); 20189566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw)); 201932077d6dSBarry Smith if (iascii) { 20209566063dSJacob Faibussowitsch PetscCall(MatView_SeqBAIJ_ASCII(A, viewer)); 20210f5bd95cSBarry Smith } else if (isbinary) { 20229566063dSJacob Faibussowitsch PetscCall(MatView_SeqBAIJ_Binary(A, viewer)); 20230f5bd95cSBarry Smith } else if (isdraw) { 20249566063dSJacob Faibussowitsch PetscCall(MatView_SeqBAIJ_Draw(A, viewer)); 20255cd90555SBarry Smith } else { 2026a5e6ed63SBarry Smith Mat B; 20279566063dSJacob Faibussowitsch PetscCall(MatConvert(A, MATSEQAIJ, MAT_INITIAL_MATRIX, &B)); 20289566063dSJacob Faibussowitsch PetscCall(MatView(B, viewer)); 20299566063dSJacob Faibussowitsch PetscCall(MatDestroy(&B)); 20302593348eSBarry Smith } 20313a40ed3dSBarry Smith PetscFunctionReturn(0); 20322593348eSBarry Smith } 2033b6490206SBarry Smith 20349371c9d4SSatish Balay PetscErrorCode MatGetValues_SeqBAIJ(Mat A, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], PetscScalar v[]) { 2035cd0e1443SSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 2036c1ac3661SBarry Smith PetscInt *rp, k, low, high, t, row, nrow, i, col, l, *aj = a->j; 2037c1ac3661SBarry Smith PetscInt *ai = a->i, *ailen = a->ilen; 2038d0f46423SBarry Smith PetscInt brow, bcol, ridx, cidx, bs = A->rmap->bs, bs2 = a->bs2; 203997e567efSBarry Smith MatScalar *ap, *aa = a->a; 2040cd0e1443SSatish Balay 20413a40ed3dSBarry Smith PetscFunctionBegin; 20422d61bbb3SSatish Balay for (k = 0; k < m; k++) { /* loop over rows */ 20439371c9d4SSatish Balay row = im[k]; 20449371c9d4SSatish Balay brow = row / bs; 20459371c9d4SSatish Balay if (row < 0) { 20469371c9d4SSatish Balay v += n; 20479371c9d4SSatish Balay continue; 20489371c9d4SSatish Balay } /* negative row */ 204954c59aa7SJacob Faibussowitsch PetscCheck(row < A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row 
%" PetscInt_FMT " too large", row); 2050d29f2997SMatthew Woehlke rp = aj ? aj + ai[brow] : NULL; /* mustn't add to NULL, that is UB */ 2051d29f2997SMatthew Woehlke ap = aa ? aa + bs2 * ai[brow] : NULL; /* mustn't add to NULL, that is UB */ 20522c3acbe9SBarry Smith nrow = ailen[brow]; 20532d61bbb3SSatish Balay for (l = 0; l < n; l++) { /* loop over columns */ 20549371c9d4SSatish Balay if (in[l] < 0) { 20559371c9d4SSatish Balay v++; 20569371c9d4SSatish Balay continue; 20579371c9d4SSatish Balay } /* negative column */ 205854c59aa7SJacob Faibussowitsch PetscCheck(in[l] < A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column %" PetscInt_FMT " too large", in[l]); 20592d61bbb3SSatish Balay col = in[l]; 20602d61bbb3SSatish Balay bcol = col / bs; 20612d61bbb3SSatish Balay cidx = col % bs; 20622d61bbb3SSatish Balay ridx = row % bs; 20632d61bbb3SSatish Balay high = nrow; 20642d61bbb3SSatish Balay low = 0; /* assume unsorted */ 20652d61bbb3SSatish Balay while (high - low > 5) { 2066cd0e1443SSatish Balay t = (low + high) / 2; 2067cd0e1443SSatish Balay if (rp[t] > bcol) high = t; 2068cd0e1443SSatish Balay else low = t; 2069cd0e1443SSatish Balay } 2070cd0e1443SSatish Balay for (i = low; i < high; i++) { 2071cd0e1443SSatish Balay if (rp[i] > bcol) break; 2072cd0e1443SSatish Balay if (rp[i] == bcol) { 20732d61bbb3SSatish Balay *v++ = ap[bs2 * i + bs * cidx + ridx]; 20742d61bbb3SSatish Balay goto finished; 2075cd0e1443SSatish Balay } 2076cd0e1443SSatish Balay } 207797e567efSBarry Smith *v++ = 0.0; 20782d61bbb3SSatish Balay finished:; 2079cd0e1443SSatish Balay } 2080cd0e1443SSatish Balay } 20813a40ed3dSBarry Smith PetscFunctionReturn(0); 2082cd0e1443SSatish Balay } 2083cd0e1443SSatish Balay 20849371c9d4SSatish Balay PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat A, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode is) { 208592c4ed94SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 2086e2ee6c50SBarry Smith PetscInt *rp, k, 
low, high, t, ii, jj, row, nrow, i, col, l, rmax, N, lastcol = -1; 2087c1ac3661SBarry Smith PetscInt *imax = a->imax, *ai = a->i, *ailen = a->ilen; 2088d0f46423SBarry Smith PetscInt *aj = a->j, nonew = a->nonew, bs2 = a->bs2, bs = A->rmap->bs, stepval; 2089ace3abfcSBarry Smith PetscBool roworiented = a->roworiented; 2090dd6ea824SBarry Smith const PetscScalar *value = v; 20919d243f67SHong Zhang MatScalar *ap = NULL, *aa = a->a, *bap; 209292c4ed94SBarry Smith 20933a40ed3dSBarry Smith PetscFunctionBegin; 20940e324ae4SSatish Balay if (roworiented) { 20950e324ae4SSatish Balay stepval = (n - 1) * bs; 20960e324ae4SSatish Balay } else { 20970e324ae4SSatish Balay stepval = (m - 1) * bs; 20980e324ae4SSatish Balay } 209992c4ed94SBarry Smith for (k = 0; k < m; k++) { /* loop over added rows */ 210092c4ed94SBarry Smith row = im[k]; 21015ef9f2a5SBarry Smith if (row < 0) continue; 21026bdcaf15SBarry Smith PetscCheck(row < a->mbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Block row index too large %" PetscInt_FMT " max %" PetscInt_FMT, row, a->mbs - 1); 210392c4ed94SBarry Smith rp = aj + ai[row]; 21047dc0baabSHong Zhang if (!A->structure_only) ap = aa + bs2 * ai[row]; 210592c4ed94SBarry Smith rmax = imax[row]; 210692c4ed94SBarry Smith nrow = ailen[row]; 210792c4ed94SBarry Smith low = 0; 2108c71e6ed7SBarry Smith high = nrow; 210992c4ed94SBarry Smith for (l = 0; l < n; l++) { /* loop over added columns */ 21105ef9f2a5SBarry Smith if (in[l] < 0) continue; 21116bdcaf15SBarry Smith PetscCheck(in[l] < a->nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Block column index too large %" PetscInt_FMT " max %" PetscInt_FMT, in[l], a->nbs - 1); 211292c4ed94SBarry Smith col = in[l]; 21137dc0baabSHong Zhang if (!A->structure_only) { 211492c4ed94SBarry Smith if (roworiented) { 211553ef36baSBarry Smith value = v + (k * (stepval + bs) + l) * bs; 21160e324ae4SSatish Balay } else { 211753ef36baSBarry Smith value = v + (l * (stepval + bs) + k) * bs; 211892c4ed94SBarry Smith } 21197dc0baabSHong 
Zhang } 212026fbe8dcSKarl Rupp if (col <= lastcol) low = 0; 212126fbe8dcSKarl Rupp else high = nrow; 2122e2ee6c50SBarry Smith lastcol = col; 212392c4ed94SBarry Smith while (high - low > 7) { 212492c4ed94SBarry Smith t = (low + high) / 2; 212592c4ed94SBarry Smith if (rp[t] > col) high = t; 212692c4ed94SBarry Smith else low = t; 212792c4ed94SBarry Smith } 212892c4ed94SBarry Smith for (i = low; i < high; i++) { 212992c4ed94SBarry Smith if (rp[i] > col) break; 213092c4ed94SBarry Smith if (rp[i] == col) { 21317dc0baabSHong Zhang if (A->structure_only) goto noinsert2; 21328a84c255SSatish Balay bap = ap + bs2 * i; 21330e324ae4SSatish Balay if (roworiented) { 21348a84c255SSatish Balay if (is == ADD_VALUES) { 2135dd9472c6SBarry Smith for (ii = 0; ii < bs; ii++, value += stepval) { 2136ad540459SPierre Jolivet for (jj = ii; jj < bs2; jj += bs) bap[jj] += *value++; 2137dd9472c6SBarry Smith } 21380e324ae4SSatish Balay } else { 2139dd9472c6SBarry Smith for (ii = 0; ii < bs; ii++, value += stepval) { 2140ad540459SPierre Jolivet for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++; 2141dd9472c6SBarry Smith } 2142dd9472c6SBarry Smith } 21430e324ae4SSatish Balay } else { 21440e324ae4SSatish Balay if (is == ADD_VALUES) { 214553ef36baSBarry Smith for (ii = 0; ii < bs; ii++, value += bs + stepval) { 2146ad540459SPierre Jolivet for (jj = 0; jj < bs; jj++) bap[jj] += value[jj]; 214753ef36baSBarry Smith bap += bs; 2148dd9472c6SBarry Smith } 21490e324ae4SSatish Balay } else { 215053ef36baSBarry Smith for (ii = 0; ii < bs; ii++, value += bs + stepval) { 2151ad540459SPierre Jolivet for (jj = 0; jj < bs; jj++) bap[jj] = value[jj]; 215253ef36baSBarry Smith bap += bs; 21538a84c255SSatish Balay } 2154dd9472c6SBarry Smith } 2155dd9472c6SBarry Smith } 2156f1241b54SBarry Smith goto noinsert2; 215792c4ed94SBarry Smith } 215892c4ed94SBarry Smith } 215989280ab3SLois Curfman McInnes if (nonew == 1) goto noinsert2; 21605f80ce2aSJacob Faibussowitsch PetscCheck(nonew != -1, PETSC_COMM_SELF, 
PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new blocked index new nonzero block (%" PetscInt_FMT ", %" PetscInt_FMT ") in the matrix", row, col); 21617dc0baabSHong Zhang if (A->structure_only) { 21627dc0baabSHong Zhang MatSeqXAIJReallocateAIJ_structure_only(A, a->mbs, bs2, nrow, row, col, rmax, ai, aj, rp, imax, nonew, MatScalar); 21637dc0baabSHong Zhang } else { 2164fef13f97SBarry Smith MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, row, col, rmax, aa, ai, aj, rp, ap, imax, nonew, MatScalar); 21657dc0baabSHong Zhang } 21669371c9d4SSatish Balay N = nrow++ - 1; 21679371c9d4SSatish Balay high++; 216892c4ed94SBarry Smith /* shift up all the later entries in this row */ 21699566063dSJacob Faibussowitsch PetscCall(PetscArraymove(rp + i + 1, rp + i, N - i + 1)); 217092c4ed94SBarry Smith rp[i] = col; 21717dc0baabSHong Zhang if (!A->structure_only) { 21729566063dSJacob Faibussowitsch PetscCall(PetscArraymove(ap + bs2 * (i + 1), ap + bs2 * i, bs2 * (N - i + 1))); 21738a84c255SSatish Balay bap = ap + bs2 * i; 21740e324ae4SSatish Balay if (roworiented) { 2175dd9472c6SBarry Smith for (ii = 0; ii < bs; ii++, value += stepval) { 2176ad540459SPierre Jolivet for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++; 2177dd9472c6SBarry Smith } 21780e324ae4SSatish Balay } else { 2179dd9472c6SBarry Smith for (ii = 0; ii < bs; ii++, value += stepval) { 2180ad540459SPierre Jolivet for (jj = 0; jj < bs; jj++) *bap++ = *value++; 2181dd9472c6SBarry Smith } 2182dd9472c6SBarry Smith } 21837dc0baabSHong Zhang } 2184f1241b54SBarry Smith noinsert2:; 218592c4ed94SBarry Smith low = i; 218692c4ed94SBarry Smith } 218792c4ed94SBarry Smith ailen[row] = nrow; 218892c4ed94SBarry Smith } 21893a40ed3dSBarry Smith PetscFunctionReturn(0); 219092c4ed94SBarry Smith } 219126e093fcSHong Zhang 21929371c9d4SSatish Balay PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A, MatAssemblyType mode) { 2193584200bdSSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 2194580bdb30SBarry Smith PetscInt fshift = 0, i, *ai = a->i, *aj = 
a->j, *imax = a->imax; 2195d0f46423SBarry Smith PetscInt m = A->rmap->N, *ip, N, *ailen = a->ilen; 2196c1ac3661SBarry Smith PetscInt mbs = a->mbs, bs2 = a->bs2, rmax = 0; 21973f1db9ecSBarry Smith MatScalar *aa = a->a, *ap; 21983447b6efSHong Zhang PetscReal ratio = 0.6; 2199584200bdSSatish Balay 22003a40ed3dSBarry Smith PetscFunctionBegin; 22013a40ed3dSBarry Smith if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 2202584200bdSSatish Balay 220343ee02c3SBarry Smith if (m) rmax = ailen[0]; 2204584200bdSSatish Balay for (i = 1; i < mbs; i++) { 2205584200bdSSatish Balay /* move each row back by the amount of empty slots (fshift) before it*/ 2206584200bdSSatish Balay fshift += imax[i - 1] - ailen[i - 1]; 2207d402145bSBarry Smith rmax = PetscMax(rmax, ailen[i]); 2208584200bdSSatish Balay if (fshift) { 2209580bdb30SBarry Smith ip = aj + ai[i]; 2210580bdb30SBarry Smith ap = aa + bs2 * ai[i]; 2211584200bdSSatish Balay N = ailen[i]; 22129566063dSJacob Faibussowitsch PetscCall(PetscArraymove(ip - fshift, ip, N)); 221348a46eb9SPierre Jolivet if (!A->structure_only) PetscCall(PetscArraymove(ap - bs2 * fshift, ap, bs2 * N)); 2214672ba085SHong Zhang } 2215584200bdSSatish Balay ai[i] = ai[i - 1] + ailen[i - 1]; 2216584200bdSSatish Balay } 2217584200bdSSatish Balay if (mbs) { 2218584200bdSSatish Balay fshift += imax[mbs - 1] - ailen[mbs - 1]; 2219584200bdSSatish Balay ai[mbs] = ai[mbs - 1] + ailen[mbs - 1]; 2220584200bdSSatish Balay } 22217c565772SBarry Smith 2222584200bdSSatish Balay /* reset ilen and imax for each row */ 22237c565772SBarry Smith a->nonzerorowcnt = 0; 2224672ba085SHong Zhang if (A->structure_only) { 22259566063dSJacob Faibussowitsch PetscCall(PetscFree2(a->imax, a->ilen)); 2226672ba085SHong Zhang } else { /* !A->structure_only */ 2227584200bdSSatish Balay for (i = 0; i < mbs; i++) { 2228584200bdSSatish Balay ailen[i] = imax[i] = ai[i + 1] - ai[i]; 22297c565772SBarry Smith a->nonzerorowcnt += ((ai[i + 1] - ai[i]) > 0); 2230584200bdSSatish Balay } 
2231672ba085SHong Zhang } 2232a7c10996SSatish Balay a->nz = ai[mbs]; 2233584200bdSSatish Balay 2234584200bdSSatish Balay /* diagonals may have moved, so kill the diagonal pointers */ 2235b01c7715SBarry Smith a->idiagvalid = PETSC_FALSE; 2236584200bdSSatish Balay if (fshift && a->diag) { 22379566063dSJacob Faibussowitsch PetscCall(PetscFree(a->diag)); 2238f4259b30SLisandro Dalcin a->diag = NULL; 2239584200bdSSatish Balay } 22405f80ce2aSJacob Faibussowitsch if (fshift) PetscCheck(a->nounused != -1, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unused space detected in matrix: %" PetscInt_FMT " X %" PetscInt_FMT " block size %" PetscInt_FMT ", %" PetscInt_FMT " unneeded", m, A->cmap->n, A->rmap->bs, fshift * bs2); 22419566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "Matrix size: %" PetscInt_FMT " X %" PetscInt_FMT ", block size %" PetscInt_FMT "; storage space: %" PetscInt_FMT " unneeded, %" PetscInt_FMT " used\n", m, A->cmap->n, A->rmap->bs, fshift * bs2, a->nz * bs2)); 22429566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "Number of mallocs during MatSetValues is %" PetscInt_FMT "\n", a->reallocs)); 22439566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "Most nonzeros blocks in any row is %" PetscInt_FMT "\n", rmax)); 224426fbe8dcSKarl Rupp 22458e58a170SBarry Smith A->info.mallocs += a->reallocs; 2246e2f3b5e9SSatish Balay a->reallocs = 0; 22470e6d2581SBarry Smith A->info.nz_unneeded = (PetscReal)fshift * bs2; 2248647a6520SHong Zhang a->rmax = rmax; 2249cf4441caSHong Zhang 225048a46eb9SPierre Jolivet if (!A->structure_only) PetscCall(MatCheckCompressedRow(A, a->nonzerorowcnt, &a->compressedrow, a->i, mbs, ratio)); 22513a40ed3dSBarry Smith PetscFunctionReturn(0); 2252584200bdSSatish Balay } 2253584200bdSSatish Balay 2254bea157c4SSatish Balay /* 2255bea157c4SSatish Balay This function returns an array of flags which indicate the locations of contiguous 2256bea157c4SSatish Balay blocks that should be zeroed. 
for eg: if bs = 3 and is = [0,1,2,3,5,6,7,8,9] 2257a5b23f4aSJose E. Roman then the resulting sizes = [3,1,1,3,1] corresponding to sets [(0,1,2),(3),(5),(6,7,8),(9)] 2258bea157c4SSatish Balay Assume: sizes should be long enough to hold all the values. 2259bea157c4SSatish Balay */ 22609371c9d4SSatish Balay static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[], PetscInt n, PetscInt bs, PetscInt sizes[], PetscInt *bs_max) { 2261c1ac3661SBarry Smith PetscInt i, j, k, row; 2262ace3abfcSBarry Smith PetscBool flg; 22633a40ed3dSBarry Smith 2264433994e6SBarry Smith PetscFunctionBegin; 2265bea157c4SSatish Balay for (i = 0, j = 0; i < n; j++) { 2266bea157c4SSatish Balay row = idx[i]; 2267a5b23f4aSJose E. Roman if (row % bs != 0) { /* Not the beginning of a block */ 2268bea157c4SSatish Balay sizes[j] = 1; 2269bea157c4SSatish Balay i++; 2270e4fda26cSSatish Balay } else if (i + bs > n) { /* complete block doesn't exist (at idx end) */ 2271bea157c4SSatish Balay sizes[j] = 1; /* Also makes sure at least 'bs' values exist for next else */ 2272bea157c4SSatish Balay i++; 22736aad120cSJose E. 
Roman } else { /* Beginning of the block, so check if the complete block exists */ 2274bea157c4SSatish Balay flg = PETSC_TRUE; 2275bea157c4SSatish Balay for (k = 1; k < bs; k++) { 2276bea157c4SSatish Balay if (row + k != idx[i + k]) { /* break in the block */ 2277bea157c4SSatish Balay flg = PETSC_FALSE; 2278bea157c4SSatish Balay break; 2279d9b7c43dSSatish Balay } 2280bea157c4SSatish Balay } 2281abc0a331SBarry Smith if (flg) { /* No break in the bs */ 2282bea157c4SSatish Balay sizes[j] = bs; 2283bea157c4SSatish Balay i += bs; 2284bea157c4SSatish Balay } else { 2285bea157c4SSatish Balay sizes[j] = 1; 2286bea157c4SSatish Balay i++; 2287bea157c4SSatish Balay } 2288bea157c4SSatish Balay } 2289bea157c4SSatish Balay } 2290bea157c4SSatish Balay *bs_max = j; 22913a40ed3dSBarry Smith PetscFunctionReturn(0); 2292d9b7c43dSSatish Balay } 2293d9b7c43dSSatish Balay 22949371c9d4SSatish Balay PetscErrorCode MatZeroRows_SeqBAIJ(Mat A, PetscInt is_n, const PetscInt is_idx[], PetscScalar diag, Vec x, Vec b) { 2295d9b7c43dSSatish Balay Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)A->data; 2296f4df32b1SMatthew Knepley PetscInt i, j, k, count, *rows; 2297d0f46423SBarry Smith PetscInt bs = A->rmap->bs, bs2 = baij->bs2, *sizes, row, bs_max; 229887828ca2SBarry Smith PetscScalar zero = 0.0; 22993f1db9ecSBarry Smith MatScalar *aa; 230097b48c8fSBarry Smith const PetscScalar *xx; 230197b48c8fSBarry Smith PetscScalar *bb; 2302d9b7c43dSSatish Balay 23033a40ed3dSBarry Smith PetscFunctionBegin; 230497b48c8fSBarry Smith /* fix right hand side if needed */ 230597b48c8fSBarry Smith if (x && b) { 23069566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(x, &xx)); 23079566063dSJacob Faibussowitsch PetscCall(VecGetArray(b, &bb)); 2308ad540459SPierre Jolivet for (i = 0; i < is_n; i++) bb[is_idx[i]] = diag * xx[is_idx[i]]; 23099566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(x, &xx)); 23109566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(b, &bb)); 231197b48c8fSBarry Smith } 231297b48c8fSBarry 
Smith 2313d9b7c43dSSatish Balay /* Make a copy of the IS and sort it */ 2314bea157c4SSatish Balay /* allocate memory for rows,sizes */ 23159566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(is_n, &rows, 2 * is_n, &sizes)); 2316bea157c4SSatish Balay 2317563b5814SBarry Smith /* copy IS values to rows, and sort them */ 231826fbe8dcSKarl Rupp for (i = 0; i < is_n; i++) rows[i] = is_idx[i]; 23199566063dSJacob Faibussowitsch PetscCall(PetscSortInt(is_n, rows)); 232097b48c8fSBarry Smith 2321a9817697SBarry Smith if (baij->keepnonzeropattern) { 232226fbe8dcSKarl Rupp for (i = 0; i < is_n; i++) sizes[i] = 1; 2323dffd3267SBarry Smith bs_max = is_n; 2324dffd3267SBarry Smith } else { 23259566063dSJacob Faibussowitsch PetscCall(MatZeroRows_SeqBAIJ_Check_Blocks(rows, is_n, bs, sizes, &bs_max)); 2326e56f5c9eSBarry Smith A->nonzerostate++; 2327dffd3267SBarry Smith } 2328bea157c4SSatish Balay 2329bea157c4SSatish Balay for (i = 0, j = 0; i < bs_max; j += sizes[i], i++) { 2330bea157c4SSatish Balay row = rows[j]; 23315f80ce2aSJacob Faibussowitsch PetscCheck(row >= 0 && row <= A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "row %" PetscInt_FMT " out of range", row); 2332bea157c4SSatish Balay count = (baij->i[row / bs + 1] - baij->i[row / bs]) * bs; 2333b31fbe3bSSatish Balay aa = ((MatScalar *)(baij->a)) + baij->i[row / bs] * bs2 + (row % bs); 2334a9817697SBarry Smith if (sizes[i] == bs && !baij->keepnonzeropattern) { 2335d4a378daSJed Brown if (diag != (PetscScalar)0.0) { 2336bea157c4SSatish Balay if (baij->ilen[row / bs] > 0) { 2337bea157c4SSatish Balay baij->ilen[row / bs] = 1; 2338bea157c4SSatish Balay baij->j[baij->i[row / bs]] = row / bs; 233926fbe8dcSKarl Rupp 23409566063dSJacob Faibussowitsch PetscCall(PetscArrayzero(aa, count * bs)); 2341a07cd24cSSatish Balay } 2342563b5814SBarry Smith /* Now insert all the diagonal values for this bs */ 234348a46eb9SPierre Jolivet for (k = 0; k < bs; k++) PetscCall((*A->ops->setvalues)(A, 1, rows + j + k, 1, rows + j + k, &diag, 
INSERT_VALUES)); 2344f4df32b1SMatthew Knepley } else { /* (diag == 0.0) */ 2345bea157c4SSatish Balay baij->ilen[row / bs] = 0; 2346f4df32b1SMatthew Knepley } /* end (diag == 0.0) */ 2347bea157c4SSatish Balay } else { /* (sizes[i] != bs) */ 23486bdcaf15SBarry Smith PetscAssert(sizes[i] == 1, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Internal Error. Value should be 1"); 2349bea157c4SSatish Balay for (k = 0; k < count; k++) { 2350d9b7c43dSSatish Balay aa[0] = zero; 2351d9b7c43dSSatish Balay aa += bs; 2352d9b7c43dSSatish Balay } 235348a46eb9SPierre Jolivet if (diag != (PetscScalar)0.0) PetscCall((*A->ops->setvalues)(A, 1, rows + j, 1, rows + j, &diag, INSERT_VALUES)); 2354d9b7c43dSSatish Balay } 2355bea157c4SSatish Balay } 2356bea157c4SSatish Balay 23579566063dSJacob Faibussowitsch PetscCall(PetscFree2(rows, sizes)); 23589566063dSJacob Faibussowitsch PetscCall(MatAssemblyEnd_SeqBAIJ(A, MAT_FINAL_ASSEMBLY)); 23593a40ed3dSBarry Smith PetscFunctionReturn(0); 2360d9b7c43dSSatish Balay } 23611c351548SSatish Balay 23629371c9d4SSatish Balay PetscErrorCode MatZeroRowsColumns_SeqBAIJ(Mat A, PetscInt is_n, const PetscInt is_idx[], PetscScalar diag, Vec x, Vec b) { 236397b48c8fSBarry Smith Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)A->data; 236497b48c8fSBarry Smith PetscInt i, j, k, count; 236597b48c8fSBarry Smith PetscInt bs = A->rmap->bs, bs2 = baij->bs2, row, col; 236697b48c8fSBarry Smith PetscScalar zero = 0.0; 236797b48c8fSBarry Smith MatScalar *aa; 236897b48c8fSBarry Smith const PetscScalar *xx; 236997b48c8fSBarry Smith PetscScalar *bb; 237056777dd2SBarry Smith PetscBool *zeroed, vecs = PETSC_FALSE; 237197b48c8fSBarry Smith 237297b48c8fSBarry Smith PetscFunctionBegin; 237397b48c8fSBarry Smith /* fix right hand side if needed */ 237497b48c8fSBarry Smith if (x && b) { 23759566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(x, &xx)); 23769566063dSJacob Faibussowitsch PetscCall(VecGetArray(b, &bb)); 237756777dd2SBarry Smith vecs = PETSC_TRUE; 237897b48c8fSBarry Smith } 237997b48c8fSBarry 
Smith 238097b48c8fSBarry Smith /* zero the columns */ 23819566063dSJacob Faibussowitsch PetscCall(PetscCalloc1(A->rmap->n, &zeroed)); 238297b48c8fSBarry Smith for (i = 0; i < is_n; i++) { 23835f80ce2aSJacob Faibussowitsch PetscCheck(is_idx[i] >= 0 && is_idx[i] < A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "row %" PetscInt_FMT " out of range", is_idx[i]); 238497b48c8fSBarry Smith zeroed[is_idx[i]] = PETSC_TRUE; 238597b48c8fSBarry Smith } 238697b48c8fSBarry Smith for (i = 0; i < A->rmap->N; i++) { 238797b48c8fSBarry Smith if (!zeroed[i]) { 238897b48c8fSBarry Smith row = i / bs; 238997b48c8fSBarry Smith for (j = baij->i[row]; j < baij->i[row + 1]; j++) { 239097b48c8fSBarry Smith for (k = 0; k < bs; k++) { 239197b48c8fSBarry Smith col = bs * baij->j[j] + k; 239297b48c8fSBarry Smith if (zeroed[col]) { 239397b48c8fSBarry Smith aa = ((MatScalar *)(baij->a)) + j * bs2 + (i % bs) + bs * k; 239456777dd2SBarry Smith if (vecs) bb[i] -= aa[0] * xx[col]; 239597b48c8fSBarry Smith aa[0] = 0.0; 239697b48c8fSBarry Smith } 239797b48c8fSBarry Smith } 239897b48c8fSBarry Smith } 239956777dd2SBarry Smith } else if (vecs) bb[i] = diag * xx[i]; 240097b48c8fSBarry Smith } 24019566063dSJacob Faibussowitsch PetscCall(PetscFree(zeroed)); 240256777dd2SBarry Smith if (vecs) { 24039566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(x, &xx)); 24049566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(b, &bb)); 240556777dd2SBarry Smith } 240697b48c8fSBarry Smith 240797b48c8fSBarry Smith /* zero the rows */ 240897b48c8fSBarry Smith for (i = 0; i < is_n; i++) { 240997b48c8fSBarry Smith row = is_idx[i]; 241097b48c8fSBarry Smith count = (baij->i[row / bs + 1] - baij->i[row / bs]) * bs; 241197b48c8fSBarry Smith aa = ((MatScalar *)(baij->a)) + baij->i[row / bs] * bs2 + (row % bs); 241297b48c8fSBarry Smith for (k = 0; k < count; k++) { 241397b48c8fSBarry Smith aa[0] = zero; 241497b48c8fSBarry Smith aa += bs; 241597b48c8fSBarry Smith } 2416dbbe0bcdSBarry Smith if (diag != 
(PetscScalar)0.0) PetscUseTypeMethod(A, setvalues, 1, &row, 1, &row, &diag, INSERT_VALUES); 241797b48c8fSBarry Smith } 24189566063dSJacob Faibussowitsch PetscCall(MatAssemblyEnd_SeqBAIJ(A, MAT_FINAL_ASSEMBLY)); 241997b48c8fSBarry Smith PetscFunctionReturn(0); 242097b48c8fSBarry Smith } 242197b48c8fSBarry Smith 24229371c9d4SSatish Balay PetscErrorCode MatSetValues_SeqBAIJ(Mat A, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode is) { 24232d61bbb3SSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 2424e2ee6c50SBarry Smith PetscInt *rp, k, low, high, t, ii, row, nrow, i, col, l, rmax, N, lastcol = -1; 2425c1ac3661SBarry Smith PetscInt *imax = a->imax, *ai = a->i, *ailen = a->ilen; 2426d0f46423SBarry Smith PetscInt *aj = a->j, nonew = a->nonew, bs = A->rmap->bs, brow, bcol; 2427c1ac3661SBarry Smith PetscInt ridx, cidx, bs2 = a->bs2; 2428ace3abfcSBarry Smith PetscBool roworiented = a->roworiented; 2429d8cdefa3SHong Zhang MatScalar *ap = NULL, value = 0.0, *aa = a->a, *bap; 24302d61bbb3SSatish Balay 24312d61bbb3SSatish Balay PetscFunctionBegin; 24322d61bbb3SSatish Balay for (k = 0; k < m; k++) { /* loop over added rows */ 2433085a36d4SBarry Smith row = im[k]; 2434085a36d4SBarry Smith brow = row / bs; 24355ef9f2a5SBarry Smith if (row < 0) continue; 24366bdcaf15SBarry Smith PetscCheck(row < A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, row, A->rmap->N - 1); 24372d61bbb3SSatish Balay rp = aj + ai[brow]; 2438672ba085SHong Zhang if (!A->structure_only) ap = aa + bs2 * ai[brow]; 24392d61bbb3SSatish Balay rmax = imax[brow]; 24402d61bbb3SSatish Balay nrow = ailen[brow]; 24412d61bbb3SSatish Balay low = 0; 2442c71e6ed7SBarry Smith high = nrow; 24432d61bbb3SSatish Balay for (l = 0; l < n; l++) { /* loop over added columns */ 24445ef9f2a5SBarry Smith if (in[l] < 0) continue; 24456bdcaf15SBarry Smith PetscCheck(in[l] < A->cmap->n, PETSC_COMM_SELF, 
PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[l], A->cmap->n - 1); 24469371c9d4SSatish Balay col = in[l]; 24479371c9d4SSatish Balay bcol = col / bs; 24489371c9d4SSatish Balay ridx = row % bs; 24499371c9d4SSatish Balay cidx = col % bs; 2450672ba085SHong Zhang if (!A->structure_only) { 24512d61bbb3SSatish Balay if (roworiented) { 24525ef9f2a5SBarry Smith value = v[l + k * n]; 24532d61bbb3SSatish Balay } else { 24542d61bbb3SSatish Balay value = v[k + l * m]; 24552d61bbb3SSatish Balay } 2456672ba085SHong Zhang } 24579371c9d4SSatish Balay if (col <= lastcol) low = 0; 24589371c9d4SSatish Balay else high = nrow; 2459e2ee6c50SBarry Smith lastcol = col; 24602d61bbb3SSatish Balay while (high - low > 7) { 24612d61bbb3SSatish Balay t = (low + high) / 2; 24622d61bbb3SSatish Balay if (rp[t] > bcol) high = t; 24632d61bbb3SSatish Balay else low = t; 24642d61bbb3SSatish Balay } 24652d61bbb3SSatish Balay for (i = low; i < high; i++) { 24662d61bbb3SSatish Balay if (rp[i] > bcol) break; 24672d61bbb3SSatish Balay if (rp[i] == bcol) { 24682d61bbb3SSatish Balay bap = ap + bs2 * i + bs * cidx + ridx; 2469672ba085SHong Zhang if (!A->structure_only) { 24702d61bbb3SSatish Balay if (is == ADD_VALUES) *bap += value; 24712d61bbb3SSatish Balay else *bap = value; 2472672ba085SHong Zhang } 24732d61bbb3SSatish Balay goto noinsert1; 24742d61bbb3SSatish Balay } 24752d61bbb3SSatish Balay } 24762d61bbb3SSatish Balay if (nonew == 1) goto noinsert1; 24775f80ce2aSJacob Faibussowitsch PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero (%" PetscInt_FMT ", %" PetscInt_FMT ") in the matrix", row, col); 2478672ba085SHong Zhang if (A->structure_only) { 2479672ba085SHong Zhang MatSeqXAIJReallocateAIJ_structure_only(A, a->mbs, bs2, nrow, brow, bcol, rmax, ai, aj, rp, imax, nonew, MatScalar); 2480672ba085SHong Zhang } else { 2481fef13f97SBarry Smith MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, brow, bcol, rmax, aa, ai, aj, rp, 
ap, imax, nonew, MatScalar); 2482672ba085SHong Zhang } 24839371c9d4SSatish Balay N = nrow++ - 1; 24849371c9d4SSatish Balay high++; 24852d61bbb3SSatish Balay /* shift up all the later entries in this row */ 24869566063dSJacob Faibussowitsch PetscCall(PetscArraymove(rp + i + 1, rp + i, N - i + 1)); 24872d61bbb3SSatish Balay rp[i] = bcol; 2488580bdb30SBarry Smith if (!A->structure_only) { 24899566063dSJacob Faibussowitsch PetscCall(PetscArraymove(ap + bs2 * (i + 1), ap + bs2 * i, bs2 * (N - i + 1))); 24909566063dSJacob Faibussowitsch PetscCall(PetscArrayzero(ap + bs2 * i, bs2)); 2491580bdb30SBarry Smith ap[bs2 * i + bs * cidx + ridx] = value; 2492580bdb30SBarry Smith } 2493085a36d4SBarry Smith a->nz++; 2494e56f5c9eSBarry Smith A->nonzerostate++; 24952d61bbb3SSatish Balay noinsert1:; 24962d61bbb3SSatish Balay low = i; 24972d61bbb3SSatish Balay } 24982d61bbb3SSatish Balay ailen[brow] = nrow; 24992d61bbb3SSatish Balay } 25002d61bbb3SSatish Balay PetscFunctionReturn(0); 25012d61bbb3SSatish Balay } 25022d61bbb3SSatish Balay 25039371c9d4SSatish Balay PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA, IS row, IS col, const MatFactorInfo *info) { 25042d61bbb3SSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)inA->data; 25052d61bbb3SSatish Balay Mat outA; 2506ace3abfcSBarry Smith PetscBool row_identity, col_identity; 25072d61bbb3SSatish Balay 25082d61bbb3SSatish Balay PetscFunctionBegin; 25095f80ce2aSJacob Faibussowitsch PetscCheck(info->levels == 0, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only levels = 0 supported for in-place ILU"); 25109566063dSJacob Faibussowitsch PetscCall(ISIdentity(row, &row_identity)); 25119566063dSJacob Faibussowitsch PetscCall(ISIdentity(col, &col_identity)); 25125f80ce2aSJacob Faibussowitsch PetscCheck(row_identity && col_identity, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Row and column permutations must be identity for in-place ILU"); 25132d61bbb3SSatish Balay 25142d61bbb3SSatish Balay outA = inA; 2515d5f3da31SBarry Smith inA->factortype = MAT_FACTOR_LU; 
25169566063dSJacob Faibussowitsch PetscCall(PetscFree(inA->solvertype)); 25179566063dSJacob Faibussowitsch PetscCall(PetscStrallocpy(MATSOLVERPETSC, &inA->solvertype)); 25182d61bbb3SSatish Balay 25199566063dSJacob Faibussowitsch PetscCall(MatMarkDiagonal_SeqBAIJ(inA)); 2520cf242676SKris Buschelman 25219566063dSJacob Faibussowitsch PetscCall(PetscObjectReference((PetscObject)row)); 25229566063dSJacob Faibussowitsch PetscCall(ISDestroy(&a->row)); 2523c3122656SLisandro Dalcin a->row = row; 25249566063dSJacob Faibussowitsch PetscCall(PetscObjectReference((PetscObject)col)); 25259566063dSJacob Faibussowitsch PetscCall(ISDestroy(&a->col)); 2526c3122656SLisandro Dalcin a->col = col; 2527c38d4ed2SBarry Smith 2528c38d4ed2SBarry Smith /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */ 25299566063dSJacob Faibussowitsch PetscCall(ISDestroy(&a->icol)); 25309566063dSJacob Faibussowitsch PetscCall(ISInvertPermutation(col, PETSC_DECIDE, &a->icol)); 2531c38d4ed2SBarry Smith 25329566063dSJacob Faibussowitsch PetscCall(MatSeqBAIJSetNumericFactorization_inplace(inA, (PetscBool)(row_identity && col_identity))); 2533*4dfa11a4SJacob Faibussowitsch if (!a->solve_work) { PetscCall(PetscMalloc1(inA->rmap->N + inA->rmap->bs, &a->solve_work)); } 25349566063dSJacob Faibussowitsch PetscCall(MatLUFactorNumeric(outA, inA, info)); 25352d61bbb3SSatish Balay PetscFunctionReturn(0); 25362d61bbb3SSatish Balay } 2537d9b7c43dSSatish Balay 25389371c9d4SSatish Balay PetscErrorCode MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat, PetscInt *indices) { 253927a8da17SBarry Smith Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data; 2540bdb1c0e1SJed Brown PetscInt i, nz, mbs; 254127a8da17SBarry Smith 254227a8da17SBarry Smith PetscFunctionBegin; 2543b32cb4a7SJed Brown nz = baij->maxnz; 2544bdb1c0e1SJed Brown mbs = baij->mbs; 2545ad540459SPierre Jolivet for (i = 0; i < nz; i++) baij->j[i] = indices[i]; 254627a8da17SBarry Smith baij->nz = nz; 2547ad540459SPierre Jolivet for (i = 0; i < mbs; i++) 
baij->ilen[i] = baij->imax[i]; 254827a8da17SBarry Smith PetscFunctionReturn(0); 254927a8da17SBarry Smith } 255027a8da17SBarry Smith 255127a8da17SBarry Smith /*@ 255211a5261eSBarry Smith MatSeqBAIJSetColumnIndices - Set the column indices for all the rows in the matrix. 255327a8da17SBarry Smith 255427a8da17SBarry Smith Input Parameters: 255511a5261eSBarry Smith + mat - the `MATSEQBAIJ` matrix 255627a8da17SBarry Smith - indices - the column indices 255727a8da17SBarry Smith 255815091d37SBarry Smith Level: advanced 255915091d37SBarry Smith 256027a8da17SBarry Smith Notes: 256127a8da17SBarry Smith This can be called if you have precomputed the nonzero structure of the 256227a8da17SBarry Smith matrix and want to provide it to the matrix object to improve the performance 256311a5261eSBarry Smith of the `MatSetValues()` operation. 256427a8da17SBarry Smith 256527a8da17SBarry Smith You MUST have set the correct numbers of nonzeros per row in the call to 256611a5261eSBarry Smith `MatCreateSeqBAIJ()`, and the columns indices MUST be sorted. 
256727a8da17SBarry Smith 256811a5261eSBarry Smith MUST be called before any calls to `MatSetValues()` 256927a8da17SBarry Smith 257011a5261eSBarry Smith .seealso: `MATSEQBAIJ`, `MatSetValues()` 257127a8da17SBarry Smith @*/ 25729371c9d4SSatish Balay PetscErrorCode MatSeqBAIJSetColumnIndices(Mat mat, PetscInt *indices) { 257327a8da17SBarry Smith PetscFunctionBegin; 25740700a824SBarry Smith PetscValidHeaderSpecific(mat, MAT_CLASSID, 1); 2575dadcf809SJacob Faibussowitsch PetscValidIntPointer(indices, 2); 2576cac4c232SBarry Smith PetscUseMethod(mat, "MatSeqBAIJSetColumnIndices_C", (Mat, PetscInt *), (mat, indices)); 257727a8da17SBarry Smith PetscFunctionReturn(0); 257827a8da17SBarry Smith } 257927a8da17SBarry Smith 25809371c9d4SSatish Balay PetscErrorCode MatGetRowMaxAbs_SeqBAIJ(Mat A, Vec v, PetscInt idx[]) { 2581273d9f13SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 2582c1ac3661SBarry Smith PetscInt i, j, n, row, bs, *ai, *aj, mbs; 2583273d9f13SBarry Smith PetscReal atmp; 258487828ca2SBarry Smith PetscScalar *x, zero = 0.0; 2585273d9f13SBarry Smith MatScalar *aa; 2586c1ac3661SBarry Smith PetscInt ncols, brow, krow, kcol; 2587273d9f13SBarry Smith 2588273d9f13SBarry Smith PetscFunctionBegin; 25895f80ce2aSJacob Faibussowitsch /* why is this not a macro???????????????????????????????????????????????????????????????? 
*/ 25905f80ce2aSJacob Faibussowitsch PetscCheck(!A->factortype, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Not for factored matrix"); 2591d0f46423SBarry Smith bs = A->rmap->bs; 2592273d9f13SBarry Smith aa = a->a; 2593273d9f13SBarry Smith ai = a->i; 2594273d9f13SBarry Smith aj = a->j; 2595273d9f13SBarry Smith mbs = a->mbs; 2596273d9f13SBarry Smith 25979566063dSJacob Faibussowitsch PetscCall(VecSet(v, zero)); 25989566063dSJacob Faibussowitsch PetscCall(VecGetArray(v, &x)); 25999566063dSJacob Faibussowitsch PetscCall(VecGetLocalSize(v, &n)); 26005f80ce2aSJacob Faibussowitsch PetscCheck(n == A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Nonconforming matrix and vector"); 2601273d9f13SBarry Smith for (i = 0; i < mbs; i++) { 26029371c9d4SSatish Balay ncols = ai[1] - ai[0]; 26039371c9d4SSatish Balay ai++; 2604273d9f13SBarry Smith brow = bs * i; 2605273d9f13SBarry Smith for (j = 0; j < ncols; j++) { 2606273d9f13SBarry Smith for (kcol = 0; kcol < bs; kcol++) { 2607273d9f13SBarry Smith for (krow = 0; krow < bs; krow++) { 26089371c9d4SSatish Balay atmp = PetscAbsScalar(*aa); 26099371c9d4SSatish Balay aa++; 2610273d9f13SBarry Smith row = brow + krow; /* row index */ 26119371c9d4SSatish Balay if (PetscAbsScalar(x[row]) < atmp) { 26129371c9d4SSatish Balay x[row] = atmp; 26139371c9d4SSatish Balay if (idx) idx[row] = bs * (*aj) + kcol; 26149371c9d4SSatish Balay } 2615273d9f13SBarry Smith } 2616273d9f13SBarry Smith } 2617273d9f13SBarry Smith aj++; 2618273d9f13SBarry Smith } 2619273d9f13SBarry Smith } 26209566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(v, &x)); 2621273d9f13SBarry Smith PetscFunctionReturn(0); 2622273d9f13SBarry Smith } 2623273d9f13SBarry Smith 26249371c9d4SSatish Balay PetscErrorCode MatCopy_SeqBAIJ(Mat A, Mat B, MatStructure str) { 26253c896bc6SHong Zhang PetscFunctionBegin; 26263c896bc6SHong Zhang /* If the two matrices have the same copy implementation, use fast copy. 
*/ 26273c896bc6SHong Zhang if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) { 26283c896bc6SHong Zhang Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 26293c896bc6SHong Zhang Mat_SeqBAIJ *b = (Mat_SeqBAIJ *)B->data; 2630d88c0aacSHong Zhang PetscInt ambs = a->mbs, bmbs = b->mbs, abs = A->rmap->bs, bbs = B->rmap->bs, bs2 = abs * abs; 26313c896bc6SHong Zhang 26325f80ce2aSJacob Faibussowitsch PetscCheck(a->i[ambs] == b->i[bmbs], PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Number of nonzero blocks in matrices A %" PetscInt_FMT " and B %" PetscInt_FMT " are different", a->i[ambs], b->i[bmbs]); 26335f80ce2aSJacob Faibussowitsch PetscCheck(abs == bbs, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Block size A %" PetscInt_FMT " and B %" PetscInt_FMT " are different", abs, bbs); 26349566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(b->a, a->a, bs2 * a->i[ambs])); 26359566063dSJacob Faibussowitsch PetscCall(PetscObjectStateIncrease((PetscObject)B)); 26363c896bc6SHong Zhang } else { 26379566063dSJacob Faibussowitsch PetscCall(MatCopy_Basic(A, B, str)); 26383c896bc6SHong Zhang } 26393c896bc6SHong Zhang PetscFunctionReturn(0); 26403c896bc6SHong Zhang } 26413c896bc6SHong Zhang 26429371c9d4SSatish Balay PetscErrorCode MatSetUp_SeqBAIJ(Mat A) { 2643273d9f13SBarry Smith PetscFunctionBegin; 26449566063dSJacob Faibussowitsch PetscCall(MatSeqBAIJSetPreallocation(A, A->rmap->bs, PETSC_DEFAULT, NULL)); 2645273d9f13SBarry Smith PetscFunctionReturn(0); 2646273d9f13SBarry Smith } 2647273d9f13SBarry Smith 26489371c9d4SSatish Balay static PetscErrorCode MatSeqBAIJGetArray_SeqBAIJ(Mat A, PetscScalar *array[]) { 2649f2a5309cSSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 26506e111a19SKarl Rupp 2651f2a5309cSSatish Balay PetscFunctionBegin; 2652f2a5309cSSatish Balay *array = a->a; 2653f2a5309cSSatish Balay PetscFunctionReturn(0); 2654f2a5309cSSatish Balay } 2655f2a5309cSSatish Balay 26569371c9d4SSatish Balay static PetscErrorCode MatSeqBAIJRestoreArray_SeqBAIJ(Mat A, PetscScalar 
*array[]) { 2657f2a5309cSSatish Balay PetscFunctionBegin; 2658cda14afcSprj- *array = NULL; 2659f2a5309cSSatish Balay PetscFunctionReturn(0); 2660f2a5309cSSatish Balay } 2661f2a5309cSSatish Balay 26629371c9d4SSatish Balay PetscErrorCode MatAXPYGetPreallocation_SeqBAIJ(Mat Y, Mat X, PetscInt *nnz) { 2663b264fe52SHong Zhang PetscInt bs = Y->rmap->bs, mbs = Y->rmap->N / bs; 266452768537SHong Zhang Mat_SeqBAIJ *x = (Mat_SeqBAIJ *)X->data; 266552768537SHong Zhang Mat_SeqBAIJ *y = (Mat_SeqBAIJ *)Y->data; 266652768537SHong Zhang 266752768537SHong Zhang PetscFunctionBegin; 266852768537SHong Zhang /* Set the number of nonzeros in the new matrix */ 26699566063dSJacob Faibussowitsch PetscCall(MatAXPYGetPreallocation_SeqX_private(mbs, x->i, x->j, y->i, y->j, nnz)); 267052768537SHong Zhang PetscFunctionReturn(0); 267152768537SHong Zhang } 267252768537SHong Zhang 26739371c9d4SSatish Balay PetscErrorCode MatAXPY_SeqBAIJ(Mat Y, PetscScalar a, Mat X, MatStructure str) { 267442ee4b1aSHong Zhang Mat_SeqBAIJ *x = (Mat_SeqBAIJ *)X->data, *y = (Mat_SeqBAIJ *)Y->data; 267531ce2d13SHong Zhang PetscInt bs = Y->rmap->bs, bs2 = bs * bs; 2676e838b9e7SJed Brown PetscBLASInt one = 1; 267742ee4b1aSHong Zhang 267842ee4b1aSHong Zhang PetscFunctionBegin; 2679134adf20SPierre Jolivet if (str == UNKNOWN_NONZERO_PATTERN || (PetscDefined(USE_DEBUG) && str == SAME_NONZERO_PATTERN)) { 2680134adf20SPierre Jolivet PetscBool e = x->nz == y->nz && x->mbs == y->mbs && bs == X->rmap->bs ? 
PETSC_TRUE : PETSC_FALSE; 2681134adf20SPierre Jolivet if (e) { 26829566063dSJacob Faibussowitsch PetscCall(PetscArraycmp(x->i, y->i, x->mbs + 1, &e)); 2683134adf20SPierre Jolivet if (e) { 26849566063dSJacob Faibussowitsch PetscCall(PetscArraycmp(x->j, y->j, x->i[x->mbs], &e)); 2685134adf20SPierre Jolivet if (e) str = SAME_NONZERO_PATTERN; 2686134adf20SPierre Jolivet } 2687134adf20SPierre Jolivet } 268854c59aa7SJacob Faibussowitsch if (!e) PetscCheck(str != SAME_NONZERO_PATTERN, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "MatStructure is not SAME_NONZERO_PATTERN"); 2689134adf20SPierre Jolivet } 269042ee4b1aSHong Zhang if (str == SAME_NONZERO_PATTERN) { 2691f4df32b1SMatthew Knepley PetscScalar alpha = a; 2692c5df96a5SBarry Smith PetscBLASInt bnz; 26939566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast(x->nz * bs2, &bnz)); 2694792fecdfSBarry Smith PetscCallBLAS("BLASaxpy", BLASaxpy_(&bnz, &alpha, x->a, &one, y->a, &one)); 26959566063dSJacob Faibussowitsch PetscCall(PetscObjectStateIncrease((PetscObject)Y)); 2696ab784542SHong Zhang } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 26979566063dSJacob Faibussowitsch PetscCall(MatAXPY_Basic(Y, a, X, str)); 269842ee4b1aSHong Zhang } else { 269952768537SHong Zhang Mat B; 270052768537SHong Zhang PetscInt *nnz; 270154c59aa7SJacob Faibussowitsch PetscCheck(bs == X->rmap->bs, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrices must have same block size"); 27029566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(Y->rmap->N, &nnz)); 27039566063dSJacob Faibussowitsch PetscCall(MatCreate(PetscObjectComm((PetscObject)Y), &B)); 27049566063dSJacob Faibussowitsch PetscCall(PetscObjectSetName((PetscObject)B, ((PetscObject)Y)->name)); 27059566063dSJacob Faibussowitsch PetscCall(MatSetSizes(B, Y->rmap->n, Y->cmap->n, Y->rmap->N, Y->cmap->N)); 27069566063dSJacob Faibussowitsch PetscCall(MatSetBlockSizesFromMats(B, Y, Y)); 27079566063dSJacob Faibussowitsch PetscCall(MatSetType(B, 
(MatType)((PetscObject)Y)->type_name)); 27089566063dSJacob Faibussowitsch PetscCall(MatAXPYGetPreallocation_SeqBAIJ(Y, X, nnz)); 27099566063dSJacob Faibussowitsch PetscCall(MatSeqBAIJSetPreallocation(B, bs, 0, nnz)); 27109566063dSJacob Faibussowitsch PetscCall(MatAXPY_BasicWithPreallocation(B, Y, a, X, str)); 27119566063dSJacob Faibussowitsch PetscCall(MatHeaderMerge(Y, &B)); 27129566063dSJacob Faibussowitsch PetscCall(PetscFree(nnz)); 271342ee4b1aSHong Zhang } 271442ee4b1aSHong Zhang PetscFunctionReturn(0); 271542ee4b1aSHong Zhang } 271642ee4b1aSHong Zhang 27179371c9d4SSatish Balay PETSC_INTERN PetscErrorCode MatConjugate_SeqBAIJ(Mat A) { 27182726fb6dSPierre Jolivet #if defined(PETSC_USE_COMPLEX) 27192726fb6dSPierre Jolivet Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 27202726fb6dSPierre Jolivet PetscInt i, nz = a->bs2 * a->i[a->mbs]; 27212726fb6dSPierre Jolivet MatScalar *aa = a->a; 27222726fb6dSPierre Jolivet 27232726fb6dSPierre Jolivet PetscFunctionBegin; 27242726fb6dSPierre Jolivet for (i = 0; i < nz; i++) aa[i] = PetscConj(aa[i]); 27252726fb6dSPierre Jolivet #else 27262726fb6dSPierre Jolivet PetscFunctionBegin; 27272726fb6dSPierre Jolivet #endif 27282726fb6dSPierre Jolivet PetscFunctionReturn(0); 27292726fb6dSPierre Jolivet } 27302726fb6dSPierre Jolivet 27319371c9d4SSatish Balay PetscErrorCode MatRealPart_SeqBAIJ(Mat A) { 273299cafbc1SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 273399cafbc1SBarry Smith PetscInt i, nz = a->bs2 * a->i[a->mbs]; 2734dd6ea824SBarry Smith MatScalar *aa = a->a; 273599cafbc1SBarry Smith 273699cafbc1SBarry Smith PetscFunctionBegin; 273799cafbc1SBarry Smith for (i = 0; i < nz; i++) aa[i] = PetscRealPart(aa[i]); 273899cafbc1SBarry Smith PetscFunctionReturn(0); 273999cafbc1SBarry Smith } 274099cafbc1SBarry Smith 27419371c9d4SSatish Balay PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A) { 274299cafbc1SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 274399cafbc1SBarry Smith PetscInt i, nz = a->bs2 * a->i[a->mbs]; 2744dd6ea824SBarry 
Smith MatScalar *aa = a->a; 274599cafbc1SBarry Smith 274699cafbc1SBarry Smith PetscFunctionBegin; 274799cafbc1SBarry Smith for (i = 0; i < nz; i++) aa[i] = PetscImaginaryPart(aa[i]); 274899cafbc1SBarry Smith PetscFunctionReturn(0); 274999cafbc1SBarry Smith } 275099cafbc1SBarry Smith 27513acb8795SBarry Smith /* 27522479783cSJose E. Roman Code almost identical to MatGetColumnIJ_SeqAIJ() should share common code 27533acb8795SBarry Smith */ 27549371c9d4SSatish Balay PetscErrorCode MatGetColumnIJ_SeqBAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *nn, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done) { 27553acb8795SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 27563acb8795SBarry Smith PetscInt bs = A->rmap->bs, i, *collengths, *cia, *cja, n = A->cmap->n / bs, m = A->rmap->n / bs; 27573acb8795SBarry Smith PetscInt nz = a->i[m], row, *jj, mr, col; 27583acb8795SBarry Smith 27593acb8795SBarry Smith PetscFunctionBegin; 27603acb8795SBarry Smith *nn = n; 27613acb8795SBarry Smith if (!ia) PetscFunctionReturn(0); 27625f80ce2aSJacob Faibussowitsch PetscCheck(!symmetric, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not for BAIJ matrices"); 27639566063dSJacob Faibussowitsch PetscCall(PetscCalloc1(n, &collengths)); 27649566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(n + 1, &cia)); 27659566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(nz, &cja)); 27663acb8795SBarry Smith jj = a->j; 2767ad540459SPierre Jolivet for (i = 0; i < nz; i++) collengths[jj[i]]++; 27683acb8795SBarry Smith cia[0] = oshift; 2769ad540459SPierre Jolivet for (i = 0; i < n; i++) cia[i + 1] = cia[i] + collengths[i]; 27709566063dSJacob Faibussowitsch PetscCall(PetscArrayzero(collengths, n)); 27713acb8795SBarry Smith jj = a->j; 27723acb8795SBarry Smith for (row = 0; row < m; row++) { 27733acb8795SBarry Smith mr = a->i[row + 1] - a->i[row]; 27743acb8795SBarry Smith for (i = 0; i < mr; i++) { 27753acb8795SBarry Smith col = *jj++; 277626fbe8dcSKarl Rupp 
27773acb8795SBarry Smith cja[cia[col] + collengths[col]++ - oshift] = row + oshift; 27783acb8795SBarry Smith } 27793acb8795SBarry Smith } 27809566063dSJacob Faibussowitsch PetscCall(PetscFree(collengths)); 27819371c9d4SSatish Balay *ia = cia; 27829371c9d4SSatish Balay *ja = cja; 27833acb8795SBarry Smith PetscFunctionReturn(0); 27843acb8795SBarry Smith } 27853acb8795SBarry Smith 27869371c9d4SSatish Balay PetscErrorCode MatRestoreColumnIJ_SeqBAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *n, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done) { 27873acb8795SBarry Smith PetscFunctionBegin; 27883acb8795SBarry Smith if (!ia) PetscFunctionReturn(0); 27899566063dSJacob Faibussowitsch PetscCall(PetscFree(*ia)); 27909566063dSJacob Faibussowitsch PetscCall(PetscFree(*ja)); 27913acb8795SBarry Smith PetscFunctionReturn(0); 27923acb8795SBarry Smith } 27933acb8795SBarry Smith 2794525d23c0SHong Zhang /* 2795525d23c0SHong Zhang MatGetColumnIJ_SeqBAIJ_Color() and MatRestoreColumnIJ_SeqBAIJ_Color() are customized from 2796525d23c0SHong Zhang MatGetColumnIJ_SeqBAIJ() and MatRestoreColumnIJ_SeqBAIJ() by adding an output 2797040ebd07SHong Zhang spidx[], index of a->a, to be used in MatTransposeColoringCreate() and MatFDColoringCreate() 2798525d23c0SHong Zhang */ 27999371c9d4SSatish Balay PetscErrorCode MatGetColumnIJ_SeqBAIJ_Color(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *nn, const PetscInt *ia[], const PetscInt *ja[], PetscInt *spidx[], PetscBool *done) { 2800525d23c0SHong Zhang Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 2801c0349474SHong Zhang PetscInt i, *collengths, *cia, *cja, n = a->nbs, m = a->mbs; 2802525d23c0SHong Zhang PetscInt nz = a->i[m], row, *jj, mr, col; 2803525d23c0SHong Zhang PetscInt *cspidx; 2804f6d58c54SBarry Smith 2805f6d58c54SBarry Smith PetscFunctionBegin; 2806525d23c0SHong Zhang *nn = n; 2807525d23c0SHong Zhang if (!ia) PetscFunctionReturn(0); 2808f6d58c54SBarry Smith 
28099566063dSJacob Faibussowitsch PetscCall(PetscCalloc1(n, &collengths)); 28109566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(n + 1, &cia)); 28119566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(nz, &cja)); 28129566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(nz, &cspidx)); 2813525d23c0SHong Zhang jj = a->j; 2814ad540459SPierre Jolivet for (i = 0; i < nz; i++) collengths[jj[i]]++; 2815525d23c0SHong Zhang cia[0] = oshift; 2816ad540459SPierre Jolivet for (i = 0; i < n; i++) cia[i + 1] = cia[i] + collengths[i]; 28179566063dSJacob Faibussowitsch PetscCall(PetscArrayzero(collengths, n)); 2818525d23c0SHong Zhang jj = a->j; 2819525d23c0SHong Zhang for (row = 0; row < m; row++) { 2820525d23c0SHong Zhang mr = a->i[row + 1] - a->i[row]; 2821525d23c0SHong Zhang for (i = 0; i < mr; i++) { 2822525d23c0SHong Zhang col = *jj++; 2823525d23c0SHong Zhang cspidx[cia[col] + collengths[col] - oshift] = a->i[row] + i; /* index of a->j */ 2824525d23c0SHong Zhang cja[cia[col] + collengths[col]++ - oshift] = row + oshift; 2825525d23c0SHong Zhang } 2826525d23c0SHong Zhang } 28279566063dSJacob Faibussowitsch PetscCall(PetscFree(collengths)); 2828071fcb05SBarry Smith *ia = cia; 2829071fcb05SBarry Smith *ja = cja; 2830525d23c0SHong Zhang *spidx = cspidx; 2831525d23c0SHong Zhang PetscFunctionReturn(0); 2832f6d58c54SBarry Smith } 2833f6d58c54SBarry Smith 28349371c9d4SSatish Balay PetscErrorCode MatRestoreColumnIJ_SeqBAIJ_Color(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *n, const PetscInt *ia[], const PetscInt *ja[], PetscInt *spidx[], PetscBool *done) { 2835525d23c0SHong Zhang PetscFunctionBegin; 28369566063dSJacob Faibussowitsch PetscCall(MatRestoreColumnIJ_SeqBAIJ(A, oshift, symmetric, inodecompressed, n, ia, ja, done)); 28379566063dSJacob Faibussowitsch PetscCall(PetscFree(*spidx)); 2838f6d58c54SBarry Smith PetscFunctionReturn(0); 2839f6d58c54SBarry Smith } 284099cafbc1SBarry Smith 28419371c9d4SSatish Balay PetscErrorCode MatShift_SeqBAIJ(Mat 
Y, PetscScalar a) { 28427d68702bSBarry Smith Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)Y->data; 28437d68702bSBarry Smith 28447d68702bSBarry Smith PetscFunctionBegin; 284548a46eb9SPierre Jolivet if (!Y->preallocated || !aij->nz) PetscCall(MatSeqBAIJSetPreallocation(Y, Y->rmap->bs, 1, NULL)); 28469566063dSJacob Faibussowitsch PetscCall(MatShift_Basic(Y, a)); 28477d68702bSBarry Smith PetscFunctionReturn(0); 28487d68702bSBarry Smith } 28497d68702bSBarry Smith 28502593348eSBarry Smith /* -------------------------------------------------------------------*/ 28519371c9d4SSatish Balay static struct _MatOps MatOps_Values = { 28529371c9d4SSatish Balay MatSetValues_SeqBAIJ, 2853cc2dc46cSBarry Smith MatGetRow_SeqBAIJ, 2854cc2dc46cSBarry Smith MatRestoreRow_SeqBAIJ, 2855cc2dc46cSBarry Smith MatMult_SeqBAIJ_N, 285697304618SKris Buschelman /* 4*/ MatMultAdd_SeqBAIJ_N, 28577c922b88SBarry Smith MatMultTranspose_SeqBAIJ, 28587c922b88SBarry Smith MatMultTransposeAdd_SeqBAIJ, 2859f4259b30SLisandro Dalcin NULL, 2860f4259b30SLisandro Dalcin NULL, 2861f4259b30SLisandro Dalcin NULL, 2862f4259b30SLisandro Dalcin /* 10*/ NULL, 2863cc2dc46cSBarry Smith MatLUFactor_SeqBAIJ, 2864f4259b30SLisandro Dalcin NULL, 2865f4259b30SLisandro Dalcin NULL, 2866f2501298SSatish Balay MatTranspose_SeqBAIJ, 286797304618SKris Buschelman /* 15*/ MatGetInfo_SeqBAIJ, 2868cc2dc46cSBarry Smith MatEqual_SeqBAIJ, 2869cc2dc46cSBarry Smith MatGetDiagonal_SeqBAIJ, 2870cc2dc46cSBarry Smith MatDiagonalScale_SeqBAIJ, 2871cc2dc46cSBarry Smith MatNorm_SeqBAIJ, 2872f4259b30SLisandro Dalcin /* 20*/ NULL, 2873cc2dc46cSBarry Smith MatAssemblyEnd_SeqBAIJ, 2874cc2dc46cSBarry Smith MatSetOption_SeqBAIJ, 2875cc2dc46cSBarry Smith MatZeroEntries_SeqBAIJ, 2876d519adbfSMatthew Knepley /* 24*/ MatZeroRows_SeqBAIJ, 2877f4259b30SLisandro Dalcin NULL, 2878f4259b30SLisandro Dalcin NULL, 2879f4259b30SLisandro Dalcin NULL, 2880f4259b30SLisandro Dalcin NULL, 28814994cf47SJed Brown /* 29*/ MatSetUp_SeqBAIJ, 2882f4259b30SLisandro Dalcin NULL, 
2883f4259b30SLisandro Dalcin NULL, 2884f4259b30SLisandro Dalcin NULL, 2885f4259b30SLisandro Dalcin NULL, 2886d519adbfSMatthew Knepley /* 34*/ MatDuplicate_SeqBAIJ, 2887f4259b30SLisandro Dalcin NULL, 2888f4259b30SLisandro Dalcin NULL, 2889cc2dc46cSBarry Smith MatILUFactor_SeqBAIJ, 2890f4259b30SLisandro Dalcin NULL, 2891d519adbfSMatthew Knepley /* 39*/ MatAXPY_SeqBAIJ, 28927dae84e0SHong Zhang MatCreateSubMatrices_SeqBAIJ, 2893cc2dc46cSBarry Smith MatIncreaseOverlap_SeqBAIJ, 2894cc2dc46cSBarry Smith MatGetValues_SeqBAIJ, 28953c896bc6SHong Zhang MatCopy_SeqBAIJ, 2896f4259b30SLisandro Dalcin /* 44*/ NULL, 2897cc2dc46cSBarry Smith MatScale_SeqBAIJ, 28987d68702bSBarry Smith MatShift_SeqBAIJ, 2899f4259b30SLisandro Dalcin NULL, 290097b48c8fSBarry Smith MatZeroRowsColumns_SeqBAIJ, 2901f4259b30SLisandro Dalcin /* 49*/ NULL, 29023b2fbd54SBarry Smith MatGetRowIJ_SeqBAIJ, 290392c4ed94SBarry Smith MatRestoreRowIJ_SeqBAIJ, 29043acb8795SBarry Smith MatGetColumnIJ_SeqBAIJ, 29053acb8795SBarry Smith MatRestoreColumnIJ_SeqBAIJ, 290693dfae19SHong Zhang /* 54*/ MatFDColoringCreate_SeqXAIJ, 2907f4259b30SLisandro Dalcin NULL, 2908f4259b30SLisandro Dalcin NULL, 2909090001bdSToby Isaac NULL, 2910d3825aa8SBarry Smith MatSetValuesBlocked_SeqBAIJ, 29117dae84e0SHong Zhang /* 59*/ MatCreateSubMatrix_SeqBAIJ, 2912b9b97703SBarry Smith MatDestroy_SeqBAIJ, 2913b9b97703SBarry Smith MatView_SeqBAIJ, 2914f4259b30SLisandro Dalcin NULL, 2915f4259b30SLisandro Dalcin NULL, 2916f4259b30SLisandro Dalcin /* 64*/ NULL, 2917f4259b30SLisandro Dalcin NULL, 2918f4259b30SLisandro Dalcin NULL, 2919f4259b30SLisandro Dalcin NULL, 2920f4259b30SLisandro Dalcin NULL, 2921d519adbfSMatthew Knepley /* 69*/ MatGetRowMaxAbs_SeqBAIJ, 2922f4259b30SLisandro Dalcin NULL, 2923c87e5d42SMatthew Knepley MatConvert_Basic, 2924f4259b30SLisandro Dalcin NULL, 2925f4259b30SLisandro Dalcin NULL, 2926f4259b30SLisandro Dalcin /* 74*/ NULL, 2927f6d58c54SBarry Smith MatFDColoringApply_BAIJ, 2928f4259b30SLisandro Dalcin NULL, 
2929f4259b30SLisandro Dalcin NULL, 2930f4259b30SLisandro Dalcin NULL, 2931f4259b30SLisandro Dalcin /* 79*/ NULL, 2932f4259b30SLisandro Dalcin NULL, 2933f4259b30SLisandro Dalcin NULL, 2934f4259b30SLisandro Dalcin NULL, 29355bba2384SShri Abhyankar MatLoad_SeqBAIJ, 2936f4259b30SLisandro Dalcin /* 84*/ NULL, 2937f4259b30SLisandro Dalcin NULL, 2938f4259b30SLisandro Dalcin NULL, 2939f4259b30SLisandro Dalcin NULL, 2940f4259b30SLisandro Dalcin NULL, 2941f4259b30SLisandro Dalcin /* 89*/ NULL, 2942f4259b30SLisandro Dalcin NULL, 2943f4259b30SLisandro Dalcin NULL, 2944f4259b30SLisandro Dalcin NULL, 2945f4259b30SLisandro Dalcin NULL, 2946f4259b30SLisandro Dalcin /* 94*/ NULL, 2947f4259b30SLisandro Dalcin NULL, 2948f4259b30SLisandro Dalcin NULL, 2949f4259b30SLisandro Dalcin NULL, 2950f4259b30SLisandro Dalcin NULL, 2951f4259b30SLisandro Dalcin /* 99*/ NULL, 2952f4259b30SLisandro Dalcin NULL, 2953f4259b30SLisandro Dalcin NULL, 29542726fb6dSPierre Jolivet MatConjugate_SeqBAIJ, 2955f4259b30SLisandro Dalcin NULL, 2956f4259b30SLisandro Dalcin /*104*/ NULL, 295799cafbc1SBarry Smith MatRealPart_SeqBAIJ, 29582af78befSBarry Smith MatImaginaryPart_SeqBAIJ, 2959f4259b30SLisandro Dalcin NULL, 2960f4259b30SLisandro Dalcin NULL, 2961f4259b30SLisandro Dalcin /*109*/ NULL, 2962f4259b30SLisandro Dalcin NULL, 2963f4259b30SLisandro Dalcin NULL, 2964f4259b30SLisandro Dalcin NULL, 2965547795f9SHong Zhang MatMissingDiagonal_SeqBAIJ, 2966f4259b30SLisandro Dalcin /*114*/ NULL, 2967f4259b30SLisandro Dalcin NULL, 2968f4259b30SLisandro Dalcin NULL, 2969f4259b30SLisandro Dalcin NULL, 2970f4259b30SLisandro Dalcin NULL, 2971f4259b30SLisandro Dalcin /*119*/ NULL, 2972f4259b30SLisandro Dalcin NULL, 2973547795f9SHong Zhang MatMultHermitianTranspose_SeqBAIJ, 2974d6037b41SHong Zhang MatMultHermitianTransposeAdd_SeqBAIJ, 2975f4259b30SLisandro Dalcin NULL, 2976f4259b30SLisandro Dalcin /*124*/ NULL, 2977857cbf51SRichard Tran Mills MatGetColumnReductions_SeqBAIJ, 29783964eb88SJed Brown MatInvertBlockDiagonal_SeqBAIJ, 
2979f4259b30SLisandro Dalcin NULL, 2980f4259b30SLisandro Dalcin NULL, 2981f4259b30SLisandro Dalcin /*129*/ NULL, 2982f4259b30SLisandro Dalcin NULL, 2983f4259b30SLisandro Dalcin NULL, 2984f4259b30SLisandro Dalcin NULL, 2985f4259b30SLisandro Dalcin NULL, 2986f4259b30SLisandro Dalcin /*134*/ NULL, 2987f4259b30SLisandro Dalcin NULL, 2988f4259b30SLisandro Dalcin NULL, 2989f4259b30SLisandro Dalcin NULL, 2990f4259b30SLisandro Dalcin NULL, 299146533700Sstefano_zampini /*139*/ MatSetBlockSizes_Default, 2992f4259b30SLisandro Dalcin NULL, 2993f4259b30SLisandro Dalcin NULL, 2994bdf6f3fcSHong Zhang MatFDColoringSetUp_SeqXAIJ, 2995f4259b30SLisandro Dalcin NULL, 299686e85357SHong Zhang /*144*/ MatCreateMPIMatConcatenateSeqMat_SeqBAIJ, 2997d70f29a3SPierre Jolivet MatDestroySubMatrices_SeqBAIJ, 2998d70f29a3SPierre Jolivet NULL, 299999a7f59eSMark Adams NULL, 300099a7f59eSMark Adams NULL, 30017fb60732SBarry Smith NULL, 30027fb60732SBarry Smith /*150*/ NULL, 300399cafbc1SBarry Smith }; 30042593348eSBarry Smith 30059371c9d4SSatish Balay PetscErrorCode MatStoreValues_SeqBAIJ(Mat mat) { 30063e90b805SBarry Smith Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data; 30078ece6314SShri Abhyankar PetscInt nz = aij->i[aij->mbs] * aij->bs2; 30083e90b805SBarry Smith 30093e90b805SBarry Smith PetscFunctionBegin; 30105f80ce2aSJacob Faibussowitsch PetscCheck(aij->nonew == 1, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first"); 30113e90b805SBarry Smith 30123e90b805SBarry Smith /* allocate space for values if not already there */ 3013*4dfa11a4SJacob Faibussowitsch if (!aij->saved_values) { PetscCall(PetscMalloc1(nz + 1, &aij->saved_values)); } 30143e90b805SBarry Smith 30153e90b805SBarry Smith /* copy values over */ 30169566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(aij->saved_values, aij->a, nz)); 30173e90b805SBarry Smith PetscFunctionReturn(0); 30183e90b805SBarry Smith } 30193e90b805SBarry Smith 30209371c9d4SSatish Balay PetscErrorCode 
MatRetrieveValues_SeqBAIJ(Mat mat) { 30213e90b805SBarry Smith Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data; 30228ece6314SShri Abhyankar PetscInt nz = aij->i[aij->mbs] * aij->bs2; 30233e90b805SBarry Smith 30243e90b805SBarry Smith PetscFunctionBegin; 30255f80ce2aSJacob Faibussowitsch PetscCheck(aij->nonew == 1, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first"); 30265f80ce2aSJacob Faibussowitsch PetscCheck(aij->saved_values, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Must call MatStoreValues(A);first"); 30273e90b805SBarry Smith 30283e90b805SBarry Smith /* copy values over */ 30299566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(aij->a, aij->saved_values, nz)); 30303e90b805SBarry Smith PetscFunctionReturn(0); 30313e90b805SBarry Smith } 30323e90b805SBarry Smith 3033cc2e6a90SBarry Smith PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType, MatReuse, Mat *); 3034cc2e6a90SBarry Smith PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqSBAIJ(Mat, MatType, MatReuse, Mat *); 3035273d9f13SBarry Smith 30369371c9d4SSatish Balay PetscErrorCode MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B, PetscInt bs, PetscInt nz, PetscInt *nnz) { 3037a23d5eceSKris Buschelman Mat_SeqBAIJ *b; 3038535b19f3SBarry Smith PetscInt i, mbs, nbs, bs2; 30398afaa268SBarry Smith PetscBool flg = PETSC_FALSE, skipallocation = PETSC_FALSE, realalloc = PETSC_FALSE; 3040a23d5eceSKris Buschelman 3041a23d5eceSKris Buschelman PetscFunctionBegin; 30422576faa2SJed Brown if (nz >= 0 || nnz) realalloc = PETSC_TRUE; 3043ab93d7beSBarry Smith if (nz == MAT_SKIP_ALLOCATION) { 3044ab93d7beSBarry Smith skipallocation = PETSC_TRUE; 3045ab93d7beSBarry Smith nz = 0; 3046ab93d7beSBarry Smith } 30478c07d4e3SBarry Smith 30489566063dSJacob Faibussowitsch PetscCall(MatSetBlockSize(B, PetscAbs(bs))); 30499566063dSJacob Faibussowitsch PetscCall(PetscLayoutSetUp(B->rmap)); 30509566063dSJacob Faibussowitsch PetscCall(PetscLayoutSetUp(B->cmap)); 30519566063dSJacob Faibussowitsch 
PetscCall(PetscLayoutGetBlockSize(B->rmap, &bs)); 3052899cda47SBarry Smith 3053899cda47SBarry Smith B->preallocated = PETSC_TRUE; 3054899cda47SBarry Smith 3055d0f46423SBarry Smith mbs = B->rmap->n / bs; 3056d0f46423SBarry Smith nbs = B->cmap->n / bs; 3057a23d5eceSKris Buschelman bs2 = bs * bs; 3058a23d5eceSKris Buschelman 30595f80ce2aSJacob Faibussowitsch PetscCheck(mbs * bs == B->rmap->n && nbs * bs == B->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Number rows %" PetscInt_FMT ", cols %" PetscInt_FMT " must be divisible by blocksize %" PetscInt_FMT, B->rmap->N, B->cmap->n, bs); 3060a23d5eceSKris Buschelman 3061a23d5eceSKris Buschelman if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 30625f80ce2aSJacob Faibussowitsch PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "nz cannot be less than 0: value %" PetscInt_FMT, nz); 3063a23d5eceSKris Buschelman if (nnz) { 3064a23d5eceSKris Buschelman for (i = 0; i < mbs; i++) { 30655f80ce2aSJacob Faibussowitsch PetscCheck(nnz[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "nnz cannot be less than 0: local row %" PetscInt_FMT " value %" PetscInt_FMT, i, nnz[i]); 30665f80ce2aSJacob Faibussowitsch PetscCheck(nnz[i] <= nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "nnz cannot be greater than block row length: local row %" PetscInt_FMT " value %" PetscInt_FMT " rowlength %" PetscInt_FMT, i, nnz[i], nbs); 3067a23d5eceSKris Buschelman } 3068a23d5eceSKris Buschelman } 3069a23d5eceSKris Buschelman 3070a23d5eceSKris Buschelman b = (Mat_SeqBAIJ *)B->data; 3071d0609cedSBarry Smith PetscOptionsBegin(PetscObjectComm((PetscObject)B), NULL, "Optimize options for SEQBAIJ matrix 2 ", "Mat"); 30729566063dSJacob Faibussowitsch PetscCall(PetscOptionsBool("-mat_no_unroll", "Do not optimize for block size (slow)", NULL, flg, &flg, NULL)); 3073d0609cedSBarry Smith PetscOptionsEnd(); 30748c07d4e3SBarry Smith 3075a23d5eceSKris Buschelman if (!flg) { 3076a23d5eceSKris Buschelman switch (bs) { 3077a23d5eceSKris Buschelman case 
1: 3078a23d5eceSKris Buschelman B->ops->mult = MatMult_SeqBAIJ_1; 3079a23d5eceSKris Buschelman B->ops->multadd = MatMultAdd_SeqBAIJ_1; 3080a23d5eceSKris Buschelman break; 3081a23d5eceSKris Buschelman case 2: 3082a23d5eceSKris Buschelman B->ops->mult = MatMult_SeqBAIJ_2; 3083a23d5eceSKris Buschelman B->ops->multadd = MatMultAdd_SeqBAIJ_2; 3084a23d5eceSKris Buschelman break; 3085a23d5eceSKris Buschelman case 3: 3086a23d5eceSKris Buschelman B->ops->mult = MatMult_SeqBAIJ_3; 3087a23d5eceSKris Buschelman B->ops->multadd = MatMultAdd_SeqBAIJ_3; 3088a23d5eceSKris Buschelman break; 3089a23d5eceSKris Buschelman case 4: 3090a23d5eceSKris Buschelman B->ops->mult = MatMult_SeqBAIJ_4; 3091a23d5eceSKris Buschelman B->ops->multadd = MatMultAdd_SeqBAIJ_4; 3092a23d5eceSKris Buschelman break; 3093a23d5eceSKris Buschelman case 5: 3094a23d5eceSKris Buschelman B->ops->mult = MatMult_SeqBAIJ_5; 3095a23d5eceSKris Buschelman B->ops->multadd = MatMultAdd_SeqBAIJ_5; 3096a23d5eceSKris Buschelman break; 3097a23d5eceSKris Buschelman case 6: 3098a23d5eceSKris Buschelman B->ops->mult = MatMult_SeqBAIJ_6; 3099a23d5eceSKris Buschelman B->ops->multadd = MatMultAdd_SeqBAIJ_6; 3100a23d5eceSKris Buschelman break; 3101a23d5eceSKris Buschelman case 7: 3102a23d5eceSKris Buschelman B->ops->mult = MatMult_SeqBAIJ_7; 3103a23d5eceSKris Buschelman B->ops->multadd = MatMultAdd_SeqBAIJ_7; 3104a23d5eceSKris Buschelman break; 31059371c9d4SSatish Balay case 9: { 31066679dcc1SBarry Smith PetscInt version = 1; 31079566063dSJacob Faibussowitsch PetscCall(PetscOptionsGetInt(NULL, ((PetscObject)B)->prefix, "-mat_baij_mult_version", &version, NULL)); 31086679dcc1SBarry Smith switch (version) { 31095f70456aSHong Zhang #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX2__) && defined(__FMA__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES) 31106679dcc1SBarry Smith case 1: 311196e086a2SDaniel Kokron B->ops->mult = MatMult_SeqBAIJ_9_AVX2; 311296e086a2SDaniel Kokron 
B->ops->multadd = MatMultAdd_SeqBAIJ_9_AVX2; 31139566063dSJacob Faibussowitsch PetscCall(PetscInfo((PetscObject)B, "Using AVX2 for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", bs)); 31146679dcc1SBarry Smith break; 31156679dcc1SBarry Smith #endif 31166679dcc1SBarry Smith default: 311796e086a2SDaniel Kokron B->ops->mult = MatMult_SeqBAIJ_N; 311896e086a2SDaniel Kokron B->ops->multadd = MatMultAdd_SeqBAIJ_N; 31199566063dSJacob Faibussowitsch PetscCall(PetscInfo((PetscObject)B, "Using BLAS for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", bs)); 312096e086a2SDaniel Kokron break; 31216679dcc1SBarry Smith } 31226679dcc1SBarry Smith break; 31236679dcc1SBarry Smith } 3124ebada01fSBarry Smith case 11: 3125ebada01fSBarry Smith B->ops->mult = MatMult_SeqBAIJ_11; 3126ebada01fSBarry Smith B->ops->multadd = MatMultAdd_SeqBAIJ_11; 3127ebada01fSBarry Smith break; 31289371c9d4SSatish Balay case 12: { 31296679dcc1SBarry Smith PetscInt version = 1; 31309566063dSJacob Faibussowitsch PetscCall(PetscOptionsGetInt(NULL, ((PetscObject)B)->prefix, "-mat_baij_mult_version", &version, NULL)); 31316679dcc1SBarry Smith switch (version) { 31326679dcc1SBarry Smith case 1: 31336679dcc1SBarry Smith B->ops->mult = MatMult_SeqBAIJ_12_ver1; 31346679dcc1SBarry Smith B->ops->multadd = MatMultAdd_SeqBAIJ_12_ver1; 31359566063dSJacob Faibussowitsch PetscCall(PetscInfo((PetscObject)B, "Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", version, bs)); 31368ab949d8SShri Abhyankar break; 31376679dcc1SBarry Smith case 2: 31386679dcc1SBarry Smith B->ops->mult = MatMult_SeqBAIJ_12_ver2; 31396679dcc1SBarry Smith B->ops->multadd = MatMultAdd_SeqBAIJ_12_ver2; 31409566063dSJacob Faibussowitsch PetscCall(PetscInfo((PetscObject)B, "Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", version, bs)); 31416679dcc1SBarry Smith break; 31426679dcc1SBarry Smith #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX2__) && defined(__FMA__) && 
defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES) 31436679dcc1SBarry Smith case 3: 31446679dcc1SBarry Smith B->ops->mult = MatMult_SeqBAIJ_12_AVX2; 31456679dcc1SBarry Smith B->ops->multadd = MatMultAdd_SeqBAIJ_12_ver1; 31469566063dSJacob Faibussowitsch PetscCall(PetscInfo((PetscObject)B, "Using AVX2 for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", bs)); 31476679dcc1SBarry Smith break; 31486679dcc1SBarry Smith #endif 3149a23d5eceSKris Buschelman default: 3150a23d5eceSKris Buschelman B->ops->mult = MatMult_SeqBAIJ_N; 3151a23d5eceSKris Buschelman B->ops->multadd = MatMultAdd_SeqBAIJ_N; 31529566063dSJacob Faibussowitsch PetscCall(PetscInfo((PetscObject)B, "Using BLAS for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", bs)); 31536679dcc1SBarry Smith break; 31546679dcc1SBarry Smith } 31556679dcc1SBarry Smith break; 31566679dcc1SBarry Smith } 31579371c9d4SSatish Balay case 15: { 31586679dcc1SBarry Smith PetscInt version = 1; 31599566063dSJacob Faibussowitsch PetscCall(PetscOptionsGetInt(NULL, ((PetscObject)B)->prefix, "-mat_baij_mult_version", &version, NULL)); 31606679dcc1SBarry Smith switch (version) { 31616679dcc1SBarry Smith case 1: 31626679dcc1SBarry Smith B->ops->mult = MatMult_SeqBAIJ_15_ver1; 31639566063dSJacob Faibussowitsch PetscCall(PetscInfo((PetscObject)B, "Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", version, bs)); 31646679dcc1SBarry Smith break; 31656679dcc1SBarry Smith case 2: 31666679dcc1SBarry Smith B->ops->mult = MatMult_SeqBAIJ_15_ver2; 31679566063dSJacob Faibussowitsch PetscCall(PetscInfo((PetscObject)B, "Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", version, bs)); 31686679dcc1SBarry Smith break; 31696679dcc1SBarry Smith case 3: 31706679dcc1SBarry Smith B->ops->mult = MatMult_SeqBAIJ_15_ver3; 31719566063dSJacob Faibussowitsch PetscCall(PetscInfo((PetscObject)B, "Using version %" PetscInt_FMT " of MatMult 
for BAIJ for blocksize %" PetscInt_FMT "\n", version, bs)); 31726679dcc1SBarry Smith break; 31736679dcc1SBarry Smith case 4: 31746679dcc1SBarry Smith B->ops->mult = MatMult_SeqBAIJ_15_ver4; 31759566063dSJacob Faibussowitsch PetscCall(PetscInfo((PetscObject)B, "Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", version, bs)); 31766679dcc1SBarry Smith break; 31776679dcc1SBarry Smith default: 31786679dcc1SBarry Smith B->ops->mult = MatMult_SeqBAIJ_N; 31799566063dSJacob Faibussowitsch PetscCall(PetscInfo((PetscObject)B, "Using BLAS for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", bs)); 31806679dcc1SBarry Smith break; 31816679dcc1SBarry Smith } 31826679dcc1SBarry Smith B->ops->multadd = MatMultAdd_SeqBAIJ_N; 31836679dcc1SBarry Smith break; 31846679dcc1SBarry Smith } 31856679dcc1SBarry Smith default: 31866679dcc1SBarry Smith B->ops->mult = MatMult_SeqBAIJ_N; 31876679dcc1SBarry Smith B->ops->multadd = MatMultAdd_SeqBAIJ_N; 31889566063dSJacob Faibussowitsch PetscCall(PetscInfo((PetscObject)B, "Using BLAS for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", bs)); 3189a23d5eceSKris Buschelman break; 3190a23d5eceSKris Buschelman } 3191a23d5eceSKris Buschelman } 3192e48d15efSToby Isaac B->ops->sor = MatSOR_SeqBAIJ; 3193a23d5eceSKris Buschelman b->mbs = mbs; 3194a23d5eceSKris Buschelman b->nbs = nbs; 3195ab93d7beSBarry Smith if (!skipallocation) { 31962ee49352SLisandro Dalcin if (!b->imax) { 31979566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(mbs, &b->imax, mbs, &b->ilen)); 319826fbe8dcSKarl Rupp 31994fd072dbSBarry Smith b->free_imax_ilen = PETSC_TRUE; 32002ee49352SLisandro Dalcin } 3201ab93d7beSBarry Smith /* b->ilen will count nonzeros in each block row so far. 
*/ 320226fbe8dcSKarl Rupp for (i = 0; i < mbs; i++) b->ilen[i] = 0; 3203a23d5eceSKris Buschelman if (!nnz) { 3204a23d5eceSKris Buschelman if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 3205c62bd62aSJed Brown else if (nz < 0) nz = 1; 32065d2a9ed1SStefano Zampini nz = PetscMin(nz, nbs); 3207a23d5eceSKris Buschelman for (i = 0; i < mbs; i++) b->imax[i] = nz; 32089566063dSJacob Faibussowitsch PetscCall(PetscIntMultError(nz, mbs, &nz)); 3209a23d5eceSKris Buschelman } else { 3210c73702f5SBarry Smith PetscInt64 nz64 = 0; 32119371c9d4SSatish Balay for (i = 0; i < mbs; i++) { 32129371c9d4SSatish Balay b->imax[i] = nnz[i]; 32139371c9d4SSatish Balay nz64 += nnz[i]; 32149371c9d4SSatish Balay } 32159566063dSJacob Faibussowitsch PetscCall(PetscIntCast(nz64, &nz)); 3216a23d5eceSKris Buschelman } 3217a23d5eceSKris Buschelman 3218a23d5eceSKris Buschelman /* allocate the matrix space */ 32199566063dSJacob Faibussowitsch PetscCall(MatSeqXAIJFreeAIJ(B, &b->a, &b->j, &b->i)); 3220672ba085SHong Zhang if (B->structure_only) { 32219566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(nz, &b->j)); 32229566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(B->rmap->N + 1, &b->i)); 3223672ba085SHong Zhang } else { 32246679dcc1SBarry Smith PetscInt nzbs2 = 0; 32259566063dSJacob Faibussowitsch PetscCall(PetscIntMultError(nz, bs2, &nzbs2)); 32269566063dSJacob Faibussowitsch PetscCall(PetscMalloc3(nzbs2, &b->a, nz, &b->j, B->rmap->N + 1, &b->i)); 32279566063dSJacob Faibussowitsch PetscCall(PetscArrayzero(b->a, nz * bs2)); 3228672ba085SHong Zhang } 32299566063dSJacob Faibussowitsch PetscCall(PetscArrayzero(b->j, nz)); 323026fbe8dcSKarl Rupp 3231672ba085SHong Zhang if (B->structure_only) { 3232672ba085SHong Zhang b->singlemalloc = PETSC_FALSE; 3233672ba085SHong Zhang b->free_a = PETSC_FALSE; 3234672ba085SHong Zhang } else { 3235a23d5eceSKris Buschelman b->singlemalloc = PETSC_TRUE; 3236672ba085SHong Zhang b->free_a = PETSC_TRUE; 3237672ba085SHong Zhang } 3238672ba085SHong Zhang b->free_ij = 
PETSC_TRUE; 3239672ba085SHong Zhang 3240a23d5eceSKris Buschelman b->i[0] = 0; 3241ad540459SPierre Jolivet for (i = 1; i < mbs + 1; i++) b->i[i] = b->i[i - 1] + b->imax[i - 1]; 3242672ba085SHong Zhang 3243e811da20SHong Zhang } else { 3244e6b907acSBarry Smith b->free_a = PETSC_FALSE; 3245e6b907acSBarry Smith b->free_ij = PETSC_FALSE; 3246ab93d7beSBarry Smith } 3247a23d5eceSKris Buschelman 3248a23d5eceSKris Buschelman b->bs2 = bs2; 3249a23d5eceSKris Buschelman b->mbs = mbs; 3250a23d5eceSKris Buschelman b->nz = 0; 3251b32cb4a7SJed Brown b->maxnz = nz; 3252b32cb4a7SJed Brown B->info.nz_unneeded = (PetscReal)b->maxnz * bs2; 3253cb7b82ddSBarry Smith B->was_assembled = PETSC_FALSE; 3254cb7b82ddSBarry Smith B->assembled = PETSC_FALSE; 32559566063dSJacob Faibussowitsch if (realalloc) PetscCall(MatSetOption(B, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_TRUE)); 3256a23d5eceSKris Buschelman PetscFunctionReturn(0); 3257a23d5eceSKris Buschelman } 3258a23d5eceSKris Buschelman 32599371c9d4SSatish Balay PetscErrorCode MatSeqBAIJSetPreallocationCSR_SeqBAIJ(Mat B, PetscInt bs, const PetscInt ii[], const PetscInt jj[], const PetscScalar V[]) { 3260725b52f3SLisandro Dalcin PetscInt i, m, nz, nz_max = 0, *nnz; 3261f4259b30SLisandro Dalcin PetscScalar *values = NULL; 3262d47bf9aaSJed Brown PetscBool roworiented = ((Mat_SeqBAIJ *)B->data)->roworiented; 3263725b52f3SLisandro Dalcin 3264725b52f3SLisandro Dalcin PetscFunctionBegin; 32655f80ce2aSJacob Faibussowitsch PetscCheck(bs >= 1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Invalid block size specified, must be positive but it is %" PetscInt_FMT, bs); 32669566063dSJacob Faibussowitsch PetscCall(PetscLayoutSetBlockSize(B->rmap, bs)); 32679566063dSJacob Faibussowitsch PetscCall(PetscLayoutSetBlockSize(B->cmap, bs)); 32689566063dSJacob Faibussowitsch PetscCall(PetscLayoutSetUp(B->rmap)); 32699566063dSJacob Faibussowitsch PetscCall(PetscLayoutSetUp(B->cmap)); 32709566063dSJacob Faibussowitsch PetscCall(PetscLayoutGetBlockSize(B->rmap, &bs)); 
3271d0f46423SBarry Smith m = B->rmap->n / bs; 3272725b52f3SLisandro Dalcin 32735f80ce2aSJacob Faibussowitsch PetscCheck(ii[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %" PetscInt_FMT, ii[0]); 32749566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(m + 1, &nnz)); 3275725b52f3SLisandro Dalcin for (i = 0; i < m; i++) { 3276cf12db73SBarry Smith nz = ii[i + 1] - ii[i]; 32775f80ce2aSJacob Faibussowitsch PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Local row %" PetscInt_FMT " has a negative number of columns %" PetscInt_FMT, i, nz); 3278725b52f3SLisandro Dalcin nz_max = PetscMax(nz_max, nz); 3279725b52f3SLisandro Dalcin nnz[i] = nz; 3280725b52f3SLisandro Dalcin } 32819566063dSJacob Faibussowitsch PetscCall(MatSeqBAIJSetPreallocation(B, bs, 0, nnz)); 32829566063dSJacob Faibussowitsch PetscCall(PetscFree(nnz)); 3283725b52f3SLisandro Dalcin 3284725b52f3SLisandro Dalcin values = (PetscScalar *)V; 328548a46eb9SPierre Jolivet if (!values) PetscCall(PetscCalloc1(bs * bs * (nz_max + 1), &values)); 3286725b52f3SLisandro Dalcin for (i = 0; i < m; i++) { 3287cf12db73SBarry Smith PetscInt ncols = ii[i + 1] - ii[i]; 3288cf12db73SBarry Smith const PetscInt *icols = jj + ii[i]; 3289bb80cfbbSStefano Zampini if (bs == 1 || !roworiented) { 3290cf12db73SBarry Smith const PetscScalar *svals = values + (V ? (bs * bs * ii[i]) : 0); 32919566063dSJacob Faibussowitsch PetscCall(MatSetValuesBlocked_SeqBAIJ(B, 1, &i, ncols, icols, svals, INSERT_VALUES)); 32923adadaf3SJed Brown } else { 32933adadaf3SJed Brown PetscInt j; 32943adadaf3SJed Brown for (j = 0; j < ncols; j++) { 32953adadaf3SJed Brown const PetscScalar *svals = values + (V ? 
(bs * bs * (ii[i] + j)) : 0); 32969566063dSJacob Faibussowitsch PetscCall(MatSetValuesBlocked_SeqBAIJ(B, 1, &i, 1, &icols[j], svals, INSERT_VALUES)); 32973adadaf3SJed Brown } 32983adadaf3SJed Brown } 3299725b52f3SLisandro Dalcin } 33009566063dSJacob Faibussowitsch if (!V) PetscCall(PetscFree(values)); 33019566063dSJacob Faibussowitsch PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 33029566063dSJacob Faibussowitsch PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 33039566063dSJacob Faibussowitsch PetscCall(MatSetOption(B, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE)); 3304725b52f3SLisandro Dalcin PetscFunctionReturn(0); 3305725b52f3SLisandro Dalcin } 3306725b52f3SLisandro Dalcin 3307cda14afcSprj- /*@C 330811a5261eSBarry Smith MatSeqBAIJGetArray - gives read/write access to the array where the data for a `MATSEQBAIJ` matrix is stored 3309cda14afcSprj- 3310cda14afcSprj- Not Collective 3311cda14afcSprj- 3312cda14afcSprj- Input Parameter: 331311a5261eSBarry Smith . mat - a `MATSEQBAIJ` matrix 3314cda14afcSprj- 3315cda14afcSprj- Output Parameter: 3316cda14afcSprj- . 
array - pointer to the data 3317cda14afcSprj- 3318cda14afcSprj- Level: intermediate 3319cda14afcSprj- 332011a5261eSBarry Smith .seealso: `MATSEQBAIJ`, `MatSeqBAIJRestoreArray()`, `MatSeqAIJGetArray()`, `MatSeqAIJRestoreArray()` 3321cda14afcSprj- @*/ 33229371c9d4SSatish Balay PetscErrorCode MatSeqBAIJGetArray(Mat A, PetscScalar **array) { 3323cda14afcSprj- PetscFunctionBegin; 3324cac4c232SBarry Smith PetscUseMethod(A, "MatSeqBAIJGetArray_C", (Mat, PetscScalar **), (A, array)); 3325cda14afcSprj- PetscFunctionReturn(0); 3326cda14afcSprj- } 3327cda14afcSprj- 3328cda14afcSprj- /*@C 332911a5261eSBarry Smith MatSeqBAIJRestoreArray - returns access to the array where the data for a `MATSEQBAIJ` matrix is stored obtained by `MatSeqBAIJGetArray()` 3330cda14afcSprj- 3331cda14afcSprj- Not Collective 3332cda14afcSprj- 3333cda14afcSprj- Input Parameters: 333411a5261eSBarry Smith + A - a `MATSEQBAIJ` matrix 3335cda14afcSprj- - array - pointer to the data 3336cda14afcSprj- 3337cda14afcSprj- Level: intermediate 3338cda14afcSprj- 3339db781477SPatrick Sanan .seealso: `MATSEQBAIJ`, `MatSeqBAIJGetArray()`, `MatSeqAIJGetArray()`, `MatSeqAIJRestoreArray()` 3340cda14afcSprj- @*/ 33419371c9d4SSatish Balay PetscErrorCode MatSeqBAIJRestoreArray(Mat A, PetscScalar **array) { 3342cda14afcSprj- PetscFunctionBegin; 3343cac4c232SBarry Smith PetscUseMethod(A, "MatSeqBAIJRestoreArray_C", (Mat, PetscScalar **), (A, array)); 3344cda14afcSprj- PetscFunctionReturn(0); 3345cda14afcSprj- } 3346cda14afcSprj- 33470bad9183SKris Buschelman /*MC 3348fafad747SKris Buschelman MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on 33490bad9183SKris Buschelman block sparse compressed row format. 
33500bad9183SKris Buschelman 33510bad9183SKris Buschelman Options Database Keys: 33526679dcc1SBarry Smith + -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions() 33536679dcc1SBarry Smith - -mat_baij_mult_version version - indicate the version of the matrix-vector product to use (0 often indicates using BLAS) 33540bad9183SKris Buschelman 33550bad9183SKris Buschelman Level: beginner 33560cd7f59aSBarry Smith 33570cd7f59aSBarry Smith Notes: 335811a5261eSBarry Smith `MatSetOption`(,`MAT_STRUCTURE_ONLY`,`PETSC_TRUE`) may be called for this matrix type. In this case no 335911a5261eSBarry Smith space is allocated for the nonzero entries and any entries passed with `MatSetValues()` are ignored 33600bad9183SKris Buschelman 33616679dcc1SBarry Smith Run with -info to see what version of the matrix-vector product is being used 33626679dcc1SBarry Smith 3363db781477SPatrick Sanan .seealso: `MatCreateSeqBAIJ()` 33640bad9183SKris Buschelman M*/ 33650bad9183SKris Buschelman 3366cc2e6a90SBarry Smith PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBSTRM(Mat, MatType, MatReuse, Mat *); 3367b24902e0SBarry Smith 
/*
  MatCreate_SeqBAIJ - Type constructor for MATSEQBAIJ.

  Rejects communicators with more than one rank, attaches a new Mat_SeqBAIJ to B->data,
  copies the MatOps_Values operation table into B->ops, sets the initial field values,
  composes the type-specific methods on the object and finally sets the type name to
  MATSEQBAIJ.
*/
PETSC_EXTERN PetscErrorCode MatCreate_SeqBAIJ(Mat B) {
  PetscObject  obj = (PetscObject)B;
  Mat_SeqBAIJ *baij;
  PetscMPIInt  nranks;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Comm_size(PetscObjectComm(obj), &nranks));
  PetscCheck(nranks == 1, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Comm must be of size 1");

  PetscCall(PetscNew(&baij));
  B->data = (void *)baij;
  PetscCall(PetscMemcpy(B->ops, &MatOps_Values, sizeof(struct _MatOps)));

  /* pointer members start out unset */
  baij->row          = NULL;
  baij->col          = NULL;
  baij->icol         = NULL;
  baij->saved_values = NULL;
  baij->diag         = NULL;
  B->spptr           = NULL;

  /* counters and flags */
  baij->reallocs           = 0;
  baij->nonew              = 0;
  baij->roworiented        = PETSC_TRUE;
  baij->keepnonzeropattern = PETSC_FALSE;
  B->info.nz_unneeded      = (PetscReal)baij->maxnz * baij->bs2;

  /* methods looked up by name through PetscUseMethod()/PetscTryMethod() and MatConvert() */
  PetscCall(PetscObjectComposeFunction(obj, "MatSeqBAIJGetArray_C", MatSeqBAIJGetArray_SeqBAIJ));
  PetscCall(PetscObjectComposeFunction(obj, "MatSeqBAIJRestoreArray_C", MatSeqBAIJRestoreArray_SeqBAIJ));
  PetscCall(PetscObjectComposeFunction(obj, "MatStoreValues_C", MatStoreValues_SeqBAIJ));
  PetscCall(PetscObjectComposeFunction(obj, "MatRetrieveValues_C", MatRetrieveValues_SeqBAIJ));
  PetscCall(PetscObjectComposeFunction(obj, "MatSeqBAIJSetColumnIndices_C", MatSeqBAIJSetColumnIndices_SeqBAIJ));
  PetscCall(PetscObjectComposeFunction(obj, "MatConvert_seqbaij_seqaij_C", MatConvert_SeqBAIJ_SeqAIJ));
  PetscCall(PetscObjectComposeFunction(obj, "MatConvert_seqbaij_seqsbaij_C", MatConvert_SeqBAIJ_SeqSBAIJ));
  PetscCall(PetscObjectComposeFunction(obj, "MatSeqBAIJSetPreallocation_C", MatSeqBAIJSetPreallocation_SeqBAIJ));
  PetscCall(PetscObjectComposeFunction(obj, "MatSeqBAIJSetPreallocationCSR_C", MatSeqBAIJSetPreallocationCSR_SeqBAIJ));
  PetscCall(PetscObjectComposeFunction(obj, "MatIsTranspose_C", MatIsTranspose_SeqBAIJ));
#if defined(PETSC_HAVE_HYPRE)
  PetscCall(PetscObjectComposeFunction(obj, "MatConvert_seqbaij_hypre_C", MatConvert_AIJ_HYPRE));
#endif
  PetscCall(PetscObjectComposeFunction(obj, "MatConvert_seqbaij_is_C", MatConvert_XAIJ_IS));
  PetscCall(PetscObjectChangeTypeName(obj, MATSEQBAIJ));
  PetscFunctionReturn(0);
}

34119371c9d4SSatish Balay PetscErrorCode MatDuplicateNoCreate_SeqBAIJ(Mat C, Mat A, MatDuplicateOption cpvalues, PetscBool mallocmatspace) { 3412b24902e0SBarry Smith Mat_SeqBAIJ *c = (Mat_SeqBAIJ *)C->data, *a = (Mat_SeqBAIJ *)A->data; 3413a96a251dSBarry Smith PetscInt i, mbs = a->mbs, nz = a->nz, bs2 = a->bs2; 3414de6a44a3SBarry Smith 34153a40ed3dSBarry Smith PetscFunctionBegin; 34165f80ce2aSJacob Faibussowitsch PetscCheck(a->i[mbs] == nz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Corrupt matrix"); 34172593348eSBarry Smith 34184fd072dbSBarry Smith if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { 34194fd072dbSBarry Smith c->imax = a->imax; 34204fd072dbSBarry Smith c->ilen = a->ilen; 34214fd072dbSBarry Smith c->free_imax_ilen = PETSC_FALSE; 34224fd072dbSBarry Smith } else { 34239566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(mbs, &c->imax, mbs, &c->ilen)); 3424b6490206SBarry Smith for (i = 0; i < mbs; i++) { 34252593348eSBarry Smith c->imax[i] = a->imax[i]; 34262593348eSBarry Smith c->ilen[i] = a->ilen[i]; 34272593348eSBarry Smith } 34284fd072dbSBarry Smith c->free_imax_ilen = PETSC_TRUE; 34294fd072dbSBarry Smith } 34302593348eSBarry Smith 34312593348eSBarry Smith /* allocate the matrix space */ 343216a2bf60SHong Zhang if (mallocmatspace) { 34334fd072dbSBarry Smith if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { 34349566063dSJacob Faibussowitsch PetscCall(PetscCalloc1(bs2 * nz, &c->a)); 343526fbe8dcSKarl Rupp 34364fd072dbSBarry Smith c->i = a->i; 
34374fd072dbSBarry Smith c->j = a->j; 3438379be0ddSLisandro Dalcin c->singlemalloc = PETSC_FALSE; 3439379be0ddSLisandro Dalcin c->free_a = PETSC_TRUE; 3440379be0ddSLisandro Dalcin c->free_ij = PETSC_FALSE; 34414fd072dbSBarry Smith c->parent = A; 34421e40a84eSLisandro Dalcin C->preallocated = PETSC_TRUE; 34431e40a84eSLisandro Dalcin C->assembled = PETSC_TRUE; 344426fbe8dcSKarl Rupp 34459566063dSJacob Faibussowitsch PetscCall(PetscObjectReference((PetscObject)A)); 34469566063dSJacob Faibussowitsch PetscCall(MatSetOption(A, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE)); 34479566063dSJacob Faibussowitsch PetscCall(MatSetOption(C, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE)); 34484fd072dbSBarry Smith } else { 34499566063dSJacob Faibussowitsch PetscCall(PetscMalloc3(bs2 * nz, &c->a, nz, &c->j, mbs + 1, &c->i)); 345026fbe8dcSKarl Rupp 3451c4992f7dSBarry Smith c->singlemalloc = PETSC_TRUE; 3452379be0ddSLisandro Dalcin c->free_a = PETSC_TRUE; 34534fd072dbSBarry Smith c->free_ij = PETSC_TRUE; 345426fbe8dcSKarl Rupp 34559566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(c->i, a->i, mbs + 1)); 3456b6490206SBarry Smith if (mbs > 0) { 34579566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(c->j, a->j, nz)); 34582e8a6d31SBarry Smith if (cpvalues == MAT_COPY_VALUES) { 34599566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(c->a, a->a, bs2 * nz)); 34602e8a6d31SBarry Smith } else { 34619566063dSJacob Faibussowitsch PetscCall(PetscArrayzero(c->a, bs2 * nz)); 34622593348eSBarry Smith } 34632593348eSBarry Smith } 34641e40a84eSLisandro Dalcin C->preallocated = PETSC_TRUE; 34651e40a84eSLisandro Dalcin C->assembled = PETSC_TRUE; 346616a2bf60SHong Zhang } 34674fd072dbSBarry Smith } 346816a2bf60SHong Zhang 34692593348eSBarry Smith c->roworiented = a->roworiented; 34702593348eSBarry Smith c->nonew = a->nonew; 347126fbe8dcSKarl Rupp 34729566063dSJacob Faibussowitsch PetscCall(PetscLayoutReference(A->rmap, &C->rmap)); 34739566063dSJacob Faibussowitsch PetscCall(PetscLayoutReference(A->cmap, 
&C->cmap)); 347426fbe8dcSKarl Rupp 34755c9eb25fSBarry Smith c->bs2 = a->bs2; 34765c9eb25fSBarry Smith c->mbs = a->mbs; 34775c9eb25fSBarry Smith c->nbs = a->nbs; 34782593348eSBarry Smith 34792593348eSBarry Smith if (a->diag) { 34804fd072dbSBarry Smith if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { 34814fd072dbSBarry Smith c->diag = a->diag; 34824fd072dbSBarry Smith c->free_diag = PETSC_FALSE; 34834fd072dbSBarry Smith } else { 34849566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(mbs + 1, &c->diag)); 348526fbe8dcSKarl Rupp for (i = 0; i < mbs; i++) c->diag[i] = a->diag[i]; 34864fd072dbSBarry Smith c->free_diag = PETSC_TRUE; 34874fd072dbSBarry Smith } 3488f4259b30SLisandro Dalcin } else c->diag = NULL; 348926fbe8dcSKarl Rupp 34902593348eSBarry Smith c->nz = a->nz; 3491f2cbd3d5SJed Brown c->maxnz = a->nz; /* Since we allocate exactly the right amount */ 3492f361c04dSBarry Smith c->solve_work = NULL; 3493f361c04dSBarry Smith c->mult_work = NULL; 3494f361c04dSBarry Smith c->sor_workt = NULL; 3495f361c04dSBarry Smith c->sor_work = NULL; 349688e51ccdSHong Zhang 349788e51ccdSHong Zhang c->compressedrow.use = a->compressedrow.use; 349888e51ccdSHong Zhang c->compressedrow.nrows = a->compressedrow.nrows; 3499cd6b891eSBarry Smith if (a->compressedrow.use) { 350088e51ccdSHong Zhang i = a->compressedrow.nrows; 35019566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(i + 1, &c->compressedrow.i, i + 1, &c->compressedrow.rindex)); 35029566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(c->compressedrow.i, a->compressedrow.i, i + 1)); 35039566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(c->compressedrow.rindex, a->compressedrow.rindex, i)); 350488e51ccdSHong Zhang } else { 350588e51ccdSHong Zhang c->compressedrow.use = PETSC_FALSE; 35060298fd71SBarry Smith c->compressedrow.i = NULL; 35070298fd71SBarry Smith c->compressedrow.rindex = NULL; 350888e51ccdSHong Zhang } 3509e56f5c9eSBarry Smith C->nonzerostate = A->nonzerostate; 351026fbe8dcSKarl Rupp 35119566063dSJacob 
Faibussowitsch PetscCall(PetscFunctionListDuplicate(((PetscObject)A)->qlist, &((PetscObject)C)->qlist)); 35123a40ed3dSBarry Smith PetscFunctionReturn(0); 35132593348eSBarry Smith } 35142593348eSBarry Smith 35159371c9d4SSatish Balay PetscErrorCode MatDuplicate_SeqBAIJ(Mat A, MatDuplicateOption cpvalues, Mat *B) { 3516b24902e0SBarry Smith PetscFunctionBegin; 35179566063dSJacob Faibussowitsch PetscCall(MatCreate(PetscObjectComm((PetscObject)A), B)); 35189566063dSJacob Faibussowitsch PetscCall(MatSetSizes(*B, A->rmap->N, A->cmap->n, A->rmap->N, A->cmap->n)); 35199566063dSJacob Faibussowitsch PetscCall(MatSetType(*B, MATSEQBAIJ)); 35209566063dSJacob Faibussowitsch PetscCall(MatDuplicateNoCreate_SeqBAIJ(*B, A, cpvalues, PETSC_TRUE)); 3521b24902e0SBarry Smith PetscFunctionReturn(0); 3522b24902e0SBarry Smith } 3523b24902e0SBarry Smith 3524618cc2edSLisandro Dalcin /* Used for both SeqBAIJ and SeqSBAIJ matrices */ 35259371c9d4SSatish Balay PetscErrorCode MatLoad_SeqBAIJ_Binary(Mat mat, PetscViewer viewer) { 3526b51a4376SLisandro Dalcin PetscInt header[4], M, N, nz, bs, m, n, mbs, nbs, rows, cols, sum, i, j, k; 3527b51a4376SLisandro Dalcin PetscInt *rowidxs, *colidxs; 3528b51a4376SLisandro Dalcin PetscScalar *matvals; 3529b51a4376SLisandro Dalcin 3530b51a4376SLisandro Dalcin PetscFunctionBegin; 35319566063dSJacob Faibussowitsch PetscCall(PetscViewerSetUp(viewer)); 3532b51a4376SLisandro Dalcin 3533b51a4376SLisandro Dalcin /* read matrix header */ 35349566063dSJacob Faibussowitsch PetscCall(PetscViewerBinaryRead(viewer, header, 4, NULL, PETSC_INT)); 35355f80ce2aSJacob Faibussowitsch PetscCheck(header[0] == MAT_FILE_CLASSID, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Not a matrix object in file"); 35369371c9d4SSatish Balay M = header[1]; 35379371c9d4SSatish Balay N = header[2]; 35389371c9d4SSatish Balay nz = header[3]; 35395f80ce2aSJacob Faibussowitsch PetscCheck(M >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix row size (%" PetscInt_FMT ") in 
file is negative", M); 35405f80ce2aSJacob Faibussowitsch PetscCheck(N >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix column size (%" PetscInt_FMT ") in file is negative", N); 35415f80ce2aSJacob Faibussowitsch PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix stored in special format on disk, cannot load as SeqBAIJ"); 3542b51a4376SLisandro Dalcin 3543b51a4376SLisandro Dalcin /* set block sizes from the viewer's .info file */ 35449566063dSJacob Faibussowitsch PetscCall(MatLoad_Binary_BlockSizes(mat, viewer)); 3545b51a4376SLisandro Dalcin /* set local and global sizes if not set already */ 3546b51a4376SLisandro Dalcin if (mat->rmap->n < 0) mat->rmap->n = M; 3547b51a4376SLisandro Dalcin if (mat->cmap->n < 0) mat->cmap->n = N; 3548b51a4376SLisandro Dalcin if (mat->rmap->N < 0) mat->rmap->N = M; 3549b51a4376SLisandro Dalcin if (mat->cmap->N < 0) mat->cmap->N = N; 35509566063dSJacob Faibussowitsch PetscCall(PetscLayoutSetUp(mat->rmap)); 35519566063dSJacob Faibussowitsch PetscCall(PetscLayoutSetUp(mat->cmap)); 3552b51a4376SLisandro Dalcin 3553b51a4376SLisandro Dalcin /* check if the matrix sizes are correct */ 35549566063dSJacob Faibussowitsch PetscCall(MatGetSize(mat, &rows, &cols)); 35555f80ce2aSJacob Faibussowitsch PetscCheck(M == rows && N == cols, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%" PetscInt_FMT ", %" PetscInt_FMT ") than the input matrix (%" PetscInt_FMT ", %" PetscInt_FMT ")", M, N, rows, cols); 35569566063dSJacob Faibussowitsch PetscCall(MatGetBlockSize(mat, &bs)); 35579566063dSJacob Faibussowitsch PetscCall(MatGetLocalSize(mat, &m, &n)); 35589371c9d4SSatish Balay mbs = m / bs; 35599371c9d4SSatish Balay nbs = n / bs; 3560b51a4376SLisandro Dalcin 3561b51a4376SLisandro Dalcin /* read in row lengths, column indices and nonzero values */ 35629566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(m + 1, &rowidxs)); 35639566063dSJacob Faibussowitsch 
PetscCall(PetscViewerBinaryRead(viewer, rowidxs + 1, m, NULL, PETSC_INT)); 35649371c9d4SSatish Balay rowidxs[0] = 0; 35659371c9d4SSatish Balay for (i = 0; i < m; i++) rowidxs[i + 1] += rowidxs[i]; 3566b51a4376SLisandro Dalcin sum = rowidxs[m]; 35675f80ce2aSJacob Faibussowitsch PetscCheck(sum == nz, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Inconsistent matrix data in file: nonzeros = %" PetscInt_FMT ", sum-row-lengths = %" PetscInt_FMT, nz, sum); 3568b51a4376SLisandro Dalcin 3569b51a4376SLisandro Dalcin /* read in column indices and nonzero values */ 35709566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(rowidxs[m], &colidxs, nz, &matvals)); 35719566063dSJacob Faibussowitsch PetscCall(PetscViewerBinaryRead(viewer, colidxs, rowidxs[m], NULL, PETSC_INT)); 35729566063dSJacob Faibussowitsch PetscCall(PetscViewerBinaryRead(viewer, matvals, rowidxs[m], NULL, PETSC_SCALAR)); 3573b51a4376SLisandro Dalcin 3574b51a4376SLisandro Dalcin { /* preallocate matrix storage */ 3575b51a4376SLisandro Dalcin PetscBT bt; /* helper bit set to count nonzeros */ 3576b51a4376SLisandro Dalcin PetscInt *nnz; 3577618cc2edSLisandro Dalcin PetscBool sbaij; 3578b51a4376SLisandro Dalcin 35799566063dSJacob Faibussowitsch PetscCall(PetscBTCreate(nbs, &bt)); 35809566063dSJacob Faibussowitsch PetscCall(PetscCalloc1(mbs, &nnz)); 35819566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATSEQSBAIJ, &sbaij)); 3582b51a4376SLisandro Dalcin for (i = 0; i < mbs; i++) { 35839566063dSJacob Faibussowitsch PetscCall(PetscBTMemzero(nbs, bt)); 3584618cc2edSLisandro Dalcin for (k = 0; k < bs; k++) { 3585618cc2edSLisandro Dalcin PetscInt row = bs * i + k; 3586618cc2edSLisandro Dalcin for (j = rowidxs[row]; j < rowidxs[row + 1]; j++) { 3587618cc2edSLisandro Dalcin PetscInt col = colidxs[j]; 3588618cc2edSLisandro Dalcin if (!sbaij || col >= row) 3589618cc2edSLisandro Dalcin if (!PetscBTLookupSet(bt, col / bs)) nnz[i]++; 3590618cc2edSLisandro Dalcin } 3591618cc2edSLisandro Dalcin } 
3592b51a4376SLisandro Dalcin } 35939566063dSJacob Faibussowitsch PetscCall(PetscBTDestroy(&bt)); 35949566063dSJacob Faibussowitsch PetscCall(MatSeqBAIJSetPreallocation(mat, bs, 0, nnz)); 35959566063dSJacob Faibussowitsch PetscCall(MatSeqSBAIJSetPreallocation(mat, bs, 0, nnz)); 35969566063dSJacob Faibussowitsch PetscCall(PetscFree(nnz)); 3597b51a4376SLisandro Dalcin } 3598b51a4376SLisandro Dalcin 3599b51a4376SLisandro Dalcin /* store matrix values */ 3600b51a4376SLisandro Dalcin for (i = 0; i < m; i++) { 3601b51a4376SLisandro Dalcin PetscInt row = i, s = rowidxs[i], e = rowidxs[i + 1]; 36029566063dSJacob Faibussowitsch PetscCall((*mat->ops->setvalues)(mat, 1, &row, e - s, colidxs + s, matvals + s, INSERT_VALUES)); 3603b51a4376SLisandro Dalcin } 3604b51a4376SLisandro Dalcin 36059566063dSJacob Faibussowitsch PetscCall(PetscFree(rowidxs)); 36069566063dSJacob Faibussowitsch PetscCall(PetscFree2(colidxs, matvals)); 36079566063dSJacob Faibussowitsch PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY)); 36089566063dSJacob Faibussowitsch PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY)); 3609b51a4376SLisandro Dalcin PetscFunctionReturn(0); 3610b51a4376SLisandro Dalcin } 3611b51a4376SLisandro Dalcin 36129371c9d4SSatish Balay PetscErrorCode MatLoad_SeqBAIJ(Mat mat, PetscViewer viewer) { 36137f489da9SVaclav Hapla PetscBool isbinary; 3614f501eaabSShri Abhyankar 3615f501eaabSShri Abhyankar PetscFunctionBegin; 36169566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary)); 36175f80ce2aSJacob Faibussowitsch PetscCheck(isbinary, PetscObjectComm((PetscObject)viewer), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)mat)->type_name); 36189566063dSJacob Faibussowitsch PetscCall(MatLoad_SeqBAIJ_Binary(mat, viewer)); 3619f501eaabSShri Abhyankar PetscFunctionReturn(0); 3620f501eaabSShri Abhyankar } 3621f501eaabSShri Abhyankar 3622273d9f13SBarry Smith /*@C 
   MatCreateSeqBAIJ - Creates a sparse matrix in `MATSEQBAIJ` (block
   compressed row) format. For good matrix assembly performance the
   user should preallocate the matrix storage by setting the parameter nz
   (or the array nnz). By setting these parameters accurately, performance
   during matrix assembly can be increased by more than a factor of 50.

   Collective

   Input Parameters:
+  comm - MPI communicator, set to `PETSC_COMM_SELF`
.  bs - size of block, the blocks are ALWAYS square. One can use `MatSetBlockSizes()` to set a different row and column blocksize but the row
        blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with `MatCreateVecs()`
.  m - number of rows
.  n - number of columns
.  nz - number of nonzero blocks per block row (same for all rows)
-  nnz - array containing the number of nonzero blocks in the various block rows
         (possibly different for each block row) or NULL

   Output Parameter:
.  A - the matrix

   It is recommended that one use the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`,
   MatXXXXSetPreallocation() paradigm instead of this routine directly.
   [MatXXXXSetPreallocation() is, for example, `MatSeqBAIJSetPreallocation()`]

   Options Database Keys:
+   -mat_no_unroll - uses code that does not unroll the loops in the block calculations (much slower)
-   -mat_block_size - size of the blocks to use

   Level: intermediate

   Notes:
   The number of rows and columns must be divisible by blocksize.

   If the nnz parameter is given then the nz parameter is ignored

   A nonzero block is any block that has 1 or more nonzeros in it

   The `MATSEQBAIJ` format is fully compatible with standard Fortran 77
   storage. That is, the stored row and column indices can begin at
   either one (as in Fortran) or zero. See the users' manual for details.

   Specify the preallocated storage with either nz or nnz (not both).
   Set nz = `PETSC_DEFAULT` and nnz = NULL for PETSc to control dynamic memory
   allocation. See [Sparse Matrices](sec_matsparse) for details.
3669273d9f13SBarry Smith 3670651615e1SBarry Smith .seealso: [Sparse Matrices](sec_matsparse), `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatCreateBAIJ()` 3671273d9f13SBarry Smith @*/ 36729371c9d4SSatish Balay PetscErrorCode MatCreateSeqBAIJ(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt nz, const PetscInt nnz[], Mat *A) { 3673273d9f13SBarry Smith PetscFunctionBegin; 36749566063dSJacob Faibussowitsch PetscCall(MatCreate(comm, A)); 36759566063dSJacob Faibussowitsch PetscCall(MatSetSizes(*A, m, n, m, n)); 36769566063dSJacob Faibussowitsch PetscCall(MatSetType(*A, MATSEQBAIJ)); 36779566063dSJacob Faibussowitsch PetscCall(MatSeqBAIJSetPreallocation(*A, bs, nz, (PetscInt *)nnz)); 3678273d9f13SBarry Smith PetscFunctionReturn(0); 3679273d9f13SBarry Smith } 3680273d9f13SBarry Smith 3681273d9f13SBarry Smith /*@C 3682273d9f13SBarry Smith MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros 3683273d9f13SBarry Smith per row in the matrix. For good matrix assembly performance the 3684273d9f13SBarry Smith user should preallocate the matrix storage by setting the parameter nz 3685273d9f13SBarry Smith (or the array nnz). By setting these parameters accurately, performance 3686273d9f13SBarry Smith during matrix assembly can be increased by more than a factor of 50. 3687273d9f13SBarry Smith 3688d083f849SBarry Smith Collective 3689273d9f13SBarry Smith 3690273d9f13SBarry Smith Input Parameters: 36911c4f3114SJed Brown + B - the matrix 369211a5261eSBarry Smith . bs - size of block, the blocks are ALWAYS square. One can use `MatSetBlockSizes()` to set a different row and column blocksize but the row 369311a5261eSBarry Smith blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with `MatCreateVecs()` 3694273d9f13SBarry Smith . 
nz - number of block nonzeros per block row (same for all rows) 3695273d9f13SBarry Smith - nnz - array containing the number of block nonzeros in the various block rows 36960298fd71SBarry Smith (possibly different for each block row) or NULL 3697273d9f13SBarry Smith 3698273d9f13SBarry Smith Options Database Keys: 369911a5261eSBarry Smith + -mat_no_unroll - uses code that does not unroll the loops in the block calculations (much slower) 3700a2b725a8SWilliam Gropp - -mat_block_size - size of the blocks to use 3701273d9f13SBarry Smith 3702273d9f13SBarry Smith Level: intermediate 3703273d9f13SBarry Smith 3704273d9f13SBarry Smith Notes: 370549a6f317SBarry Smith If the nnz parameter is given then the nz parameter is ignored 370649a6f317SBarry Smith 370711a5261eSBarry Smith You can call `MatGetInfo()` to get information on how effective the preallocation was; 3708aa95bbe8SBarry Smith for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 3709aa95bbe8SBarry Smith You can also run with the option -info and look for messages with the string 3710aa95bbe8SBarry Smith malloc in them to see if additional memory allocation was needed. 3711aa95bbe8SBarry Smith 371211a5261eSBarry Smith The `MATSEQBAIJ` format is fully compatible with standard Fortran 77 3713273d9f13SBarry Smith storage. That is, the stored row and column indices can begin at 3714273d9f13SBarry Smith either one (as in Fortran) or zero. See the users' manual for details. 3715273d9f13SBarry Smith 3716273d9f13SBarry Smith Specify the preallocated storage with either nz or nnz (not both). 371711a5261eSBarry Smith Set nz = `PETSC_DEFAULT` and nnz = NULL for PETSc to control dynamic memory 3718651615e1SBarry Smith allocation. See [Sparse Matrices](sec_matsparse) for details. 
3719273d9f13SBarry Smith 3720651615e1SBarry Smith .seealso: [Sparse Matrices](sec_matsparse), `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatCreateBAIJ()`, `MatGetInfo()` 3721273d9f13SBarry Smith @*/ 37229371c9d4SSatish Balay PetscErrorCode MatSeqBAIJSetPreallocation(Mat B, PetscInt bs, PetscInt nz, const PetscInt nnz[]) { 3723273d9f13SBarry Smith PetscFunctionBegin; 37246ba663aaSJed Brown PetscValidHeaderSpecific(B, MAT_CLASSID, 1); 37256ba663aaSJed Brown PetscValidType(B, 1); 37266ba663aaSJed Brown PetscValidLogicalCollectiveInt(B, bs, 2); 3727cac4c232SBarry Smith PetscTryMethod(B, "MatSeqBAIJSetPreallocation_C", (Mat, PetscInt, PetscInt, const PetscInt[]), (B, bs, nz, nnz)); 3728273d9f13SBarry Smith PetscFunctionReturn(0); 3729273d9f13SBarry Smith } 3730a1d92eedSBarry Smith 3731725b52f3SLisandro Dalcin /*@C 373211a5261eSBarry Smith MatSeqBAIJSetPreallocationCSR - Creates a sparse sequential matrix in `MATSEQBAIJ` format using the given nonzero structure and (optional) numerical values 3733725b52f3SLisandro Dalcin 3734d083f849SBarry Smith Collective 3735725b52f3SLisandro Dalcin 3736725b52f3SLisandro Dalcin Input Parameters: 37371c4f3114SJed Brown + B - the matrix 3738725b52f3SLisandro Dalcin . i - the indices into j for the start of each local row (starts with zero) 3739725b52f3SLisandro Dalcin . j - the column indices for each local row (starts with zero) these must be sorted for each row 3740725b52f3SLisandro Dalcin - v - optional values in the matrix 3741725b52f3SLisandro Dalcin 3742664954b6SBarry Smith Level: advanced 3743725b52f3SLisandro Dalcin 37443adadaf3SJed Brown Notes: 374511a5261eSBarry Smith The order of the entries in values is specified by the `MatOption` `MAT_ROW_ORIENTED`. 
For example, C programs 374611a5261eSBarry Smith may want to use the default `MAT_ROW_ORIENTED` of `PETSC_TRUE` and use an array v[nnz][bs][bs] where the second index is 37473adadaf3SJed Brown over rows within a block and the last index is over columns within a block row. Fortran programs will likely set 374811a5261eSBarry Smith `MAT_ROW_ORIENTED` of `PETSC_FALSE` and use a Fortran array v(bs,bs,nnz) in which the first index is over rows within a 37493adadaf3SJed Brown block column and the second index is over columns within a block. 37503adadaf3SJed Brown 3751664954b6SBarry Smith Though this routine has Preallocation() in the name it also sets the exact nonzero locations of the matrix entries and usually the numerical values as well 3752664954b6SBarry Smith 3753db781477SPatrick Sanan .seealso: `MatCreate()`, `MatCreateSeqBAIJ()`, `MatSetValues()`, `MatSeqBAIJSetPreallocation()`, `MATSEQBAIJ` 3754725b52f3SLisandro Dalcin @*/ 37559371c9d4SSatish Balay PetscErrorCode MatSeqBAIJSetPreallocationCSR(Mat B, PetscInt bs, const PetscInt i[], const PetscInt j[], const PetscScalar v[]) { 3756725b52f3SLisandro Dalcin PetscFunctionBegin; 37576ba663aaSJed Brown PetscValidHeaderSpecific(B, MAT_CLASSID, 1); 37586ba663aaSJed Brown PetscValidType(B, 1); 37596ba663aaSJed Brown PetscValidLogicalCollectiveInt(B, bs, 2); 3760cac4c232SBarry Smith PetscTryMethod(B, "MatSeqBAIJSetPreallocationCSR_C", (Mat, PetscInt, const PetscInt[], const PetscInt[], const PetscScalar[]), (B, bs, i, j, v)); 3761725b52f3SLisandro Dalcin PetscFunctionReturn(0); 3762725b52f3SLisandro Dalcin } 3763725b52f3SLisandro Dalcin 3764c75a6043SHong Zhang /*@ 376511a5261eSBarry Smith MatCreateSeqBAIJWithArrays - Creates a `MATSEQBAIJ` matrix using matrix elements provided by the user. 3766c75a6043SHong Zhang 3767d083f849SBarry Smith Collective 3768c75a6043SHong Zhang 3769c75a6043SHong Zhang Input Parameters: 3770c75a6043SHong Zhang + comm - must be an MPI communicator of size 1 3771c75a6043SHong Zhang . 
bs - size of block 3772c75a6043SHong Zhang . m - number of rows 3773c75a6043SHong Zhang . n - number of columns 3774483a2f95SBarry Smith . i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row block row of the matrix 3775c75a6043SHong Zhang . j - column indices 3776c75a6043SHong Zhang - a - matrix values 3777c75a6043SHong Zhang 3778c75a6043SHong Zhang Output Parameter: 3779c75a6043SHong Zhang . mat - the matrix 3780c75a6043SHong Zhang 3781dfb205c3SBarry Smith Level: advanced 3782c75a6043SHong Zhang 3783c75a6043SHong Zhang Notes: 3784c75a6043SHong Zhang The i, j, and a arrays are not copied by this routine, the user must free these arrays 3785c75a6043SHong Zhang once the matrix is destroyed 3786c75a6043SHong Zhang 3787c75a6043SHong Zhang You cannot set new nonzero locations into this matrix, that will generate an error. 3788c75a6043SHong Zhang 3789c75a6043SHong Zhang The i and j indices are 0 based 3790c75a6043SHong Zhang 379111a5261eSBarry Smith When block size is greater than 1 the matrix values must be stored using the `MATSEQBAIJ` storage format 3792dfb205c3SBarry Smith 37933adadaf3SJed Brown The order of the entries in values is the same as the block compressed sparse row storage format; that is, it is 37943adadaf3SJed Brown the same as a three dimensional array in Fortran values(bs,bs,nnz) that contains the first column of the first 37953adadaf3SJed Brown block, followed by the second column of the first block etc etc. That is, the blocks are contiguous in memory 37963adadaf3SJed Brown with column-major ordering within blocks. 
3797dfb205c3SBarry Smith 3798db781477SPatrick Sanan .seealso: `MatCreate()`, `MatCreateBAIJ()`, `MatCreateSeqBAIJ()` 3799c75a6043SHong Zhang @*/ 38009371c9d4SSatish Balay PetscErrorCode MatCreateSeqBAIJWithArrays(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt i[], PetscInt j[], PetscScalar a[], Mat *mat) { 3801c75a6043SHong Zhang PetscInt ii; 3802c75a6043SHong Zhang Mat_SeqBAIJ *baij; 3803c75a6043SHong Zhang 3804c75a6043SHong Zhang PetscFunctionBegin; 38055f80ce2aSJacob Faibussowitsch PetscCheck(bs == 1, PETSC_COMM_SELF, PETSC_ERR_SUP, "block size %" PetscInt_FMT " > 1 is not supported yet", bs); 38065f80ce2aSJacob Faibussowitsch if (m > 0) PetscCheck(i[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0"); 3807c75a6043SHong Zhang 38089566063dSJacob Faibussowitsch PetscCall(MatCreate(comm, mat)); 38099566063dSJacob Faibussowitsch PetscCall(MatSetSizes(*mat, m, n, m, n)); 38109566063dSJacob Faibussowitsch PetscCall(MatSetType(*mat, MATSEQBAIJ)); 38119566063dSJacob Faibussowitsch PetscCall(MatSeqBAIJSetPreallocation(*mat, bs, MAT_SKIP_ALLOCATION, NULL)); 3812c75a6043SHong Zhang baij = (Mat_SeqBAIJ *)(*mat)->data; 38139566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(m, &baij->imax, m, &baij->ilen)); 3814c75a6043SHong Zhang 3815c75a6043SHong Zhang baij->i = i; 3816c75a6043SHong Zhang baij->j = j; 3817c75a6043SHong Zhang baij->a = a; 381826fbe8dcSKarl Rupp 3819c75a6043SHong Zhang baij->singlemalloc = PETSC_FALSE; 3820c75a6043SHong Zhang baij->nonew = -1; /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/ 3821e6b907acSBarry Smith baij->free_a = PETSC_FALSE; 3822e6b907acSBarry Smith baij->free_ij = PETSC_FALSE; 3823c75a6043SHong Zhang 3824c75a6043SHong Zhang for (ii = 0; ii < m; ii++) { 3825c75a6043SHong Zhang baij->ilen[ii] = baij->imax[ii] = i[ii + 1] - i[ii]; 38266bdcaf15SBarry Smith PetscCheck(i[ii + 1] - i[ii] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, 
"Negative row length in i (row indices) row = %" PetscInt_FMT " length = %" PetscInt_FMT, ii, i[ii + 1] - i[ii]); 3827c75a6043SHong Zhang } 382876bd3646SJed Brown if (PetscDefined(USE_DEBUG)) { 3829c75a6043SHong Zhang for (ii = 0; ii < baij->i[m]; ii++) { 38306bdcaf15SBarry Smith PetscCheck(j[ii] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Negative column index at location = %" PetscInt_FMT " index = %" PetscInt_FMT, ii, j[ii]); 38316bdcaf15SBarry Smith PetscCheck(j[ii] <= n - 1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column index to large at location = %" PetscInt_FMT " index = %" PetscInt_FMT, ii, j[ii]); 3832c75a6043SHong Zhang } 383376bd3646SJed Brown } 3834c75a6043SHong Zhang 38359566063dSJacob Faibussowitsch PetscCall(MatAssemblyBegin(*mat, MAT_FINAL_ASSEMBLY)); 38369566063dSJacob Faibussowitsch PetscCall(MatAssemblyEnd(*mat, MAT_FINAL_ASSEMBLY)); 3837c75a6043SHong Zhang PetscFunctionReturn(0); 3838c75a6043SHong Zhang } 3839bdf6f3fcSHong Zhang 38409371c9d4SSatish Balay PetscErrorCode MatCreateMPIMatConcatenateSeqMat_SeqBAIJ(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat) { 3841bdf6f3fcSHong Zhang PetscFunctionBegin; 38429566063dSJacob Faibussowitsch PetscCall(MatCreateMPIMatConcatenateSeqMat_MPIBAIJ(comm, inmat, n, scall, outmat)); 3843bdf6f3fcSHong Zhang PetscFunctionReturn(0); 3844bdf6f3fcSHong Zhang } 3845