15d517e7eSBarry Smith /* 2ec3a8b7bSBarry Smith Factorization code for BAIJ format. 35d517e7eSBarry Smith */ 4c6db04a5SJed Brown #include <../src/mat/impls/baij/seq/baij.h> 5af0996ceSBarry Smith #include <petsc/private/kernels/blockinvert.h> 6ec3a8b7bSBarry Smith 7d71ae5a4SJacob Faibussowitsch PetscErrorCode MatLUFactorNumeric_SeqBAIJ_2(Mat B, Mat A, const MatFactorInfo *info) 8d71ae5a4SJacob Faibussowitsch { 9b588c5a2SHong Zhang Mat C = B; 10b588c5a2SHong Zhang Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data, *b = (Mat_SeqBAIJ *)C->data; 11b588c5a2SHong Zhang IS isrow = b->row, isicol = b->icol; 125a586d82SBarry Smith const PetscInt *r, *ic; 13bbd65245SShri Abhyankar const PetscInt n = a->mbs, *ai = a->i, *aj = a->j, *bi = b->i, *bj = b->j, bs2 = a->bs2; 14bbd65245SShri Abhyankar const PetscInt *ajtmp, *bjtmp, *bdiag = b->diag; 15bbd65245SShri Abhyankar MatScalar *rtmp, *pc, *mwork, *pv; 16bbd65245SShri Abhyankar MatScalar *aa = a->a, *v; 17182b8fbaSHong Zhang PetscReal shift = info->shiftamount; 18ff6a9541SJacob Faibussowitsch const PetscBool allowzeropivot = PetscNot(A->erroriffailure); 19b588c5a2SHong Zhang 20b588c5a2SHong Zhang PetscFunctionBegin; 219566063dSJacob Faibussowitsch PetscCall(ISGetIndices(isrow, &r)); 229566063dSJacob Faibussowitsch PetscCall(ISGetIndices(isicol, &ic)); 23b588c5a2SHong Zhang 244c000e2eSHong Zhang /* generate work space needed by the factorization */ 259566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(bs2 * n, &rtmp, bs2, &mwork)); 269566063dSJacob Faibussowitsch PetscCall(PetscArrayzero(rtmp, bs2 * n)); 27b588c5a2SHong Zhang 28ff6a9541SJacob Faibussowitsch for (PetscInt i = 0; i < n; i++) { 29b588c5a2SHong Zhang /* zero rtmp */ 30b588c5a2SHong Zhang /* L part */ 31ff6a9541SJacob Faibussowitsch PetscInt *pj; 32ff6a9541SJacob Faibussowitsch PetscInt nzL, nz = bi[i + 1] - bi[i]; 33b588c5a2SHong Zhang bjtmp = bj + bi[i]; 34ff6a9541SJacob Faibussowitsch for (PetscInt j = 0; j < nz; j++) PetscCall(PetscArrayzero(rtmp + bs2 * bjtmp[j], bs2)); 35b588c5a2SHong Zhang 36b588c5a2SHong Zhang /* U part */ 370c4413a7SShri Abhyankar nz = bdiag[i] - bdiag[i + 1]; 380c4413a7SShri Abhyankar bjtmp = bj + bdiag[i + 1] + 1; 39ff6a9541SJacob Faibussowitsch for (PetscInt j = 0; j < nz; j++) PetscCall(PetscArrayzero(rtmp + bs2 * bjtmp[j], bs2)); 400c4413a7SShri Abhyankar 410c4413a7SShri Abhyankar /* load in initial (unfactored row) */ 420c4413a7SShri Abhyankar nz = ai[r[i] + 1] - ai[r[i]]; 430c4413a7SShri Abhyankar ajtmp = aj + ai[r[i]]; 440c4413a7SShri Abhyankar v = aa + bs2 * ai[r[i]]; 45ff6a9541SJacob Faibussowitsch for (PetscInt j = 0; j < nz; j++) PetscCall(PetscArraycpy(rtmp + bs2 * ic[ajtmp[j]], v + bs2 * j, bs2)); 460c4413a7SShri Abhyankar 470c4413a7SShri Abhyankar /* elimination */ 480c4413a7SShri Abhyankar bjtmp = bj + bi[i]; 490c4413a7SShri Abhyankar nzL = bi[i + 1] - bi[i]; 50ff6a9541SJacob Faibussowitsch for (PetscInt k = 0; k < nzL; k++) { 51ff6a9541SJacob Faibussowitsch PetscBool flg = PETSC_FALSE; 52ff6a9541SJacob Faibussowitsch PetscInt row = bjtmp[k]; 53ff6a9541SJacob Faibussowitsch 540c4413a7SShri Abhyankar pc = rtmp + bs2 * row; 55ff6a9541SJacob Faibussowitsch for (PetscInt j = 0; j < bs2; j++) { 56c35f09e5SBarry Smith if (pc[j] != (PetscScalar)0.0) { 57ff6a9541SJacob Faibussowitsch flg = PETSC_TRUE; 58c35f09e5SBarry Smith break; 59c35f09e5SBarry Smith } 60c35f09e5SBarry Smith } 610c4413a7SShri Abhyankar if (flg) { 620c4413a7SShri Abhyankar pv = b->a + bs2 * bdiag[row]; 6396b95a6bSBarry Smith /* PetscKernel_A_gets_A_times_B(bs,pc,pv,mwork); *pc = *pc * (*pv); */ 649566063dSJacob Faibussowitsch PetscCall(PetscKernel_A_gets_A_times_B_2(pc, pv, mwork)); 650c4413a7SShri Abhyankar 66a5b23f4aSJose E. Roman pj = b->j + bdiag[row + 1] + 1; /* beginning of U(row,:) */ 670c4413a7SShri Abhyankar pv = b->a + bs2 * (bdiag[row + 1] + 1); 680c4413a7SShri Abhyankar nz = bdiag[row] - bdiag[row + 1] - 1; /* num of entries inU(row,:), excluding diag */ 69ff6a9541SJacob Faibussowitsch for (PetscInt j = 0; j < nz; j++) { 7096b95a6bSBarry Smith /* PetscKernel_A_gets_A_minus_B_times_C(bs,rtmp+bs2*pj[j],pc,pv+bs2*j); */ 710c4413a7SShri Abhyankar /* rtmp+bs2*pj[j] = rtmp+bs2*pj[j] - (*pc)*(pv+bs2*j) */ 720c4413a7SShri Abhyankar v = rtmp + 4 * pj[j]; 739566063dSJacob Faibussowitsch PetscCall(PetscKernel_A_gets_A_minus_B_times_C_2(v, pc, pv)); 740c4413a7SShri Abhyankar pv += 4; 750c4413a7SShri Abhyankar } 769566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(16.0 * nz + 12)); /* flops = 2*bs^3*nz + 2*bs^3 - bs2) */ 770c4413a7SShri Abhyankar } 780c4413a7SShri Abhyankar } 790c4413a7SShri Abhyankar 800c4413a7SShri Abhyankar /* finished row so stick it into b->a */ 810c4413a7SShri Abhyankar /* L part */ 820c4413a7SShri Abhyankar pv = b->a + bs2 * bi[i]; 830c4413a7SShri Abhyankar pj = b->j + bi[i]; 840c4413a7SShri Abhyankar nz = bi[i + 1] - bi[i]; 85ff6a9541SJacob Faibussowitsch for (PetscInt j = 0; j < nz; j++) PetscCall(PetscArraycpy(pv + bs2 * j, rtmp + bs2 * pj[j], bs2)); 860c4413a7SShri Abhyankar 87a5b23f4aSJose E. Roman /* Mark diagonal and invert diagonal for simpler triangular solves */ 880c4413a7SShri Abhyankar pv = b->a + bs2 * bdiag[i]; 890c4413a7SShri Abhyankar pj = b->j + bdiag[i]; 909566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(pv, rtmp + bs2 * pj[0], bs2)); 91ff6a9541SJacob Faibussowitsch { 92ff6a9541SJacob Faibussowitsch PetscBool zeropivotdetected; 93ff6a9541SJacob Faibussowitsch 949566063dSJacob Faibussowitsch PetscCall(PetscKernel_A_gets_inverse_A_2(pv, shift, allowzeropivot, &zeropivotdetected)); 957b6c816cSBarry Smith if (zeropivotdetected) B->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 96ff6a9541SJacob Faibussowitsch } 970c4413a7SShri Abhyankar 980c4413a7SShri Abhyankar /* U part */ 990c4413a7SShri Abhyankar pv = b->a + bs2 * (bdiag[i + 1] + 1); 1000c4413a7SShri Abhyankar pj = b->j + bdiag[i + 1] + 1; 1010c4413a7SShri Abhyankar nz = bdiag[i] - bdiag[i + 1] - 1; 102ff6a9541SJacob Faibussowitsch for (PetscInt j = 0; j < nz; j++) PetscCall(PetscArraycpy(pv + bs2 * j, rtmp + bs2 * pj[j], bs2)); 1030c4413a7SShri Abhyankar } 1040c4413a7SShri Abhyankar 1059566063dSJacob Faibussowitsch PetscCall(PetscFree2(rtmp, mwork)); 1069566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(isicol, &ic)); 1079566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(isrow, &r)); 10826fbe8dcSKarl Rupp 1094dd39f65SShri Abhyankar C->ops->solve = MatSolve_SeqBAIJ_2; 1104dd39f65SShri Abhyankar C->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_2; 1110c4413a7SShri Abhyankar C->assembled = PETSC_TRUE; 11226fbe8dcSKarl Rupp 1139566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(1.333333333333 * 2 * 2 * 2 * n)); /* from inverting diagonal blocks */ 1143ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1150c4413a7SShri Abhyankar } 1160c4413a7SShri Abhyankar 117d71ae5a4SJacob Faibussowitsch PetscErrorCode MatLUFactorNumeric_SeqBAIJ_2_NaturalOrdering(Mat B, Mat A, const MatFactorInfo *info) 118d71ae5a4SJacob Faibussowitsch { 1194c000e2eSHong Zhang Mat C = B; 1204c000e2eSHong Zhang Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data, *b = (Mat_SeqBAIJ *)C->data; 121bbd65245SShri Abhyankar PetscInt i, j, k, nz, nzL, row, *pj; 122bbd65245SShri Abhyankar const PetscInt n = a->mbs, *ai = a->i, *aj = a->j, *bi = b->i, *bj = b->j, bs2 = a->bs2; 123bbd65245SShri Abhyankar const PetscInt *ajtmp, *bjtmp, *bdiag = b->diag; 124bbd65245SShri Abhyankar MatScalar *rtmp, *pc, *mwork, *pv; 125bbd65245SShri Abhyankar MatScalar *aa = a->a, *v; 126bbd65245SShri Abhyankar PetscInt flg; 127182b8fbaSHong Zhang PetscReal shift = info->shiftamount; 128a455e926SHong Zhang PetscBool allowzeropivot, zeropivotdetected; 1294c000e2eSHong Zhang 1304c000e2eSHong Zhang PetscFunctionBegin; 1310164db54SHong Zhang allowzeropivot = PetscNot(A->erroriffailure); 1320164db54SHong Zhang 1334c000e2eSHong Zhang /* generate work space needed by the factorization */ 1349566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(bs2 * n, &rtmp, bs2, &mwork)); 1359566063dSJacob Faibussowitsch PetscCall(PetscArrayzero(rtmp, bs2 * n)); 1364c000e2eSHong Zhang 1374c000e2eSHong Zhang for (i = 0; i < n; i++) { 1384c000e2eSHong Zhang /* zero rtmp */ 1394c000e2eSHong Zhang /* L part */ 1404c000e2eSHong Zhang nz = bi[i + 1] - bi[i]; 1414c000e2eSHong Zhang bjtmp = bj + bi[i]; 14248a46eb9SPierre Jolivet for (j = 0; j < nz; j++) PetscCall(PetscArrayzero(rtmp + bs2 * bjtmp[j], bs2)); 1434c000e2eSHong Zhang 1444c000e2eSHong Zhang /* U part */ 145b2b2dd24SShri Abhyankar nz = bdiag[i] - bdiag[i + 1]; 146b2b2dd24SShri Abhyankar bjtmp = bj + bdiag[i + 1] + 1; 14748a46eb9SPierre Jolivet for (j = 0; j < nz; j++) PetscCall(PetscArrayzero(rtmp + bs2 * bjtmp[j], bs2)); 148b2b2dd24SShri Abhyankar 149b2b2dd24SShri Abhyankar /* load in initial (unfactored row) */ 150b2b2dd24SShri Abhyankar nz = ai[i + 1] - ai[i]; 151b2b2dd24SShri Abhyankar ajtmp = aj + ai[i]; 152b2b2dd24SShri Abhyankar v = aa + bs2 * ai[i]; 15348a46eb9SPierre Jolivet for (j = 0; j < nz; j++) PetscCall(PetscArraycpy(rtmp + bs2 * ajtmp[j], v + bs2 * j, bs2)); 154b2b2dd24SShri Abhyankar 155b2b2dd24SShri Abhyankar /* elimination */ 156b2b2dd24SShri Abhyankar bjtmp = bj + bi[i]; 157b2b2dd24SShri Abhyankar nzL = bi[i + 1] - bi[i]; 158b2b2dd24SShri Abhyankar for (k = 0; k < nzL; k++) { 159b2b2dd24SShri Abhyankar row = bjtmp[k]; 160b2b2dd24SShri Abhyankar pc = rtmp + bs2 * row; 161c35f09e5SBarry Smith for (flg = 0, j = 0; j < bs2; j++) { 162c35f09e5SBarry Smith if (pc[j] != (PetscScalar)0.0) { 163c35f09e5SBarry Smith flg = 1; 164c35f09e5SBarry Smith break; 165c35f09e5SBarry Smith } 166c35f09e5SBarry Smith } 167b2b2dd24SShri Abhyankar if (flg) { 168b2b2dd24SShri Abhyankar pv = b->a + bs2 * bdiag[row]; 16996b95a6bSBarry Smith /* PetscKernel_A_gets_A_times_B(bs,pc,pv,mwork); *pc = *pc * (*pv); */ 1709566063dSJacob Faibussowitsch PetscCall(PetscKernel_A_gets_A_times_B_2(pc, pv, mwork)); 171b2b2dd24SShri Abhyankar 172b2b2dd24SShri Abhyankar pj = b->j + bdiag[row + 1] + 1; /* beginning of U(row,:) */ 173b2b2dd24SShri Abhyankar pv = b->a + bs2 * (bdiag[row + 1] + 1); 174b2b2dd24SShri Abhyankar nz = bdiag[row] - bdiag[row + 1] - 1; /* num of entries in U(row,:) excluding diag */ 175b2b2dd24SShri Abhyankar for (j = 0; j < nz; j++) { 17696b95a6bSBarry Smith /* PetscKernel_A_gets_A_minus_B_times_C(bs,rtmp+bs2*pj[j],pc,pv+bs2*j); */ 177b2b2dd24SShri Abhyankar /* rtmp+bs2*pj[j] = rtmp+bs2*pj[j] - (*pc)*(pv+bs2*j) */ 178b2b2dd24SShri Abhyankar v = rtmp + 4 * pj[j]; 1799566063dSJacob Faibussowitsch PetscCall(PetscKernel_A_gets_A_minus_B_times_C_2(v, pc, pv)); 180b2b2dd24SShri Abhyankar pv += 4; 181b2b2dd24SShri Abhyankar } 1829566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(16.0 * nz + 12)); /* flops = 2*bs^3*nz + 2*bs^3 - bs2) */ 183b2b2dd24SShri Abhyankar } 184b2b2dd24SShri Abhyankar } 185b2b2dd24SShri Abhyankar 186b2b2dd24SShri Abhyankar /* finished row so stick it into b->a */ 187b2b2dd24SShri Abhyankar /* L part */ 188b2b2dd24SShri Abhyankar pv = b->a + bs2 * bi[i]; 189b2b2dd24SShri Abhyankar pj = b->j + bi[i]; 190b2b2dd24SShri Abhyankar nz = bi[i + 1] - bi[i]; 19148a46eb9SPierre Jolivet for (j = 0; j < nz; j++) PetscCall(PetscArraycpy(pv + bs2 * j, rtmp + bs2 * pj[j], bs2)); 192b2b2dd24SShri Abhyankar 193a5b23f4aSJose E. Roman /* Mark diagonal and invert diagonal for simpler triangular solves */ 194b2b2dd24SShri Abhyankar pv = b->a + bs2 * bdiag[i]; 195b2b2dd24SShri Abhyankar pj = b->j + bdiag[i]; 1969566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(pv, rtmp + bs2 * pj[0], bs2)); 1979566063dSJacob Faibussowitsch PetscCall(PetscKernel_A_gets_inverse_A_2(pv, shift, allowzeropivot, &zeropivotdetected)); 1987b6c816cSBarry Smith if (zeropivotdetected) B->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 199b2b2dd24SShri Abhyankar 200b2b2dd24SShri Abhyankar /* U part */ 201b2b2dd24SShri Abhyankar /* 202b2b2dd24SShri Abhyankar pv = b->a + bs2*bi[2*n-i]; 203b2b2dd24SShri Abhyankar pj = b->j + bi[2*n-i]; 204b2b2dd24SShri Abhyankar nz = bi[2*n-i+1] - bi[2*n-i] - 1; 205b2b2dd24SShri Abhyankar */ 206b2b2dd24SShri Abhyankar pv = b->a + bs2 * (bdiag[i + 1] + 1); 207b2b2dd24SShri Abhyankar pj = b->j + bdiag[i + 1] + 1; 208b2b2dd24SShri Abhyankar nz = bdiag[i] - bdiag[i + 1] - 1; 20948a46eb9SPierre Jolivet for (j = 0; j < nz; j++) PetscCall(PetscArraycpy(pv + bs2 * j, rtmp + bs2 * pj[j], bs2)); 210b2b2dd24SShri Abhyankar } 2119566063dSJacob Faibussowitsch PetscCall(PetscFree2(rtmp, mwork)); 212ae3d28f0SHong Zhang 2134dd39f65SShri Abhyankar C->ops->solve = MatSolve_SeqBAIJ_2_NaturalOrdering; 2149f5c0bcdSBarry Smith C->ops->forwardsolve = MatForwardSolve_SeqBAIJ_2_NaturalOrdering; 2159f5c0bcdSBarry Smith C->ops->backwardsolve = MatBackwardSolve_SeqBAIJ_2_NaturalOrdering; 2164dd39f65SShri Abhyankar C->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_2_NaturalOrdering; 217b2b2dd24SShri Abhyankar C->assembled = PETSC_TRUE; 21826fbe8dcSKarl Rupp 2199566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(1.333333333333 * 2 * 2 * 2 * n)); /* from inverting diagonal blocks */ 2203ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 221b2b2dd24SShri Abhyankar } 222b2b2dd24SShri Abhyankar 223421480d9SBarry Smith PetscErrorCode MatILUFactorNumeric_SeqBAIJ_2_inplace(Mat B, Mat A, const MatFactorInfo *info) 224d71ae5a4SJacob Faibussowitsch { 225719d5645SBarry Smith Mat C = B; 2264eeb42bcSBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data, *b = (Mat_SeqBAIJ *)C->data; 2277cdf2dbbSSatish Balay IS isrow = b->row, isicol = b->icol; 2285d0c19d7SBarry Smith const PetscInt *r, *ic; 2295d0c19d7SBarry Smith PetscInt i, j, n = a->mbs, *bi = b->i, *bj = b->j; 230690b6cddSBarry Smith PetscInt *ajtmpold, *ajtmp, nz, row; 231421480d9SBarry Smith PetscInt idx, *ai = a->i, *aj = a->j, *pj; 232421480d9SBarry Smith const PetscInt *diag_offset; 233329f5518SBarry Smith MatScalar *pv, *v, *rtmp, m1, m2, m3, m4, *pc, *w, *x, x1, x2, x3, x4; 2342515f8d2SSatish Balay MatScalar p1, p2, p3, p4; 2353f1db9ecSBarry Smith MatScalar *ba = b->a, *aa = a->a; 236182b8fbaSHong Zhang PetscReal shift = info->shiftamount; 237a455e926SHong Zhang PetscBool allowzeropivot, zeropivotdetected; 2384eeb42bcSBarry Smith 2393a40ed3dSBarry Smith PetscFunctionBegin; 240421480d9SBarry Smith /* Since A is C and C is labeled as a factored matrix we need to lie to MatGetDiagonalMarkers_SeqBAIJ() to get it to compute the diagonals */ 241421480d9SBarry Smith A->factortype = MAT_FACTOR_NONE; 242421480d9SBarry Smith PetscCall(MatGetDiagonalMarkers_SeqBAIJ(A, &diag_offset, NULL)); 243421480d9SBarry Smith A->factortype = MAT_FACTOR_ILU; 2440164db54SHong Zhang allowzeropivot = PetscNot(A->erroriffailure); 2459566063dSJacob Faibussowitsch PetscCall(ISGetIndices(isrow, &r)); 2469566063dSJacob Faibussowitsch PetscCall(ISGetIndices(isicol, &ic)); 2479566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(4 * (n + 1), &rtmp)); 2484eeb42bcSBarry Smith 2494eeb42bcSBarry Smith for (i = 0; i < n; i++) { 2504078e994SBarry Smith nz = bi[i + 1] - bi[i]; 2514078e994SBarry Smith ajtmp = bj + bi[i]; 2524eeb42bcSBarry Smith for (j = 0; j < nz; j++) { 2539371c9d4SSatish Balay x = rtmp + 4 * ajtmp[j]; 2549371c9d4SSatish Balay x[0] = x[1] = x[2] = x[3] = 0.0; 2554eeb42bcSBarry Smith } 2564eeb42bcSBarry Smith /* load in initial (unfactored row) */ 2574eeb42bcSBarry Smith idx = r[i]; 2584078e994SBarry Smith nz = ai[idx + 1] - ai[idx]; 2594078e994SBarry Smith ajtmpold = aj + ai[idx]; 2604078e994SBarry Smith v = aa + 4 * ai[idx]; 2614eeb42bcSBarry Smith for (j = 0; j < nz; j++) { 2624eeb42bcSBarry Smith x = rtmp + 4 * ic[ajtmpold[j]]; 2639371c9d4SSatish Balay x[0] = v[0]; 2649371c9d4SSatish Balay x[1] = v[1]; 2659371c9d4SSatish Balay x[2] = v[2]; 2669371c9d4SSatish Balay x[3] = v[3]; 2674eeb42bcSBarry Smith v += 4; 2684eeb42bcSBarry Smith } 2694eeb42bcSBarry Smith row = *ajtmp++; 2704eeb42bcSBarry Smith while (row < i) { 2714eeb42bcSBarry Smith pc = rtmp + 4 * row; 2729371c9d4SSatish Balay p1 = pc[0]; 2739371c9d4SSatish Balay p2 = pc[1]; 2749371c9d4SSatish Balay p3 = pc[2]; 2759371c9d4SSatish Balay p4 = pc[3]; 276d4a378daSJed Brown if (p1 != (PetscScalar)0.0 || p2 != (PetscScalar)0.0 || p3 != (PetscScalar)0.0 || p4 != (PetscScalar)0.0) { 2774078e994SBarry Smith pv = ba + 4 * diag_offset[row]; 2784078e994SBarry Smith pj = bj + diag_offset[row] + 1; 2799371c9d4SSatish Balay x1 = pv[0]; 2809371c9d4SSatish Balay x2 = pv[1]; 2819371c9d4SSatish Balay x3 = pv[2]; 2829371c9d4SSatish Balay x4 = pv[3]; 2834eeb42bcSBarry Smith pc[0] = m1 = p1 * x1 + p3 * x2; 2844eeb42bcSBarry Smith pc[1] = m2 = p2 * x1 + p4 * x2; 2854eeb42bcSBarry Smith pc[2] = m3 = p1 * x3 + p3 * x4; 2864eeb42bcSBarry Smith pc[3] = m4 = p2 * x3 + p4 * x4; 2874078e994SBarry Smith nz = bi[row + 1] - diag_offset[row] - 1; 2884eeb42bcSBarry Smith pv += 4; 2894eeb42bcSBarry Smith for (j = 0; j < nz; j++) { 2909371c9d4SSatish Balay x1 = pv[0]; 2919371c9d4SSatish Balay x2 = pv[1]; 2929371c9d4SSatish Balay x3 = pv[2]; 2939371c9d4SSatish Balay x4 = pv[3]; 2944eeb42bcSBarry Smith x = rtmp + 4 * pj[j]; 2954eeb42bcSBarry Smith x[0] -= m1 * x1 + m3 * x2; 2964eeb42bcSBarry Smith x[1] -= m2 * x1 + m4 * x2; 2974eeb42bcSBarry Smith x[2] -= m1 * x3 + m3 * x4; 2984eeb42bcSBarry Smith x[3] -= m2 * x3 + m4 * x4; 2994eeb42bcSBarry Smith pv += 4; 3004eeb42bcSBarry Smith } 3019566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(16.0 * nz + 12.0)); 3024eeb42bcSBarry Smith } 3034eeb42bcSBarry Smith row = *ajtmp++; 3044eeb42bcSBarry Smith } 3054eeb42bcSBarry Smith /* finished row so stick it into b->a */ 3064078e994SBarry Smith pv = ba + 4 * bi[i]; 3074078e994SBarry Smith pj = bj + bi[i]; 3084078e994SBarry Smith nz = bi[i + 1] - bi[i]; 3094eeb42bcSBarry Smith for (j = 0; j < nz; j++) { 3104eeb42bcSBarry Smith x = rtmp + 4 * pj[j]; 3119371c9d4SSatish Balay pv[0] = x[0]; 3129371c9d4SSatish Balay pv[1] = x[1]; 3139371c9d4SSatish Balay pv[2] = x[2]; 3149371c9d4SSatish Balay pv[3] = x[3]; 3154eeb42bcSBarry Smith pv += 4; 3164eeb42bcSBarry Smith } 3174eeb42bcSBarry Smith /* invert diagonal block */ 3184078e994SBarry Smith w = ba + 4 * diag_offset[i]; 3199566063dSJacob Faibussowitsch PetscCall(PetscKernel_A_gets_inverse_A_2(w, shift, allowzeropivot, &zeropivotdetected)); 3207b6c816cSBarry Smith if (zeropivotdetected) C->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 3214eeb42bcSBarry Smith } 3224eeb42bcSBarry Smith 3239566063dSJacob Faibussowitsch PetscCall(PetscFree(rtmp)); 3249566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(isicol, &ic)); 3259566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(isrow, &r)); 32626fbe8dcSKarl Rupp 32706e38f1dSHong Zhang C->ops->solve = MatSolve_SeqBAIJ_2_inplace; 32806e38f1dSHong Zhang C->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_2_inplace; 3294eeb42bcSBarry Smith C->assembled = PETSC_TRUE; 33026fbe8dcSKarl Rupp 3319566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(1.333333333333 * 8 * b->mbs)); /* from inverting diagonal blocks */ 3323ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 3334eeb42bcSBarry Smith } 3344cd38560SBarry Smith /* 3354cd38560SBarry Smith Version for when blocks are 2 by 2 Using natural ordering 3364cd38560SBarry Smith */ 337421480d9SBarry Smith PetscErrorCode MatILUFactorNumeric_SeqBAIJ_2_NaturalOrdering_inplace(Mat C, Mat A, const MatFactorInfo *info) 338d71ae5a4SJacob Faibussowitsch { 3394cd38560SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data, *b = (Mat_SeqBAIJ *)C->data; 340690b6cddSBarry Smith PetscInt i, j, n = a->mbs, *bi = b->i, *bj = b->j; 341690b6cddSBarry Smith PetscInt *ajtmpold, *ajtmp, nz, row; 342421480d9SBarry Smith PetscInt *ai = a->i, *aj = a->j, *pj; 343421480d9SBarry Smith const PetscInt *diag_offset; 344329f5518SBarry Smith MatScalar *pv, *v, *rtmp, *pc, *w, *x; 3452515f8d2SSatish Balay MatScalar p1, p2, p3, p4, m1, m2, m3, m4, x1, x2, x3, x4; 3464cd38560SBarry Smith MatScalar *ba = b->a, *aa = a->a; 347182b8fbaSHong Zhang PetscReal shift = info->shiftamount; 348a455e926SHong Zhang PetscBool allowzeropivot, zeropivotdetected; 3494cd38560SBarry Smith 3504cd38560SBarry Smith PetscFunctionBegin; 351421480d9SBarry Smith /* Since A is C and C is labeled as a factored matrix we need to lie to MatGetDiagonalMarkers_SeqBAIJ() to get it to compute the diagonals */ 352421480d9SBarry Smith A->factortype = MAT_FACTOR_NONE; 353421480d9SBarry Smith PetscCall(MatGetDiagonalMarkers_SeqBAIJ(A, &diag_offset, NULL)); 354421480d9SBarry Smith A->factortype = MAT_FACTOR_ILU; 3550164db54SHong Zhang allowzeropivot = PetscNot(A->erroriffailure); 3569566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(4 * (n + 1), &rtmp)); 3574cd38560SBarry Smith for (i = 0; i < n; i++) { 3584cd38560SBarry Smith nz = bi[i + 1] - bi[i]; 3594cd38560SBarry Smith ajtmp = bj + bi[i]; 3604cd38560SBarry Smith for (j = 0; j < nz; j++) { 3614cd38560SBarry Smith x = rtmp + 4 * ajtmp[j]; 3624cd38560SBarry Smith x[0] = x[1] = x[2] = x[3] = 0.0; 3634cd38560SBarry Smith } 3644cd38560SBarry Smith /* load in initial (unfactored row) */ 3654cd38560SBarry Smith nz = ai[i + 1] - ai[i]; 3664cd38560SBarry Smith ajtmpold = aj + ai[i]; 3674cd38560SBarry Smith v = aa + 4 * ai[i]; 3684cd38560SBarry Smith for (j = 0; j < nz; j++) { 3694cd38560SBarry Smith x = rtmp + 4 * ajtmpold[j]; 3709371c9d4SSatish Balay x[0] = v[0]; 3719371c9d4SSatish Balay x[1] = v[1]; 3729371c9d4SSatish Balay x[2] = v[2]; 3739371c9d4SSatish Balay x[3] = v[3]; 3744cd38560SBarry Smith v += 4; 3754cd38560SBarry Smith } 3764cd38560SBarry Smith row = *ajtmp++; 3774cd38560SBarry Smith while (row < i) { 3784cd38560SBarry Smith pc = rtmp + 4 * row; 3799371c9d4SSatish Balay p1 = pc[0]; 3809371c9d4SSatish Balay p2 = pc[1]; 3819371c9d4SSatish Balay p3 = pc[2]; 3829371c9d4SSatish Balay p4 = pc[3]; 383d4a378daSJed Brown if (p1 != (PetscScalar)0.0 || p2 != (PetscScalar)0.0 || p3 != (PetscScalar)0.0 || p4 != (PetscScalar)0.0) { 3844cd38560SBarry Smith pv = ba + 4 * diag_offset[row]; 3854cd38560SBarry Smith pj = bj + diag_offset[row] + 1; 3869371c9d4SSatish Balay x1 = pv[0]; 3879371c9d4SSatish Balay x2 = pv[1]; 3889371c9d4SSatish Balay x3 = pv[2]; 3899371c9d4SSatish Balay x4 = pv[3]; 3904cd38560SBarry Smith pc[0] = m1 = p1 * x1 + p3 * x2; 3914cd38560SBarry Smith pc[1] = m2 = p2 * x1 + p4 * x2; 3924cd38560SBarry Smith pc[2] = m3 = p1 * x3 + p3 * x4; 3934cd38560SBarry Smith pc[3] = m4 = p2 * x3 + p4 * x4; 3944cd38560SBarry Smith nz = bi[row + 1] - diag_offset[row] - 1; 3954cd38560SBarry Smith pv += 4; 3964cd38560SBarry Smith for (j = 0; j < nz; j++) { 3979371c9d4SSatish Balay x1 = pv[0]; 3989371c9d4SSatish Balay x2 = pv[1]; 3999371c9d4SSatish Balay x3 = pv[2]; 4009371c9d4SSatish Balay x4 = pv[3]; 4014cd38560SBarry Smith x = rtmp + 4 * pj[j]; 4024cd38560SBarry Smith x[0] -= m1 * x1 + m3 * x2; 4034cd38560SBarry Smith x[1] -= m2 * x1 + m4 * x2; 4044cd38560SBarry Smith x[2] -= m1 * x3 + m3 * x4; 4054cd38560SBarry Smith x[3] -= m2 * x3 + m4 * x4; 4064cd38560SBarry Smith pv += 4; 4074cd38560SBarry Smith } 4089566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(16.0 * nz + 12.0)); 4094cd38560SBarry Smith } 4104cd38560SBarry Smith row = *ajtmp++; 4114cd38560SBarry Smith } 4124cd38560SBarry Smith /* finished row so stick it into b->a */ 4134cd38560SBarry Smith pv = ba + 4 * bi[i]; 4144cd38560SBarry Smith pj = bj + bi[i]; 4154cd38560SBarry Smith nz = bi[i + 1] - bi[i]; 4164cd38560SBarry Smith for (j = 0; j < nz; j++) { 4174cd38560SBarry Smith x = rtmp + 4 * pj[j]; 4189371c9d4SSatish Balay pv[0] = x[0]; 4199371c9d4SSatish Balay pv[1] = x[1]; 4209371c9d4SSatish Balay pv[2] = x[2]; 4219371c9d4SSatish Balay pv[3] = x[3]; 4222efa7f71SHong Zhang /* 4232efa7f71SHong Zhang printf(" col %d:",pj[j]); 4242efa7f71SHong Zhang PetscInt j1; 4252efa7f71SHong Zhang for (j1=0; j1<4; j1++) printf(" %g,",*(pv+j1)); 4262efa7f71SHong Zhang printf("\n"); 4272efa7f71SHong Zhang */ 4284cd38560SBarry Smith pv += 4; 4294cd38560SBarry Smith } 4304cd38560SBarry Smith /* invert diagonal block */ 4314cd38560SBarry Smith w = ba + 4 * diag_offset[i]; 4329566063dSJacob Faibussowitsch PetscCall(PetscKernel_A_gets_inverse_A_2(w, shift, allowzeropivot, &zeropivotdetected)); 4337b6c816cSBarry Smith if (zeropivotdetected) C->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 4344cd38560SBarry Smith } 4354cd38560SBarry Smith 4369566063dSJacob Faibussowitsch PetscCall(PetscFree(rtmp)); 43726fbe8dcSKarl Rupp 43806e38f1dSHong Zhang C->ops->solve = MatSolve_SeqBAIJ_2_NaturalOrdering_inplace; 43906e38f1dSHong Zhang C->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_2_NaturalOrdering_inplace; 4404cd38560SBarry Smith C->assembled = PETSC_TRUE; 44126fbe8dcSKarl Rupp 4429566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(1.333333333333 * 8 * b->mbs)); /* from inverting diagonal blocks */ 4433ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 4444cd38560SBarry Smith } 4457fc0212eSBarry Smith 4467fc0212eSBarry Smith /* 4477fc0212eSBarry Smith Version for when blocks are 1 by 1. 4487fc0212eSBarry Smith */ 449d71ae5a4SJacob Faibussowitsch PetscErrorCode MatLUFactorNumeric_SeqBAIJ_1(Mat B, Mat A, const MatFactorInfo *info) 450d71ae5a4SJacob Faibussowitsch { 451048b5e81SShri Abhyankar Mat C = B; 452048b5e81SShri Abhyankar Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data, *b = (Mat_SeqBAIJ *)C->data; 453048b5e81SShri Abhyankar IS isrow = b->row, isicol = b->icol; 454048b5e81SShri Abhyankar const PetscInt *r, *ic, *ics; 455048b5e81SShri Abhyankar const PetscInt n = a->mbs, *ai = a->i, *aj = a->j, *bi = b->i, *bj = b->j, *bdiag = b->diag; 456048b5e81SShri Abhyankar PetscInt i, j, k, nz, nzL, row, *pj; 457048b5e81SShri Abhyankar const PetscInt *ajtmp, *bjtmp; 458048b5e81SShri Abhyankar MatScalar *rtmp, *pc, multiplier, *pv; 459048b5e81SShri Abhyankar const MatScalar *aa = a->a, *v; 460ace3abfcSBarry Smith PetscBool row_identity, col_identity; 461048b5e81SShri Abhyankar FactorShiftCtx sctx; 462048b5e81SShri Abhyankar const PetscInt *ddiag; 463048b5e81SShri Abhyankar PetscReal rs; 464048b5e81SShri Abhyankar MatScalar d; 465048b5e81SShri Abhyankar 466048b5e81SShri Abhyankar PetscFunctionBegin; 467048b5e81SShri Abhyankar /* MatPivotSetUp(): initialize shift context sctx */ 4689566063dSJacob Faibussowitsch PetscCall(PetscMemzero(&sctx, sizeof(FactorShiftCtx))); 469048b5e81SShri Abhyankar 470048b5e81SShri Abhyankar if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */ 471421480d9SBarry Smith PetscCall(MatGetDiagonalMarkers_SeqBAIJ(A, &ddiag, NULL)); 472048b5e81SShri Abhyankar sctx.shift_top = info->zeropivot; 473048b5e81SShri Abhyankar for (i = 0; i < n; i++) { 474048b5e81SShri Abhyankar /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */ 475048b5e81SShri Abhyankar d = (aa)[ddiag[i]]; 476048b5e81SShri Abhyankar rs = -PetscAbsScalar(d) - PetscRealPart(d); 477048b5e81SShri Abhyankar v = aa + ai[i]; 478048b5e81SShri Abhyankar nz = ai[i + 1] - ai[i]; 47926fbe8dcSKarl Rupp for (j = 0; j < nz; j++) rs += PetscAbsScalar(v[j]); 480048b5e81SShri Abhyankar if (rs > sctx.shift_top) sctx.shift_top = rs; 481048b5e81SShri Abhyankar } 482048b5e81SShri Abhyankar sctx.shift_top *= 1.1; 483048b5e81SShri Abhyankar sctx.nshift_max = 5; 484048b5e81SShri Abhyankar sctx.shift_lo = 0.; 485048b5e81SShri Abhyankar sctx.shift_hi = 1.; 486048b5e81SShri Abhyankar } 487048b5e81SShri Abhyankar 4889566063dSJacob Faibussowitsch PetscCall(ISGetIndices(isrow, &r)); 4899566063dSJacob Faibussowitsch PetscCall(ISGetIndices(isicol, &ic)); 4909566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(n + 1, &rtmp)); 491048b5e81SShri Abhyankar ics = ic; 492048b5e81SShri Abhyankar 493048b5e81SShri Abhyankar do { 494048b5e81SShri Abhyankar sctx.newshift = PETSC_FALSE; 495048b5e81SShri Abhyankar for (i = 0; i < n; i++) { 496048b5e81SShri Abhyankar /* zero rtmp */ 497048b5e81SShri Abhyankar /* L part */ 498048b5e81SShri Abhyankar nz = bi[i + 1] - bi[i]; 499048b5e81SShri Abhyankar bjtmp = bj + bi[i]; 500048b5e81SShri Abhyankar for (j = 0; j < nz; j++) rtmp[bjtmp[j]] = 0.0; 501048b5e81SShri Abhyankar 502048b5e81SShri Abhyankar /* U part */ 503048b5e81SShri Abhyankar nz = bdiag[i] - bdiag[i + 1]; 504048b5e81SShri Abhyankar bjtmp = bj + bdiag[i + 1] + 1; 505048b5e81SShri Abhyankar for (j = 0; j < nz; j++) rtmp[bjtmp[j]] = 0.0; 506048b5e81SShri Abhyankar 507048b5e81SShri Abhyankar /* load in initial (unfactored row) */ 508048b5e81SShri Abhyankar nz = ai[r[i] + 1] - ai[r[i]]; 509048b5e81SShri Abhyankar ajtmp = aj + ai[r[i]]; 510048b5e81SShri Abhyankar v = aa + ai[r[i]]; 51126fbe8dcSKarl Rupp for (j = 0; j < nz; j++) rtmp[ics[ajtmp[j]]] = v[j]; 51226fbe8dcSKarl Rupp 513048b5e81SShri Abhyankar /* ZeropivotApply() */ 514048b5e81SShri Abhyankar rtmp[i] += sctx.shift_amount; /* shift the diagonal of the matrix */ 515048b5e81SShri Abhyankar 516048b5e81SShri Abhyankar /* elimination */ 517048b5e81SShri Abhyankar bjtmp = bj + bi[i]; 518048b5e81SShri Abhyankar row = *bjtmp++; 519048b5e81SShri Abhyankar nzL = bi[i + 1] - bi[i]; 520048b5e81SShri Abhyankar for (k = 0; k < nzL; k++) { 521048b5e81SShri Abhyankar pc = rtmp + row; 522d4a378daSJed Brown if (*pc != (PetscScalar)0.0) { 523048b5e81SShri Abhyankar pv = b->a + bdiag[row]; 524048b5e81SShri Abhyankar multiplier = *pc * (*pv); 525048b5e81SShri Abhyankar *pc = multiplier; 52626fbe8dcSKarl Rupp 527048b5e81SShri Abhyankar pj = b->j + bdiag[row + 1] + 1; /* beginning of U(row,:) */ 528048b5e81SShri Abhyankar pv = b->a + bdiag[row + 1] + 1; 529048b5e81SShri Abhyankar nz = bdiag[row] - bdiag[row + 1] - 1; /* num of entries in U(row,:) excluding diag */ 530048b5e81SShri Abhyankar for (j = 0; j < nz; j++) rtmp[pj[j]] -= multiplier * pv[j]; 5319566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(2.0 * nz)); 532048b5e81SShri Abhyankar } 533048b5e81SShri Abhyankar row = *bjtmp++; 534048b5e81SShri Abhyankar } 535048b5e81SShri Abhyankar 536048b5e81SShri Abhyankar /* finished row so stick it into b->a */ 537048b5e81SShri Abhyankar rs = 0.0; 538048b5e81SShri Abhyankar /* L part */ 539048b5e81SShri Abhyankar pv = b->a + bi[i]; 540048b5e81SShri Abhyankar pj = b->j + bi[i]; 541048b5e81SShri Abhyankar nz = bi[i + 1] - bi[i]; 542048b5e81SShri Abhyankar for (j = 0; j < nz; j++) { 5439371c9d4SSatish Balay pv[j] = rtmp[pj[j]]; 5449371c9d4SSatish Balay rs += PetscAbsScalar(pv[j]); 545048b5e81SShri Abhyankar } 546048b5e81SShri Abhyankar 547048b5e81SShri Abhyankar /* U part */ 548048b5e81SShri Abhyankar pv = b->a + bdiag[i + 1] + 1; 549048b5e81SShri Abhyankar pj = b->j + bdiag[i + 1] + 1; 550048b5e81SShri Abhyankar nz = bdiag[i] - bdiag[i + 1] - 1; 551048b5e81SShri Abhyankar for (j = 0; j < nz; j++) { 5529371c9d4SSatish Balay pv[j] = rtmp[pj[j]]; 5539371c9d4SSatish Balay rs += PetscAbsScalar(pv[j]); 554048b5e81SShri Abhyankar } 555048b5e81SShri Abhyankar 556048b5e81SShri Abhyankar sctx.rs = rs; 557048b5e81SShri Abhyankar sctx.pv = rtmp[i]; 5589566063dSJacob Faibussowitsch PetscCall(MatPivotCheck(B, A, info, &sctx, i)); 559048b5e81SShri Abhyankar if (sctx.newshift) break; /* break for-loop */ 560048b5e81SShri Abhyankar rtmp[i] = sctx.pv; /* sctx.pv might be updated in the case of MAT_SHIFT_INBLOCKS */ 561048b5e81SShri Abhyankar 562a5b23f4aSJose E. Roman /* Mark diagonal and invert diagonal for simpler triangular solves */ 563048b5e81SShri Abhyankar pv = b->a + bdiag[i]; 564d4a378daSJed Brown *pv = (PetscScalar)1.0 / rtmp[i]; 565048b5e81SShri Abhyankar 566048b5e81SShri Abhyankar } /* endof for (i=0; i<n; i++) { */ 567048b5e81SShri Abhyankar 568048b5e81SShri Abhyankar /* MatPivotRefine() */ 569048b5e81SShri Abhyankar if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE && !sctx.newshift && sctx.shift_fraction > 0 && sctx.nshift < sctx.nshift_max) { 570048b5e81SShri Abhyankar /* 571048b5e81SShri Abhyankar * if no shift in this attempt & shifting & started shifting & can refine, 572048b5e81SShri Abhyankar * then try lower shift 573048b5e81SShri Abhyankar */ 574048b5e81SShri Abhyankar sctx.shift_hi = sctx.shift_fraction; 575048b5e81SShri Abhyankar sctx.shift_fraction = (sctx.shift_hi + sctx.shift_lo) / 2.; 576048b5e81SShri Abhyankar sctx.shift_amount = sctx.shift_fraction * sctx.shift_top; 577048b5e81SShri Abhyankar sctx.newshift = PETSC_TRUE; 578048b5e81SShri Abhyankar sctx.nshift++; 579048b5e81SShri Abhyankar } 580048b5e81SShri Abhyankar } while (sctx.newshift); 581048b5e81SShri Abhyankar 5829566063dSJacob Faibussowitsch PetscCall(PetscFree(rtmp)); 5839566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(isicol, &ic)); 5849566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(isrow, &r)); 585048b5e81SShri Abhyankar 5869566063dSJacob Faibussowitsch PetscCall(ISIdentity(isrow, &row_identity)); 5879566063dSJacob Faibussowitsch PetscCall(ISIdentity(isicol, &col_identity)); 588048b5e81SShri Abhyankar if (row_identity && col_identity) { 589048b5e81SShri Abhyankar C->ops->solve = MatSolve_SeqBAIJ_1_NaturalOrdering; 5909f5c0bcdSBarry Smith C->ops->forwardsolve = MatForwardSolve_SeqBAIJ_1_NaturalOrdering; 5919f5c0bcdSBarry Smith C->ops->backwardsolve = MatBackwardSolve_SeqBAIJ_1_NaturalOrdering; 59293fd935bSShri Abhyankar C->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_1_NaturalOrdering; 593048b5e81SShri Abhyankar } else { 594048b5e81SShri Abhyankar C->ops->solve = MatSolve_SeqBAIJ_1; 59593fd935bSShri Abhyankar C->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_1; 596048b5e81SShri Abhyankar } 597048b5e81SShri Abhyankar C->assembled = PETSC_TRUE; 5989566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(C->cmap->n)); 599048b5e81SShri Abhyankar 600048b5e81SShri Abhyankar /* MatShiftView(A,info,&sctx) */ 601048b5e81SShri Abhyankar if (sctx.nshift) { 602048b5e81SShri Abhyankar if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { 6039566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "number of shift_pd tries %" PetscInt_FMT ", shift_amount %g, diagonal shifted up by %e fraction top_value %e\n", sctx.nshift, (double)sctx.shift_amount, (double)sctx.shift_fraction, (double)sctx.shift_top)); 604048b5e81SShri Abhyankar } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) { 6059566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "number of shift_nz tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount)); 606048b5e81SShri Abhyankar } else if (info->shifttype == (PetscReal)MAT_SHIFT_INBLOCKS) { 6079566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "number of shift_inblocks applied %" PetscInt_FMT ", each shift_amount %g\n", sctx.nshift, (double)info->shiftamount)); 608048b5e81SShri Abhyankar } 609048b5e81SShri Abhyankar } 6103ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 611048b5e81SShri Abhyankar } 612048b5e81SShri Abhyankar 613421480d9SBarry Smith PetscErrorCode MatILUFactorNumeric_SeqBAIJ_1_inplace(Mat C, Mat A, const MatFactorInfo *info) 614d71ae5a4SJacob Faibussowitsch { 6157fc0212eSBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data, *b = (Mat_SeqBAIJ *)C->data; 6167cdf2dbbSSatish Balay IS isrow = b->row, isicol = b->icol; 6175d0c19d7SBarry Smith const PetscInt *r, *ic; 6185d0c19d7SBarry Smith PetscInt i, j, n = a->mbs, *bi = b->i, *bj = b->j; 619690b6cddSBarry Smith PetscInt *ajtmpold, *ajtmp, nz, row, *ai = a->i, *aj = a->j; 620421480d9SBarry Smith PetscInt diag, *pj; 621421480d9SBarry Smith const PetscInt *diag_offset; 622329f5518SBarry Smith MatScalar *pv, *v, *rtmp, multiplier, *pc; 6233f1db9ecSBarry Smith MatScalar *ba = b->a, *aa = a->a; 624ace3abfcSBarry Smith PetscBool row_identity, col_identity; 6257fc0212eSBarry Smith 6263a40ed3dSBarry Smith PetscFunctionBegin; 627421480d9SBarry Smith /* Since A is C and C is labeled as a factored matrix we need to lie to MatGetDiagonalMarkers_SeqBAIJ() to get it to compute the diagonals */ 628421480d9SBarry Smith A->factortype = MAT_FACTOR_NONE; 629421480d9SBarry Smith PetscCall(MatGetDiagonalMarkers_SeqBAIJ(A, &diag_offset, NULL)); 630421480d9SBarry Smith A->factortype = MAT_FACTOR_ILU; 6319566063dSJacob Faibussowitsch PetscCall(ISGetIndices(isrow, &r)); 6329566063dSJacob Faibussowitsch PetscCall(ISGetIndices(isicol, &ic)); 6339566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(n + 1, &rtmp)); 6347fc0212eSBarry Smith 6357fc0212eSBarry Smith for (i = 0; i < n; i++) { 6364078e994SBarry Smith nz = bi[i + 1] - bi[i]; 6374078e994SBarry Smith ajtmp = bj + bi[i]; 6387fc0212eSBarry Smith for (j = 0; j < nz; j++) rtmp[ajtmp[j]] = 0.0; 6397fc0212eSBarry Smith 6407fc0212eSBarry Smith /* load in initial (unfactored row) */ 6414078e994SBarry Smith nz = ai[r[i] + 1] - ai[r[i]]; 6424078e994SBarry Smith ajtmpold = aj + ai[r[i]]; 6434078e994SBarry Smith v = aa + ai[r[i]]; 6447fc0212eSBarry Smith for (j = 0; j < nz; j++) rtmp[ic[ajtmpold[j]]] = v[j]; 6457fc0212eSBarry Smith 6467fc0212eSBarry Smith row = *ajtmp++; 6477fc0212eSBarry Smith while (row < i) { 6487fc0212eSBarry Smith pc = rtmp + row; 6497fc0212eSBarry Smith if (*pc != 0.0) { 6504078e994SBarry Smith pv = ba + diag_offset[row]; 6514078e994SBarry Smith pj = bj + diag_offset[row] + 1; 6527fc0212eSBarry Smith multiplier = *pc * *pv++; 6537fc0212eSBarry Smith *pc = multiplier; 6544078e994SBarry Smith nz = bi[row + 1] - diag_offset[row] - 1; 6557fc0212eSBarry Smith for (j = 0; j < nz; j++) rtmp[pj[j]] -= multiplier * pv[j]; 6569566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(1.0 + 2.0 * nz)); 6577fc0212eSBarry Smith } 6587fc0212eSBarry Smith row = *ajtmp++; 6597fc0212eSBarry Smith } 6607fc0212eSBarry Smith /* finished row so stick it into b->a */ 6614078e994SBarry Smith pv = ba + bi[i]; 6624078e994SBarry Smith pj = bj + bi[i]; 6634078e994SBarry Smith nz = bi[i + 1] - bi[i]; 66426fbe8dcSKarl Rupp for (j = 0; j < nz; j++) pv[j] = rtmp[pj[j]]; 6654078e994SBarry Smith diag = diag_offset[i] - bi[i]; 6667fc0212eSBarry Smith /* check pivot entry for current row */ 66708401ef6SPierre Jolivet PetscCheck(pv[diag] != 0.0, PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Zero pivot: row in original ordering %" PetscInt_FMT " in permuted ordering %" PetscInt_FMT, r[i], i); 6687fc0212eSBarry Smith pv[diag] = 1.0 / pv[diag]; 6697fc0212eSBarry Smith } 6707fc0212eSBarry Smith 6719566063dSJacob Faibussowitsch PetscCall(PetscFree(rtmp)); 6729566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(isicol, &ic)); 6739566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(isrow, &r)); 6749566063dSJacob Faibussowitsch PetscCall(ISIdentity(isrow, &row_identity)); 6759566063dSJacob Faibussowitsch PetscCall(ISIdentity(isicol, &col_identity)); 676f58c8c74SBarry Smith if (row_identity && col_identity) { 67706e38f1dSHong Zhang C->ops->solve = MatSolve_SeqBAIJ_1_NaturalOrdering_inplace; 67806e38f1dSHong Zhang C->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_1_NaturalOrdering_inplace; 679f58c8c74SBarry Smith } else { 68006e38f1dSHong Zhang C->ops->solve = MatSolve_SeqBAIJ_1_inplace; 68106e38f1dSHong Zhang C->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_1_inplace; 682f58c8c74SBarry Smith } 6837fc0212eSBarry Smith C->assembled = PETSC_TRUE; 6849566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(C->cmap->n)); 6853ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 6867fc0212eSBarry Smith } 6877fc0212eSBarry Smith 688d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatFactorGetSolverType_petsc(Mat A, MatSolverType *type) 689d71ae5a4SJacob Faibussowitsch { 6904ac6704cSBarry Smith PetscFunctionBegin; 6914ac6704cSBarry Smith *type = MATSOLVERPETSC; 6923ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 6934ac6704cSBarry Smith } 6944ac6704cSBarry Smith 695d71ae5a4SJacob Faibussowitsch PETSC_INTERN PetscErrorCode MatGetFactor_seqbaij_petsc(Mat A, MatFactorType ftype, Mat *B) 696d71ae5a4SJacob Faibussowitsch { 697d0f46423SBarry Smith PetscInt n = A->rmap->n; 698b24902e0SBarry Smith 699b24902e0SBarry Smith PetscFunctionBegin; 700b499a2aeSHong Zhang #if defined(PETSC_USE_COMPLEX) 701b94d7dedSBarry Smith PetscCheck(A->hermitian != PETSC_BOOL3_TRUE || !(ftype == MAT_FACTOR_CHOLESKY || ftype == MAT_FACTOR_ICC), PETSC_COMM_SELF, PETSC_ERR_SUP, "Hermitian Factor is not supported"); 702b499a2aeSHong Zhang #endif 7039566063dSJacob Faibussowitsch PetscCall(MatCreate(PetscObjectComm((PetscObject)A), B)); 7049566063dSJacob Faibussowitsch PetscCall(MatSetSizes(*B, n, n, n, n)); 705c8342467SHong Zhang if (ftype == MAT_FACTOR_LU || ftype == MAT_FACTOR_ILU || ftype == MAT_FACTOR_ILUDT) { 7069566063dSJacob Faibussowitsch PetscCall(MatSetType(*B, MATSEQBAIJ)); 70726fbe8dcSKarl Rupp 7084dd39f65SShri Abhyankar (*B)->ops->lufactorsymbolic = MatLUFactorSymbolic_SeqBAIJ; 7094dd39f65SShri Abhyankar (*B)->ops->ilufactorsymbolic = MatILUFactorSymbolic_SeqBAIJ; 7109566063dSJacob Faibussowitsch PetscCall(PetscStrallocpy(MATORDERINGND, (char **)&(*B)->preferredordering[MAT_FACTOR_LU])); 7119566063dSJacob Faibussowitsch PetscCall(PetscStrallocpy(MATORDERINGNATURAL, (char **)&(*B)->preferredordering[MAT_FACTOR_ILU])); 7129566063dSJacob Faibussowitsch PetscCall(PetscStrallocpy(MATORDERINGNATURAL, (char **)&(*B)->preferredordering[MAT_FACTOR_ILUDT])); 713b24902e0SBarry Smith } else if (ftype == MAT_FACTOR_CHOLESKY || ftype == MAT_FACTOR_ICC) { 7149566063dSJacob Faibussowitsch PetscCall(MatSetType(*B, MATSEQSBAIJ)); 7159566063dSJacob Faibussowitsch PetscCall(MatSeqSBAIJSetPreallocation(*B, A->rmap->bs, MAT_SKIP_ALLOCATION, NULL)); 71626fbe8dcSKarl Rupp 7175c9eb25fSBarry Smith (*B)->ops->iccfactorsymbolic = MatICCFactorSymbolic_SeqBAIJ; 7185c9eb25fSBarry Smith (*B)->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_SeqBAIJ; 7194ac6704cSBarry Smith /* Future optimization would be direct symbolic and numerical factorization for BAIJ to support orderings and Cholesky, instead of first converting to SBAIJ */ 7209566063dSJacob Faibussowitsch PetscCall(PetscStrallocpy(MATORDERINGNATURAL, (char **)&(*B)->preferredordering[MAT_FACTOR_CHOLESKY])); 7219566063dSJacob Faibussowitsch PetscCall(PetscStrallocpy(MATORDERINGNATURAL, (char **)&(*B)->preferredordering[MAT_FACTOR_ICC])); 722e32f2f54SBarry Smith } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Factor type not supported"); 723d5f3da31SBarry Smith (*B)->factortype = ftype; 724f73b0415SBarry Smith (*B)->canuseordering = PETSC_TRUE; 72500c67f3bSHong Zhang 7269566063dSJacob Faibussowitsch PetscCall(PetscFree((*B)->solvertype)); 7279566063dSJacob Faibussowitsch PetscCall(PetscStrallocpy(MATSOLVERPETSC, &(*B)->solvertype)); 7289566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)*B, "MatFactorGetSolverType_C", MatFactorGetSolverType_petsc)); 7293ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 730b24902e0SBarry Smith } 731273d9f13SBarry Smith 732d71ae5a4SJacob Faibussowitsch PetscErrorCode MatLUFactor_SeqBAIJ(Mat A, IS row, IS col, const MatFactorInfo *info) 733d71ae5a4SJacob Faibussowitsch { 7345d517e7eSBarry Smith Mat C; 7355d517e7eSBarry Smith 7363a40ed3dSBarry Smith PetscFunctionBegin; 7379566063dSJacob Faibussowitsch PetscCall(MatGetFactor(A, MATSOLVERPETSC, MAT_FACTOR_LU, &C)); 7389566063dSJacob Faibussowitsch PetscCall(MatLUFactorSymbolic(C, A, row, col, info)); 7399566063dSJacob Faibussowitsch PetscCall(MatLUFactorNumeric(C, A, info)); 74026fbe8dcSKarl Rupp 741db4efbfdSBarry Smith A->ops->solve = C->ops->solve; 742db4efbfdSBarry Smith A->ops->solvetranspose = C->ops->solvetranspose; 74326fbe8dcSKarl Rupp 7449566063dSJacob Faibussowitsch PetscCall(MatHeaderMerge(A, &C)); 7453ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 7465d517e7eSBarry Smith } 7474cd38560SBarry Smith 748c6db04a5SJed Brown #include <../src/mat/impls/sbaij/seq/sbaij.h> 749d71ae5a4SJacob Faibussowitsch PetscErrorCode MatCholeskyFactorNumeric_SeqBAIJ_N(Mat C, Mat A, const MatFactorInfo *info) 750d71ae5a4SJacob Faibussowitsch { 75178910aadSHong Zhang Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 75278910aadSHong Zhang Mat_SeqSBAIJ *b = (Mat_SeqSBAIJ *)C->data; 75378910aadSHong Zhang IS ip = b->row; 7545d0c19d7SBarry Smith const PetscInt *rip; 7555d0c19d7SBarry Smith PetscInt i, j, mbs = a->mbs, bs = A->rmap->bs, *bi = b->i, *bj = b->j, *bcol; 75678910aadSHong Zhang PetscInt *ai = a->i, *aj = a->j; 75778910aadSHong Zhang PetscInt k, jmin, jmax, *jl, *il, col, nexti, ili, nz; 75878910aadSHong Zhang MatScalar *rtmp, *ba = b->a, *bval, *aa = a->a, dk, uikdi; 75907b50cabSHong Zhang PetscReal rs; 7600e95ead3SHong Zhang FactorShiftCtx sctx; 76178910aadSHong Zhang 762c05c3958SHong Zhang PetscFunctionBegin; 76307b50cabSHong Zhang if (bs > 1) { /* convert A to a SBAIJ matrix and apply Cholesky factorization from it */ 764*b0c98d1dSPierre Jolivet if (!a->sbaijMat) { 765*b0c98d1dSPierre Jolivet A->symmetric = PETSC_BOOL3_TRUE; 766*b0c98d1dSPierre Jolivet if (!PetscDefined(USE_COMPLEX)) A->hermitian = PETSC_BOOL3_TRUE; 767*b0c98d1dSPierre Jolivet PetscCall(MatConvert(A, MATSEQSBAIJ, MAT_INITIAL_MATRIX, &a->sbaijMat)); 768*b0c98d1dSPierre Jolivet } 769f4f49eeaSPierre Jolivet PetscCall(a->sbaijMat->ops->choleskyfactornumeric(C, a->sbaijMat, info)); 7709566063dSJacob Faibussowitsch PetscCall(MatDestroy(&a->sbaijMat)); 7713ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 7726ad2eaddSHong Zhang } 77378910aadSHong Zhang 77407b50cabSHong Zhang /* MatPivotSetUp(): initialize shift context sctx */ 7759566063dSJacob Faibussowitsch PetscCall(PetscMemzero(&sctx, sizeof(FactorShiftCtx))); 7763cea4cbeSHong Zhang 7779566063dSJacob Faibussowitsch PetscCall(ISGetIndices(ip, &rip)); 7789566063dSJacob Faibussowitsch PetscCall(PetscMalloc3(mbs, &rtmp, mbs, &il, mbs, &jl)); 77978910aadSHong Zhang 78075567043SBarry Smith sctx.shift_amount = 0.; 7813cea4cbeSHong Zhang sctx.nshift = 0; 78278910aadSHong Zhang do { 78307b50cabSHong Zhang sctx.newshift = PETSC_FALSE; 78478910aadSHong Zhang for (i = 0; i < mbs; i++) { 7859371c9d4SSatish Balay rtmp[i] = 0.0; 7869371c9d4SSatish Balay jl[i] = mbs; 7879371c9d4SSatish Balay il[0] = 0; 78878910aadSHong Zhang } 78978910aadSHong Zhang 79078910aadSHong Zhang for (k = 0; k < mbs; k++) { 79178910aadSHong Zhang bval = ba + bi[k]; 79278910aadSHong Zhang /* initialize k-th row by the perm[k]-th row of A */ 7939371c9d4SSatish Balay jmin = ai[rip[k]]; 7949371c9d4SSatish Balay jmax = ai[rip[k] + 1]; 79578910aadSHong Zhang for (j = jmin; j < jmax; j++) { 79678910aadSHong Zhang col = rip[aj[j]]; 79778910aadSHong Zhang if (col >= k) { /* only take upper triangular entry */ 79878910aadSHong Zhang rtmp[col] = aa[j]; 79978910aadSHong Zhang *bval++ = 0.0; /* for in-place factorization */ 80078910aadSHong Zhang } 80178910aadSHong Zhang } 8023cea4cbeSHong Zhang 8033cea4cbeSHong Zhang /* shift the diagonal of the matrix */ 8043cea4cbeSHong Zhang if (sctx.nshift) rtmp[k] += sctx.shift_amount; 80578910aadSHong Zhang 80678910aadSHong Zhang /* modify k-th row by adding in those rows i with U(i,k)!=0 */ 80778910aadSHong Zhang dk = rtmp[k]; 80878910aadSHong Zhang i = jl[k]; /* first row to be added to k_th row */ 80978910aadSHong Zhang 81078910aadSHong Zhang while (i < k) { 81178910aadSHong Zhang nexti = jl[i]; /* next row to be added to k_th row */ 81278910aadSHong Zhang 81378910aadSHong Zhang /* compute multiplier, update diag(k) and U(i,k) */ 81478910aadSHong Zhang ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */ 81578910aadSHong Zhang uikdi = -ba[ili] * ba[bi[i]]; /* diagonal(k) */ 81678910aadSHong Zhang dk += uikdi * ba[ili]; 81778910aadSHong Zhang ba[ili] = uikdi; /* -U(i,k) */ 81878910aadSHong Zhang 81978910aadSHong Zhang /* add multiple of row i to k-th row */ 8209371c9d4SSatish Balay jmin = ili + 1; 8219371c9d4SSatish Balay jmax = bi[i + 1]; 82278910aadSHong Zhang if (jmin < jmax) { 82378910aadSHong Zhang for (j = jmin; j < jmax; j++) rtmp[bj[j]] += uikdi * ba[j]; 82478910aadSHong Zhang /* update il and jl for row i */ 82578910aadSHong Zhang il[i] = jmin; 8269371c9d4SSatish Balay j = bj[jmin]; 8279371c9d4SSatish Balay jl[i] = jl[j]; 8289371c9d4SSatish Balay jl[j] = i; 82978910aadSHong Zhang } 83078910aadSHong Zhang i = nexti; 83178910aadSHong Zhang } 83278910aadSHong Zhang 8333cea4cbeSHong Zhang /* shift the diagonals when zero pivot is detected */ 8343cea4cbeSHong Zhang /* compute rs=sum of abs(off-diagonal) */ 8353cea4cbeSHong Zhang rs = 0.0; 83678910aadSHong Zhang jmin = bi[k] + 1; 83778910aadSHong Zhang nz = bi[k + 1] - jmin; 83878910aadSHong Zhang if (nz) { 83978910aadSHong Zhang bcol = bj + jmin; 84078910aadSHong Zhang while (nz--) { 84189f845c8SHong Zhang rs += PetscAbsScalar(rtmp[*bcol]); 84289f845c8SHong Zhang bcol++; 84378910aadSHong Zhang } 84478910aadSHong Zhang } 8453cea4cbeSHong Zhang 8463cea4cbeSHong Zhang sctx.rs = rs; 8473cea4cbeSHong Zhang sctx.pv = dk; 8489566063dSJacob Faibussowitsch PetscCall(MatPivotCheck(C, A, info, &sctx, k)); 84907b50cabSHong Zhang if (sctx.newshift) break; 8500e95ead3SHong Zhang dk = sctx.pv; 85178910aadSHong Zhang 85278910aadSHong Zhang /* copy data into U(k,:) */ 85378910aadSHong Zhang ba[bi[k]] = 1.0 / dk; /* U(k,k) */ 8549371c9d4SSatish Balay jmin = bi[k] + 1; 8559371c9d4SSatish Balay jmax = bi[k + 1]; 85678910aadSHong Zhang if (jmin < jmax) { 85778910aadSHong Zhang for (j = jmin; j < jmax; j++) { 8589371c9d4SSatish Balay col = bj[j]; 8599371c9d4SSatish Balay ba[j] = rtmp[col]; 8609371c9d4SSatish Balay rtmp[col] = 0.0; 86178910aadSHong Zhang } 86278910aadSHong Zhang /* add the k-th row into il and jl */ 86378910aadSHong Zhang il[k] = jmin; 8649371c9d4SSatish Balay i = bj[jmin]; 8659371c9d4SSatish Balay jl[k] = jl[i]; 8669371c9d4SSatish Balay jl[i] = k; 86778910aadSHong Zhang } 86878910aadSHong Zhang } 86907b50cabSHong Zhang } while (sctx.newshift); 8709566063dSJacob Faibussowitsch PetscCall(PetscFree3(rtmp, il, jl)); 87178910aadSHong Zhang 8729566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(ip, &rip)); 87326fbe8dcSKarl Rupp 87478910aadSHong Zhang C->assembled = PETSC_TRUE; 87578910aadSHong Zhang C->preallocated = PETSC_TRUE; 87626fbe8dcSKarl Rupp 8779566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(C->rmap->N)); 8783cea4cbeSHong Zhang if (sctx.nshift) { 879f4db908eSBarry Smith if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { 8809566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "number of shiftpd tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount)); 881f4db908eSBarry Smith } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) { 8829566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "number of shiftnz tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount)); 88378910aadSHong Zhang } 88478910aadSHong Zhang } 8853ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 886c05c3958SHong Zhang } 8874cd38560SBarry Smith 888d71ae5a4SJacob Faibussowitsch PetscErrorCode MatCholeskyFactorNumeric_SeqBAIJ_N_NaturalOrdering(Mat C, Mat A, const MatFactorInfo *info) 889d71ae5a4SJacob Faibussowitsch { 89078910aadSHong Zhang Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 89178910aadSHong Zhang Mat_SeqSBAIJ *b = (Mat_SeqSBAIJ *)C->data; 8923cea4cbeSHong Zhang PetscInt i, j, am = a->mbs; 89378910aadSHong Zhang PetscInt *ai = a->i, *aj = a->j, *bi = b->i, *bj = b->j; 8943cea4cbeSHong Zhang PetscInt k, jmin, *jl, *il, nexti, ili, *acol, *bcol, nz; 89578910aadSHong Zhang MatScalar *rtmp, *ba = b->a, *aa = a->a, dk, uikdi, *aval, *bval; 8960e95ead3SHong Zhang PetscReal rs; 8970e95ead3SHong Zhang FactorShiftCtx sctx; 89878910aadSHong Zhang 899c05c3958SHong Zhang PetscFunctionBegin; 9000e95ead3SHong Zhang /* MatPivotSetUp(): initialize shift context sctx */ 9019566063dSJacob Faibussowitsch PetscCall(PetscMemzero(&sctx, sizeof(FactorShiftCtx))); 9023cea4cbeSHong Zhang 9039566063dSJacob Faibussowitsch PetscCall(PetscMalloc3(am, &rtmp, am, &il, am, &jl)); 90478910aadSHong Zhang 90578910aadSHong Zhang do { 90607b50cabSHong Zhang sctx.newshift = PETSC_FALSE; 90778910aadSHong Zhang for (i = 0; i < am; i++) { 9089371c9d4SSatish Balay rtmp[i] = 0.0; 9099371c9d4SSatish Balay jl[i] = am; 9109371c9d4SSatish Balay il[0] = 0; 91178910aadSHong Zhang } 91278910aadSHong Zhang 91378910aadSHong Zhang for (k = 0; k < am; k++) { 91478910aadSHong Zhang /* initialize k-th row with elements nonzero in row perm(k) of A */ 91578910aadSHong Zhang nz = ai[k + 1] - ai[k]; 91678910aadSHong Zhang acol = aj + ai[k]; 91778910aadSHong Zhang aval = aa + ai[k]; 91878910aadSHong Zhang bval = ba + bi[k]; 91978910aadSHong Zhang while (nz--) { 92078910aadSHong Zhang if (*acol < k) { /* skip lower triangular entries */ 9219371c9d4SSatish Balay acol++; 9229371c9d4SSatish Balay aval++; 92378910aadSHong Zhang } else { 92478910aadSHong Zhang rtmp[*acol++] = *aval++; 92578910aadSHong Zhang *bval++ = 0.0; /* for in-place factorization */ 92678910aadSHong Zhang } 92778910aadSHong Zhang } 9283cea4cbeSHong Zhang 9293cea4cbeSHong Zhang /* shift the diagonal of the matrix */ 9303cea4cbeSHong Zhang if (sctx.nshift) rtmp[k] += sctx.shift_amount; 93178910aadSHong Zhang 93278910aadSHong Zhang /* modify k-th row by adding in those rows i with U(i,k)!=0 */ 93378910aadSHong Zhang dk = rtmp[k]; 93478910aadSHong Zhang i = jl[k]; /* first row to be added to k_th row */ 93578910aadSHong Zhang 93678910aadSHong Zhang while (i < k) { 93778910aadSHong Zhang nexti = jl[i]; /* next row to be added to k_th row */ 93878910aadSHong Zhang /* compute multiplier, update D(k) and U(i,k) */ 93978910aadSHong Zhang ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */ 94078910aadSHong Zhang uikdi = -ba[ili] * ba[bi[i]]; 94178910aadSHong Zhang dk += uikdi * ba[ili]; 94278910aadSHong Zhang ba[ili] = uikdi; /* -U(i,k) */ 94378910aadSHong Zhang 94478910aadSHong Zhang /* add multiple of row i to k-th row ... */ 94578910aadSHong Zhang jmin = ili + 1; 94678910aadSHong Zhang nz = bi[i + 1] - jmin; 94778910aadSHong Zhang if (nz > 0) { 94878910aadSHong Zhang bcol = bj + jmin; 94978910aadSHong Zhang bval = ba + jmin; 95078910aadSHong Zhang while (nz--) rtmp[*bcol++] += uikdi * (*bval++); 95178910aadSHong Zhang /* update il and jl for i-th row */ 95278910aadSHong Zhang il[i] = jmin; 9539371c9d4SSatish Balay j = bj[jmin]; 9549371c9d4SSatish Balay jl[i] = jl[j]; 9559371c9d4SSatish Balay jl[j] = i; 95678910aadSHong Zhang } 95778910aadSHong Zhang i = nexti; 95878910aadSHong Zhang } 95978910aadSHong Zhang 9603cea4cbeSHong Zhang /* shift the diagonals when zero pivot is detected */ 9613cea4cbeSHong Zhang /* compute rs=sum of abs(off-diagonal) */ 9623cea4cbeSHong Zhang rs = 0.0; 96378910aadSHong Zhang jmin = bi[k] + 1; 96478910aadSHong Zhang nz = bi[k + 1] - jmin; 96578910aadSHong Zhang if (nz) { 96678910aadSHong Zhang bcol = bj + jmin; 96778910aadSHong Zhang while (nz--) { 96889f845c8SHong Zhang rs += PetscAbsScalar(rtmp[*bcol]); 96989f845c8SHong Zhang bcol++; 97078910aadSHong Zhang } 97178910aadSHong Zhang } 9723cea4cbeSHong Zhang 9733cea4cbeSHong Zhang sctx.rs = rs; 9743cea4cbeSHong Zhang sctx.pv = dk; 9759566063dSJacob Faibussowitsch PetscCall(MatPivotCheck(C, A, info, &sctx, k)); 97607b50cabSHong Zhang if (sctx.newshift) break; /* sctx.shift_amount is updated */ 9770e95ead3SHong Zhang dk = sctx.pv; 97878910aadSHong Zhang 97978910aadSHong Zhang /* copy data into U(k,:) */ 98078910aadSHong Zhang ba[bi[k]] = 1.0 / dk; 98178910aadSHong Zhang jmin = bi[k] + 1; 98278910aadSHong Zhang nz = bi[k + 1] - jmin; 98378910aadSHong Zhang if (nz) { 98478910aadSHong Zhang bcol = bj + jmin; 98578910aadSHong Zhang bval = ba + jmin; 98678910aadSHong Zhang while (nz--) { 98778910aadSHong Zhang *bval++ = rtmp[*bcol]; 98878910aadSHong Zhang rtmp[*bcol++] = 0.0; 98978910aadSHong Zhang } 99078910aadSHong Zhang /* add k-th row into il and jl */ 99178910aadSHong Zhang il[k] = jmin; 9929371c9d4SSatish Balay i = bj[jmin]; 9939371c9d4SSatish Balay jl[k] = jl[i]; 9949371c9d4SSatish Balay jl[i] = k; 99578910aadSHong Zhang } 99678910aadSHong Zhang } 99707b50cabSHong Zhang } while (sctx.newshift); 9989566063dSJacob Faibussowitsch PetscCall(PetscFree3(rtmp, il, jl)); 99978910aadSHong Zhang 10000a3c351aSHong Zhang C->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering_inplace; 10010a3c351aSHong Zhang C->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering_inplace; 100278910aadSHong Zhang C->assembled = PETSC_TRUE; 100378910aadSHong Zhang C->preallocated = PETSC_TRUE; 100426fbe8dcSKarl Rupp 10059566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(C->rmap->N)); 10063cea4cbeSHong Zhang if (sctx.nshift) { 1007f4db908eSBarry Smith if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) { 10089566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "number of shiftnz tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount)); 1009f4db908eSBarry Smith } else if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { 10109566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "number of shiftpd tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount)); 101178910aadSHong Zhang } 101278910aadSHong Zhang } 10133ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1014c05c3958SHong Zhang } 1015c05c3958SHong Zhang 1016c6db04a5SJed Brown #include <petscbt.h> 1017c6db04a5SJed Brown #include <../src/mat/utils/freespace.h> 1018d71ae5a4SJacob Faibussowitsch PetscErrorCode MatICCFactorSymbolic_SeqBAIJ(Mat fact, Mat A, IS perm, const MatFactorInfo *info) 1019d71ae5a4SJacob Faibussowitsch { 102078910aadSHong Zhang Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 102178910aadSHong Zhang Mat_SeqSBAIJ *b; 102278910aadSHong Zhang Mat B; 1023421480d9SBarry Smith PetscBool perm_identity; 10245d0c19d7SBarry Smith PetscInt reallocs = 0, i, *ai = a->i, *aj = a->j, am = a->mbs, bs = A->rmap->bs, *ui; 10255d0c19d7SBarry Smith const PetscInt *rip; 102678910aadSHong Zhang PetscInt jmin, jmax, nzk, k, j, *jl, prow, *il, nextprow; 10270298fd71SBarry Smith PetscInt nlnk, *lnk, *lnk_lvl = NULL, ncols, ncols_upper, *cols, *cols_lvl, *uj, **uj_ptr, **uj_lvl_ptr; 102878910aadSHong Zhang PetscReal fill = info->fill, levels = info->levels; 10290298fd71SBarry Smith PetscFreeSpaceList free_space = NULL, current_space = NULL; 10300298fd71SBarry Smith PetscFreeSpaceList free_space_lvl = NULL, current_space_lvl = NULL; 103178910aadSHong Zhang PetscBT lnkbt; 1032421480d9SBarry Smith PetscBool diagDense; 1033421480d9SBarry Smith const PetscInt *adiag; 103478910aadSHong Zhang 1035c05c3958SHong Zhang PetscFunctionBegin; 1036421480d9SBarry Smith PetscCall(MatGetDiagonalMarkers_SeqBAIJ(A, &adiag, &diagDense)); 1037421480d9SBarry Smith PetscCheck(diagDense, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry"); 103848dd3d27SHong Zhang 10396ad2eaddSHong Zhang if (bs > 1) { 1040*b0c98d1dSPierre Jolivet if (!a->sbaijMat) { 1041*b0c98d1dSPierre Jolivet A->symmetric = PETSC_BOOL3_TRUE; 1042*b0c98d1dSPierre Jolivet if (!PetscDefined(USE_COMPLEX)) A->hermitian = PETSC_BOOL3_TRUE; 1043*b0c98d1dSPierre Jolivet PetscCall(MatConvert(A, MATSEQSBAIJ, MAT_INITIAL_MATRIX, &a->sbaijMat)); 1044*b0c98d1dSPierre Jolivet } 104557508eceSPierre Jolivet fact->ops->iccfactorsymbolic = MatICCFactorSymbolic_SeqSBAIJ; /* undue the change made in MatGetFactor_seqbaij_petsc */ 104626fbe8dcSKarl Rupp 10479566063dSJacob Faibussowitsch PetscCall(MatICCFactorSymbolic(fact, a->sbaijMat, perm, info)); 10483ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 10496ad2eaddSHong Zhang } 10506ad2eaddSHong Zhang 10519566063dSJacob Faibussowitsch PetscCall(ISIdentity(perm, &perm_identity)); 10529566063dSJacob Faibussowitsch PetscCall(ISGetIndices(perm, &rip)); 105378910aadSHong Zhang 105478910aadSHong Zhang /* special case that simply copies fill pattern */ 105578910aadSHong Zhang if (!levels && perm_identity) { 10569566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(am + 1, &ui)); 1057421480d9SBarry Smith for (i = 0; i < am; i++) ui[i] = ai[i + 1] - adiag[i]; /* ui: rowlengths - changes when !perm_identity */ 1058719d5645SBarry Smith B = fact; 10599566063dSJacob Faibussowitsch PetscCall(MatSeqSBAIJSetPreallocation(B, 1, 0, ui)); 106078910aadSHong Zhang 106178910aadSHong Zhang b = (Mat_SeqSBAIJ *)B->data; 106278910aadSHong Zhang uj = b->j; 106378910aadSHong Zhang for (i = 0; i < am; i++) { 1064421480d9SBarry Smith aj = a->j + adiag[i]; 106526fbe8dcSKarl Rupp for (j = 0; j < ui[i]; j++) *uj++ = *aj++; 106678910aadSHong Zhang b->ilen[i] = ui[i]; 106778910aadSHong Zhang } 10689566063dSJacob Faibussowitsch PetscCall(PetscFree(ui)); 106926fbe8dcSKarl Rupp 1070d5f3da31SBarry Smith B->factortype = MAT_FACTOR_NONE; 107126fbe8dcSKarl Rupp 10729566063dSJacob Faibussowitsch PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 10739566063dSJacob Faibussowitsch PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 1074d5f3da31SBarry Smith B->factortype = MAT_FACTOR_ICC; 107578910aadSHong Zhang 107678910aadSHong Zhang B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqBAIJ_N_NaturalOrdering; 10773ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 107878910aadSHong Zhang } 107978910aadSHong Zhang 108078910aadSHong Zhang /* initialization */ 10819566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(am + 1, &ui)); 1082e60cf9a0SBarry Smith ui[0] = 0; 10839566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(2 * am + 1, &cols_lvl)); 108478910aadSHong Zhang 108578910aadSHong Zhang /* jl: linked list for storing indices of the pivot rows 108678910aadSHong Zhang il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */ 10879566063dSJacob Faibussowitsch PetscCall(PetscMalloc4(am, &uj_ptr, am, &uj_lvl_ptr, am, &il, am, &jl)); 108878910aadSHong Zhang for (i = 0; i < am; i++) { 10899371c9d4SSatish Balay jl[i] = am; 10909371c9d4SSatish Balay il[i] = 0; 109178910aadSHong Zhang } 109278910aadSHong Zhang 109378910aadSHong Zhang /* create and initialize a linked list for storing column indices of the active row k */ 109478910aadSHong Zhang nlnk = am + 1; 10959566063dSJacob Faibussowitsch PetscCall(PetscIncompleteLLCreate(am, am, nlnk, lnk, lnk_lvl, lnkbt)); 109678910aadSHong Zhang 109795b5ac22SHong Zhang /* initial FreeSpace size is fill*(ai[am]+am)/2 */ 10989566063dSJacob Faibussowitsch PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill, PetscIntSumTruncate(ai[am] / 2, am / 2)), &free_space)); 109926fbe8dcSKarl Rupp 110078910aadSHong Zhang current_space = free_space; 110126fbe8dcSKarl Rupp 11029566063dSJacob Faibussowitsch PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill, PetscIntSumTruncate(ai[am] / 2, am / 2)), &free_space_lvl)); 110378910aadSHong Zhang current_space_lvl = free_space_lvl; 110478910aadSHong Zhang 110578910aadSHong Zhang for (k = 0; k < am; k++) { /* for each active row k */ 110678910aadSHong Zhang /* initialize lnk by the column indices of row rip[k] of A */ 110778910aadSHong Zhang nzk = 0; 110878910aadSHong Zhang ncols = ai[rip[k] + 1] - ai[rip[k]]; 110978910aadSHong Zhang ncols_upper = 0; 111078910aadSHong Zhang cols = cols_lvl + am; 111178910aadSHong Zhang for (j = 0; j < ncols; j++) { 111278910aadSHong Zhang i = rip[*(aj + ai[rip[k]] + j)]; 111378910aadSHong Zhang if (i >= k) { /* only take upper triangular entry */ 111478910aadSHong Zhang cols[ncols_upper] = i; 111578910aadSHong Zhang cols_lvl[ncols_upper] = -1; /* initialize level for nonzero entries */ 111678910aadSHong Zhang ncols_upper++; 111778910aadSHong Zhang } 111878910aadSHong Zhang } 11199566063dSJacob Faibussowitsch PetscCall(PetscIncompleteLLAdd(ncols_upper, cols, levels, cols_lvl, am, &nlnk, lnk, lnk_lvl, lnkbt)); 112078910aadSHong Zhang nzk += nlnk; 112178910aadSHong Zhang 112278910aadSHong Zhang /* update lnk by computing fill-in for each pivot row to be merged in */ 112378910aadSHong Zhang prow = jl[k]; /* 1st pivot row */ 112478910aadSHong Zhang 112578910aadSHong Zhang while (prow < k) { 112678910aadSHong Zhang nextprow = jl[prow]; 112778910aadSHong Zhang 112878910aadSHong Zhang /* merge prow into k-th row */ 112978910aadSHong Zhang jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */ 113078910aadSHong Zhang jmax = ui[prow + 1]; 113178910aadSHong Zhang ncols = jmax - jmin; 113278910aadSHong Zhang i = jmin - ui[prow]; 113378910aadSHong Zhang cols = uj_ptr[prow] + i; /* points to the 2nd nzero entry in U(prow,k:am-1) */ 113478910aadSHong Zhang for (j = 0; j < ncols; j++) cols_lvl[j] = *(uj_lvl_ptr[prow] + i + j); 11359566063dSJacob Faibussowitsch PetscCall(PetscIncompleteLLAddSorted(ncols, cols, levels, cols_lvl, am, &nlnk, lnk, lnk_lvl, lnkbt)); 113678910aadSHong Zhang nzk += nlnk; 113778910aadSHong Zhang 113878910aadSHong Zhang /* update il and jl for prow */ 113978910aadSHong Zhang if (jmin < jmax) { 114078910aadSHong Zhang il[prow] = jmin; 114126fbe8dcSKarl Rupp 11429371c9d4SSatish Balay j = *cols; 11439371c9d4SSatish Balay jl[prow] = jl[j]; 11449371c9d4SSatish Balay jl[j] = prow; 114578910aadSHong Zhang } 114678910aadSHong Zhang prow = nextprow; 114778910aadSHong Zhang } 114878910aadSHong Zhang 114978910aadSHong Zhang /* if free space is not available, make more free space */ 115078910aadSHong Zhang if (current_space->local_remaining < nzk) { 115178910aadSHong Zhang i = am - k + 1; /* num of unfactored rows */ 1152f91af8c7SBarry Smith i = PetscMin(PetscIntMultTruncate(i, nzk), PetscIntMultTruncate(i, i - 1)); /* i*nzk, i*(i-1): estimated and max additional space needed */ 11539566063dSJacob Faibussowitsch PetscCall(PetscFreeSpaceGet(i, ¤t_space)); 11549566063dSJacob Faibussowitsch PetscCall(PetscFreeSpaceGet(i, ¤t_space_lvl)); 115578910aadSHong Zhang reallocs++; 115678910aadSHong Zhang } 115778910aadSHong Zhang 115878910aadSHong Zhang /* copy data into free_space and free_space_lvl, then initialize lnk */ 11599566063dSJacob Faibussowitsch PetscCall(PetscIncompleteLLClean(am, am, nzk, lnk, lnk_lvl, current_space->array, current_space_lvl->array, lnkbt)); 116078910aadSHong Zhang 116178910aadSHong Zhang /* add the k-th row into il and jl */ 116278910aadSHong Zhang if (nzk - 1 > 0) { 116378910aadSHong Zhang i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */ 11649371c9d4SSatish Balay jl[k] = jl[i]; 11659371c9d4SSatish Balay jl[i] = k; 116678910aadSHong Zhang il[k] = ui[k] + 1; 116778910aadSHong Zhang } 116878910aadSHong Zhang uj_ptr[k] = current_space->array; 116978910aadSHong Zhang uj_lvl_ptr[k] = current_space_lvl->array; 117078910aadSHong Zhang 117178910aadSHong Zhang current_space->array += nzk; 117278910aadSHong Zhang current_space->local_used += nzk; 117378910aadSHong Zhang current_space->local_remaining -= nzk; 117478910aadSHong Zhang 117578910aadSHong Zhang current_space_lvl->array += nzk; 117678910aadSHong Zhang current_space_lvl->local_used += nzk; 117778910aadSHong Zhang current_space_lvl->local_remaining -= nzk; 117878910aadSHong Zhang 117978910aadSHong Zhang ui[k + 1] = ui[k] + nzk; 118078910aadSHong Zhang } 118178910aadSHong Zhang 11829566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(perm, &rip)); 11839566063dSJacob Faibussowitsch PetscCall(PetscFree4(uj_ptr, uj_lvl_ptr, il, jl)); 11849566063dSJacob Faibussowitsch PetscCall(PetscFree(cols_lvl)); 118578910aadSHong Zhang 11869263d837SHong Zhang /* copy free_space into uj and free free_space; set uj in new datastructure; */ 11879566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(ui[am] + 1, &uj)); 11889566063dSJacob Faibussowitsch PetscCall(PetscFreeSpaceContiguous(&free_space, uj)); 11899566063dSJacob Faibussowitsch PetscCall(PetscIncompleteLLDestroy(lnk, lnkbt)); 11909566063dSJacob Faibussowitsch PetscCall(PetscFreeSpaceDestroy(free_space_lvl)); 119178910aadSHong Zhang 119278910aadSHong Zhang /* put together the new matrix in MATSEQSBAIJ format */ 1193719d5645SBarry Smith B = fact; 11949566063dSJacob Faibussowitsch PetscCall(MatSeqSBAIJSetPreallocation(B, 1, MAT_SKIP_ALLOCATION, NULL)); 119578910aadSHong Zhang 119678910aadSHong Zhang b = (Mat_SeqSBAIJ *)B->data; 1197e6b907acSBarry Smith b->free_a = PETSC_TRUE; 1198e6b907acSBarry Smith b->free_ij = PETSC_TRUE; 119926fbe8dcSKarl Rupp 12009566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(ui[am] + 1, &b->a)); 120126fbe8dcSKarl Rupp 120278910aadSHong Zhang b->j = uj; 120378910aadSHong Zhang b->i = ui; 1204f4259b30SLisandro Dalcin b->diag = NULL; 1205f4259b30SLisandro Dalcin b->ilen = NULL; 1206f4259b30SLisandro Dalcin b->imax = NULL; 120778910aadSHong Zhang b->row = perm; 120878910aadSHong Zhang b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 120926fbe8dcSKarl Rupp 12109566063dSJacob Faibussowitsch PetscCall(PetscObjectReference((PetscObject)perm)); 121126fbe8dcSKarl Rupp 121278910aadSHong Zhang b->icol = perm; 121326fbe8dcSKarl Rupp 12149566063dSJacob Faibussowitsch PetscCall(PetscObjectReference((PetscObject)perm)); 12159566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(am + 1, &b->solve_work)); 121626fbe8dcSKarl Rupp 121778910aadSHong Zhang b->maxnz = b->nz = ui[am]; 121878910aadSHong Zhang 121978910aadSHong Zhang B->info.factor_mallocs = reallocs; 122078910aadSHong Zhang B->info.fill_ratio_given = fill; 122175567043SBarry Smith if (ai[am] != 0.) { 1222da81f932SPierre Jolivet /* nonzeros in lower triangular part of A (including diagonals)= (ai[am]+am)/2 */ 122395b5ac22SHong Zhang B->info.fill_ratio_needed = ((PetscReal)2 * ui[am]) / (ai[am] + am); 122478910aadSHong Zhang } else { 122578910aadSHong Zhang B->info.fill_ratio_needed = 0.0; 122678910aadSHong Zhang } 12279263d837SHong Zhang #if defined(PETSC_USE_INFO) 12289263d837SHong Zhang if (ai[am] != 0) { 122995b5ac22SHong Zhang PetscReal af = B->info.fill_ratio_needed; 12309566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)fill, (double)af)); 12319566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af)); 12329566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g) for best performance.\n", (double)af)); 12339263d837SHong Zhang } else { 12349566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "Empty matrix\n")); 12359263d837SHong Zhang } 12369263d837SHong Zhang #endif 123778910aadSHong Zhang if (perm_identity) { 12380a3c351aSHong Zhang B->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering_inplace; 12390a3c351aSHong Zhang B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering_inplace; 124078910aadSHong Zhang B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqBAIJ_N_NaturalOrdering; 124178910aadSHong Zhang } else { 124257508eceSPierre Jolivet fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqBAIJ_N; 124378910aadSHong Zhang } 12443ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1245c05c3958SHong Zhang } 1246c05c3958SHong Zhang 1247d71ae5a4SJacob Faibussowitsch PetscErrorCode MatCholeskyFactorSymbolic_SeqBAIJ(Mat fact, Mat A, IS perm, const MatFactorInfo *info) 1248d71ae5a4SJacob Faibussowitsch { 124978910aadSHong Zhang Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 125078910aadSHong Zhang Mat_SeqSBAIJ *b; 125178910aadSHong Zhang Mat B; 1252421480d9SBarry Smith PetscBool perm_identity; 125378910aadSHong Zhang PetscReal fill = info->fill; 12545d0c19d7SBarry Smith const PetscInt *rip; 12555d0c19d7SBarry Smith PetscInt i, mbs = a->mbs, bs = A->rmap->bs, *ai = a->i, *aj = a->j, reallocs = 0, prow; 125678910aadSHong Zhang PetscInt *jl, jmin, jmax, nzk, *ui, k, j, *il, nextprow; 125778910aadSHong Zhang PetscInt nlnk, *lnk, ncols, ncols_upper, *cols, *uj, **ui_ptr, *uj_ptr; 12580298fd71SBarry Smith PetscFreeSpaceList free_space = NULL, current_space = NULL; 125978910aadSHong Zhang PetscBT lnkbt; 1260421480d9SBarry Smith PetscBool diagDense; 126178910aadSHong Zhang 1262c05c3958SHong Zhang PetscFunctionBegin; 12636ad2eaddSHong Zhang if (bs > 1) { /* convert to seqsbaij */ 1264*b0c98d1dSPierre Jolivet if (!a->sbaijMat) { 1265*b0c98d1dSPierre Jolivet A->symmetric = PETSC_BOOL3_TRUE; 1266*b0c98d1dSPierre Jolivet if (!PetscDefined(USE_COMPLEX)) A->hermitian = PETSC_BOOL3_TRUE; 1267*b0c98d1dSPierre Jolivet PetscCall(MatConvert(A, MATSEQSBAIJ, MAT_INITIAL_MATRIX, &a->sbaijMat)); 1268*b0c98d1dSPierre Jolivet } 126957508eceSPierre Jolivet fact->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_SeqSBAIJ; /* undue the change made in MatGetFactor_seqbaij_petsc */ 127026fbe8dcSKarl Rupp 12719566063dSJacob Faibussowitsch PetscCall(MatCholeskyFactorSymbolic(fact, a->sbaijMat, perm, info)); 12723ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 12736ad2eaddSHong Zhang } 1274421480d9SBarry Smith PetscCall(MatGetDiagonalMarkers_SeqBAIJ(A, NULL, &diagDense)); 1275421480d9SBarry Smith PetscCheck(diagDense, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry"); 12769186b105SHong Zhang 127778910aadSHong Zhang /* check whether perm is the identity mapping */ 12789566063dSJacob Faibussowitsch PetscCall(ISIdentity(perm, &perm_identity)); 127928b400f6SJacob Faibussowitsch PetscCheck(perm_identity, PETSC_COMM_SELF, PETSC_ERR_SUP, "Matrix reordering is not supported"); 12809566063dSJacob Faibussowitsch PetscCall(ISGetIndices(perm, &rip)); 128178910aadSHong Zhang 128278910aadSHong Zhang /* initialization */ 12839566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(mbs + 1, &ui)); 1284e60cf9a0SBarry Smith ui[0] = 0; 128578910aadSHong Zhang 128678910aadSHong Zhang /* jl: linked list for storing indices of the pivot rows 128778910aadSHong Zhang il: il[i] points to the 1st nonzero entry of U(i,k:mbs-1) */ 12889566063dSJacob Faibussowitsch PetscCall(PetscMalloc4(mbs, &ui_ptr, mbs, &il, mbs, &jl, mbs, &cols)); 128978910aadSHong Zhang for (i = 0; i < mbs; i++) { 12909371c9d4SSatish Balay jl[i] = mbs; 12919371c9d4SSatish Balay il[i] = 0; 129278910aadSHong Zhang } 129378910aadSHong Zhang 129478910aadSHong Zhang /* create and initialize a linked list for storing column indices of the active row k */ 129578910aadSHong Zhang nlnk = mbs + 1; 12969566063dSJacob Faibussowitsch PetscCall(PetscLLCreate(mbs, mbs, nlnk, lnk, lnkbt)); 129778910aadSHong Zhang 12986a69fef8SHong Zhang /* initial FreeSpace size is fill* (ai[mbs]+mbs)/2 */ 12999566063dSJacob Faibussowitsch PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill, PetscIntSumTruncate(ai[mbs] / 2, mbs / 2)), &free_space)); 130026fbe8dcSKarl Rupp 130178910aadSHong Zhang current_space = free_space; 130278910aadSHong Zhang 130378910aadSHong Zhang for (k = 0; k < mbs; k++) { /* for each active row k */ 130478910aadSHong Zhang /* initialize lnk by the column indices of row rip[k] of A */ 130578910aadSHong Zhang nzk = 0; 130678910aadSHong Zhang ncols = ai[rip[k] + 1] - ai[rip[k]]; 1307e60cf9a0SBarry Smith ncols_upper = 0; 130878910aadSHong Zhang for (j = 0; j < ncols; j++) { 130978910aadSHong Zhang i = rip[*(aj + ai[rip[k]] + j)]; 131078910aadSHong Zhang if (i >= k) { /* only take upper triangular entry */ 131178910aadSHong Zhang cols[ncols_upper] = i; 131278910aadSHong Zhang ncols_upper++; 131378910aadSHong Zhang } 131478910aadSHong Zhang } 13159566063dSJacob Faibussowitsch PetscCall(PetscLLAdd(ncols_upper, cols, mbs, &nlnk, lnk, lnkbt)); 131678910aadSHong Zhang nzk += nlnk; 131778910aadSHong Zhang 131878910aadSHong Zhang /* update lnk by computing fill-in for each pivot row to be merged in */ 131978910aadSHong Zhang prow = jl[k]; /* 1st pivot row */ 132078910aadSHong Zhang 132178910aadSHong Zhang while (prow < k) { 132278910aadSHong Zhang nextprow = jl[prow]; 132378910aadSHong Zhang /* merge prow into k-th row */ 132478910aadSHong Zhang jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:mbs-1) */ 132578910aadSHong Zhang jmax = ui[prow + 1]; 132678910aadSHong Zhang ncols = jmax - jmin; 132778910aadSHong Zhang uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:mbs-1) */ 13289566063dSJacob Faibussowitsch PetscCall(PetscLLAddSorted(ncols, uj_ptr, mbs, &nlnk, lnk, lnkbt)); 132978910aadSHong Zhang nzk += nlnk; 133078910aadSHong Zhang 133178910aadSHong Zhang /* update il and jl for prow */ 133278910aadSHong Zhang if (jmin < jmax) { 133378910aadSHong Zhang il[prow] = jmin; 133426fbe8dcSKarl Rupp j = *uj_ptr; 133526fbe8dcSKarl Rupp jl[prow] = jl[j]; 133626fbe8dcSKarl Rupp jl[j] = prow; 133778910aadSHong Zhang } 133878910aadSHong Zhang prow = nextprow; 133978910aadSHong Zhang } 134078910aadSHong Zhang 134178910aadSHong Zhang /* if free space is not available, make more free space */ 134278910aadSHong Zhang if (current_space->local_remaining < nzk) { 134378910aadSHong Zhang i = mbs - k + 1; /* num of unfactored rows */ 1344f91af8c7SBarry Smith i = PetscMin(PetscIntMultTruncate(i, nzk), PetscIntMultTruncate(i, i - 1)); /* i*nzk, i*(i-1): estimated and max additional space needed */ 13459566063dSJacob Faibussowitsch PetscCall(PetscFreeSpaceGet(i, ¤t_space)); 134678910aadSHong Zhang reallocs++; 134778910aadSHong Zhang } 134878910aadSHong Zhang 134978910aadSHong Zhang /* copy data into free space, then initialize lnk */ 13509566063dSJacob Faibussowitsch PetscCall(PetscLLClean(mbs, mbs, nzk, lnk, current_space->array, lnkbt)); 135178910aadSHong Zhang 135278910aadSHong Zhang /* add the k-th row into il and jl */ 135378910aadSHong Zhang if (nzk - 1 > 0) { 135478910aadSHong Zhang i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:mbs-1) */ 13559371c9d4SSatish Balay jl[k] = jl[i]; 13569371c9d4SSatish Balay jl[i] = k; 135778910aadSHong Zhang il[k] = ui[k] + 1; 135878910aadSHong Zhang } 135978910aadSHong Zhang ui_ptr[k] = current_space->array; 136078910aadSHong Zhang current_space->array += nzk; 136178910aadSHong Zhang current_space->local_used += nzk; 136278910aadSHong Zhang current_space->local_remaining -= nzk; 136378910aadSHong Zhang 136478910aadSHong Zhang ui[k + 1] = ui[k] + nzk; 136578910aadSHong Zhang } 136678910aadSHong Zhang 13679566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(perm, &rip)); 13689566063dSJacob Faibussowitsch PetscCall(PetscFree4(ui_ptr, il, jl, cols)); 136978910aadSHong Zhang 13709263d837SHong Zhang /* copy free_space into uj and free free_space; set uj in new datastructure; */ 13719566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(ui[mbs] + 1, &uj)); 13729566063dSJacob Faibussowitsch PetscCall(PetscFreeSpaceContiguous(&free_space, uj)); 13739566063dSJacob Faibussowitsch PetscCall(PetscLLDestroy(lnk, lnkbt)); 137478910aadSHong Zhang 137578910aadSHong Zhang /* put together the new matrix in MATSEQSBAIJ format */ 1376719d5645SBarry Smith B = fact; 13779566063dSJacob Faibussowitsch PetscCall(MatSeqSBAIJSetPreallocation(B, bs, MAT_SKIP_ALLOCATION, NULL)); 137878910aadSHong Zhang 137978910aadSHong Zhang b = (Mat_SeqSBAIJ *)B->data; 1380e6b907acSBarry Smith b->free_a = PETSC_TRUE; 1381e6b907acSBarry Smith b->free_ij = PETSC_TRUE; 138226fbe8dcSKarl Rupp 13839566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(ui[mbs] + 1, &b->a)); 138426fbe8dcSKarl Rupp 138578910aadSHong Zhang b->j = uj; 138678910aadSHong Zhang b->i = ui; 1387f4259b30SLisandro Dalcin b->diag = NULL; 1388f4259b30SLisandro Dalcin b->ilen = NULL; 1389f4259b30SLisandro Dalcin b->imax = NULL; 139078910aadSHong Zhang b->row = perm; 139178910aadSHong Zhang b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 139226fbe8dcSKarl Rupp 13939566063dSJacob Faibussowitsch PetscCall(PetscObjectReference((PetscObject)perm)); 139478910aadSHong Zhang b->icol = perm; 13959566063dSJacob Faibussowitsch PetscCall(PetscObjectReference((PetscObject)perm)); 13969566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(mbs + 1, &b->solve_work)); 139778910aadSHong Zhang b->maxnz = b->nz = ui[mbs]; 139878910aadSHong Zhang 139978910aadSHong Zhang B->info.factor_mallocs = reallocs; 140078910aadSHong Zhang B->info.fill_ratio_given = fill; 140175567043SBarry Smith if (ai[mbs] != 0.) { 140295b5ac22SHong Zhang /* nonzeros in lower triangular part of A = (ai[mbs]+mbs)/2 */ 140395b5ac22SHong Zhang B->info.fill_ratio_needed = ((PetscReal)2 * ui[mbs]) / (ai[mbs] + mbs); 140478910aadSHong Zhang } else { 140578910aadSHong Zhang B->info.fill_ratio_needed = 0.0; 140678910aadSHong Zhang } 14079263d837SHong Zhang #if defined(PETSC_USE_INFO) 14089263d837SHong Zhang if (ai[mbs] != 0.) { 14099263d837SHong Zhang PetscReal af = B->info.fill_ratio_needed; 14109566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)fill, (double)af)); 14119566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af)); 14129566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g) for best performance.\n", (double)af)); 14139263d837SHong Zhang } else { 14149566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "Empty matrix\n")); 14159263d837SHong Zhang } 14169263d837SHong Zhang #endif 141778910aadSHong Zhang if (perm_identity) { 14186ad2eaddSHong Zhang B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqBAIJ_N_NaturalOrdering; 141978910aadSHong Zhang } else { 14206ad2eaddSHong Zhang B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqBAIJ_N; 142178910aadSHong Zhang } 14223ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1423c05c3958SHong Zhang } 1424c8342467SHong Zhang 1425d71ae5a4SJacob Faibussowitsch PetscErrorCode MatSolve_SeqBAIJ_N_NaturalOrdering(Mat A, Vec bb, Vec xx) 1426d71ae5a4SJacob Faibussowitsch { 14271a83e813SShri Abhyankar Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 14281a83e813SShri Abhyankar const PetscInt *ai = a->i, *aj = a->j, *adiag = a->diag, *vi; 14291a83e813SShri Abhyankar PetscInt i, k, n = a->mbs; 14301a83e813SShri Abhyankar PetscInt nz, bs = A->rmap->bs, bs2 = a->bs2; 1431d9ca1df4SBarry Smith const MatScalar *aa = a->a, *v; 1432d9ca1df4SBarry Smith PetscScalar *x, *s, *t, *ls; 1433d9ca1df4SBarry Smith const PetscScalar *b; 14341a83e813SShri Abhyankar 14351a83e813SShri Abhyankar PetscFunctionBegin; 1436ce6b3536SJed Brown PetscCheck(bs > 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Expected bs %" PetscInt_FMT " > 0", bs); 14379566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(bb, &b)); 14389566063dSJacob Faibussowitsch PetscCall(VecGetArray(xx, &x)); 14391a83e813SShri Abhyankar t = a->solve_work; 14401a83e813SShri Abhyankar 14411a83e813SShri Abhyankar /* forward solve the lower triangular */ 14429566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(t, b, bs)); /* copy 1st block of b to t */ 14431a83e813SShri Abhyankar 14441a83e813SShri Abhyankar for (i = 1; i < n; i++) { 14451a83e813SShri Abhyankar v = aa + bs2 * ai[i]; 14461a83e813SShri Abhyankar vi = aj + ai[i]; 14471a83e813SShri Abhyankar nz = ai[i + 1] - ai[i]; 14481a83e813SShri Abhyankar s = t + bs * i; 14499566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(s, b + bs * i, bs)); /* copy i_th block of b to t */ 14501a83e813SShri Abhyankar for (k = 0; k < nz; k++) { 145196b95a6bSBarry Smith PetscKernel_v_gets_v_minus_A_times_w(bs, s, v, t + bs * vi[k]); 14521a83e813SShri Abhyankar v += bs2; 14531a83e813SShri Abhyankar } 14541a83e813SShri Abhyankar } 14551a83e813SShri Abhyankar 14561a83e813SShri Abhyankar /* backward solve the upper triangular */ 14571a83e813SShri Abhyankar ls = a->solve_work + A->cmap->n; 14581a83e813SShri Abhyankar for (i = n - 1; i >= 0; i--) { 14591a83e813SShri Abhyankar v = aa + bs2 * (adiag[i + 1] + 1); 14601a83e813SShri Abhyankar vi = aj + adiag[i + 1] + 1; 14611a83e813SShri Abhyankar nz = adiag[i] - adiag[i + 1] - 1; 14629566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(ls, t + i * bs, bs)); 14631a83e813SShri Abhyankar for (k = 0; k < nz; k++) { 146496b95a6bSBarry Smith PetscKernel_v_gets_v_minus_A_times_w(bs, ls, v, t + bs * vi[k]); 14651a83e813SShri Abhyankar v += bs2; 14661a83e813SShri Abhyankar } 146796b95a6bSBarry Smith PetscKernel_w_gets_A_times_v(bs, ls, aa + bs2 * adiag[i], t + i * bs); /* *inv(diagonal[i]) */ 14689566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(x + i * bs, t + i * bs, bs)); 14691a83e813SShri Abhyankar } 14701a83e813SShri Abhyankar 14719566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(bb, &b)); 14729566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(xx, &x)); 14739566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(2.0 * (a->bs2) * (a->nz) - A->rmap->bs * A->cmap->n)); 14743ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 14751a83e813SShri Abhyankar } 14761a83e813SShri Abhyankar 1477d71ae5a4SJacob Faibussowitsch PetscErrorCode MatSolve_SeqBAIJ_N(Mat A, Vec bb, Vec xx) 1478d71ae5a4SJacob Faibussowitsch { 147935aa4fcfSShri Abhyankar Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 148035aa4fcfSShri Abhyankar IS iscol = a->col, isrow = a->row; 148135aa4fcfSShri Abhyankar const PetscInt *r, *c, *rout, *cout, *ai = a->i, *aj = a->j, *adiag = a->diag, *vi; 148235aa4fcfSShri Abhyankar PetscInt i, m, n = a->mbs; 148335aa4fcfSShri Abhyankar PetscInt nz, bs = A->rmap->bs, bs2 = a->bs2; 1484d9ca1df4SBarry Smith const MatScalar *aa = a->a, *v; 1485d9ca1df4SBarry Smith PetscScalar *x, *s, *t, *ls; 1486d9ca1df4SBarry Smith const PetscScalar *b; 148735aa4fcfSShri Abhyankar 148835aa4fcfSShri Abhyankar PetscFunctionBegin; 1489ce6b3536SJed Brown PetscCheck(bs > 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Expected bs %" PetscInt_FMT " > 0", bs); 14909566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(bb, &b)); 14919566063dSJacob Faibussowitsch PetscCall(VecGetArray(xx, &x)); 149235aa4fcfSShri Abhyankar t = a->solve_work; 149335aa4fcfSShri Abhyankar 14949371c9d4SSatish Balay PetscCall(ISGetIndices(isrow, &rout)); 14959371c9d4SSatish Balay r = rout; 14969371c9d4SSatish Balay PetscCall(ISGetIndices(iscol, &cout)); 14979371c9d4SSatish Balay c = cout; 149835aa4fcfSShri Abhyankar 149935aa4fcfSShri Abhyankar /* forward solve the lower triangular */ 15009566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(t, b + bs * r[0], bs)); 150135aa4fcfSShri Abhyankar for (i = 1; i < n; i++) { 150235aa4fcfSShri Abhyankar v = aa + bs2 * ai[i]; 150335aa4fcfSShri Abhyankar vi = aj + ai[i]; 150435aa4fcfSShri Abhyankar nz = ai[i + 1] - ai[i]; 150535aa4fcfSShri Abhyankar s = t + bs * i; 15069566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(s, b + bs * r[i], bs)); 150735aa4fcfSShri Abhyankar for (m = 0; m < nz; m++) { 150896b95a6bSBarry Smith PetscKernel_v_gets_v_minus_A_times_w(bs, s, v, t + bs * vi[m]); 150935aa4fcfSShri Abhyankar v += bs2; 151035aa4fcfSShri Abhyankar } 151135aa4fcfSShri Abhyankar } 151235aa4fcfSShri Abhyankar 151335aa4fcfSShri Abhyankar /* backward solve the upper triangular */ 151435aa4fcfSShri Abhyankar ls = a->solve_work + A->cmap->n; 151535aa4fcfSShri Abhyankar for (i = n - 1; i >= 0; i--) { 151635aa4fcfSShri Abhyankar v = aa + bs2 * (adiag[i + 1] + 1); 151735aa4fcfSShri Abhyankar vi = aj + adiag[i + 1] + 1; 151835aa4fcfSShri Abhyankar nz = adiag[i] - adiag[i + 1] - 1; 15199566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(ls, t + i * bs, bs)); 152035aa4fcfSShri Abhyankar for (m = 0; m < nz; m++) { 152196b95a6bSBarry Smith PetscKernel_v_gets_v_minus_A_times_w(bs, ls, v, t + bs * vi[m]); 152235aa4fcfSShri Abhyankar v += bs2; 152335aa4fcfSShri Abhyankar } 152496b95a6bSBarry Smith PetscKernel_w_gets_A_times_v(bs, ls, v, t + i * bs); /* *inv(diagonal[i]) */ 15259566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(x + bs * c[i], t + i * bs, bs)); 152635aa4fcfSShri Abhyankar } 15279566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(isrow, &rout)); 15289566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(iscol, &cout)); 15299566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(bb, &b)); 15309566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(xx, &x)); 15319566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(2.0 * (a->bs2) * (a->nz) - A->rmap->bs * A->cmap->n)); 15323ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 153335aa4fcfSShri Abhyankar } 1534