15d517e7eSBarry Smith /* 2ec3a8b7bSBarry Smith Factorization code for BAIJ format. 35d517e7eSBarry Smith */ 470f55243SBarry Smith #include "src/mat/impls/baij/seq/baij.h" 54078e994SBarry Smith #include "src/inline/ilu.h" 6ec3a8b7bSBarry Smith 76d84be18SBarry Smith /* ------------------------------------------------------------*/ 86d84be18SBarry Smith /* 94eeb42bcSBarry Smith Version for when blocks are 2 by 2 104eeb42bcSBarry Smith */ 114a2ae208SSatish Balay #undef __FUNCT__ 124a2ae208SSatish Balay #define __FUNCT__ "MatLUFactorNumeric_SeqBAIJ_2" 13dfbe8321SBarry Smith PetscErrorCode MatLUFactorNumeric_SeqBAIJ_2(Mat A,Mat *B) 144eeb42bcSBarry Smith { 154eeb42bcSBarry Smith Mat C = *B; 164eeb42bcSBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data,*b = (Mat_SeqBAIJ *)C->data; 177cdf2dbbSSatish Balay IS isrow = b->row,isicol = b->icol; 18*6849ba73SBarry Smith PetscErrorCode ierr; 19*6849ba73SBarry Smith int *r,*ic,i,j,n = a->mbs,*bi = b->i,*bj = b->j; 202515f8d2SSatish Balay int *ajtmpold,*ajtmp,nz,row; 21329f5518SBarry Smith int *diag_offset=b->diag,idx,*ai=a->i,*aj=a->j,*pj; 22329f5518SBarry Smith MatScalar *pv,*v,*rtmp,m1,m2,m3,m4,*pc,*w,*x,x1,x2,x3,x4; 232515f8d2SSatish Balay MatScalar p1,p2,p3,p4; 243f1db9ecSBarry Smith MatScalar *ba = b->a,*aa = a->a; 254eeb42bcSBarry Smith 263a40ed3dSBarry Smith PetscFunctionBegin; 274eeb42bcSBarry Smith ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 284eeb42bcSBarry Smith ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 29b0a32e0cSBarry Smith ierr = PetscMalloc(4*(n+1)*sizeof(MatScalar),&rtmp);CHKERRQ(ierr); 304eeb42bcSBarry Smith 314eeb42bcSBarry Smith for (i=0; i<n; i++) { 324078e994SBarry Smith nz = bi[i+1] - bi[i]; 334078e994SBarry Smith ajtmp = bj + bi[i]; 344eeb42bcSBarry Smith for (j=0; j<nz; j++) { 354eeb42bcSBarry Smith x = rtmp+4*ajtmp[j]; x[0] = x[1] = x[2] = x[3] = 0.0; 364eeb42bcSBarry Smith } 374eeb42bcSBarry Smith /* load in initial (unfactored row) */ 384eeb42bcSBarry Smith idx = r[i]; 394078e994SBarry Smith nz = ai[idx+1] - ai[idx]; 404078e994SBarry Smith ajtmpold = aj + ai[idx]; 414078e994SBarry Smith v = aa + 4*ai[idx]; 424eeb42bcSBarry Smith for (j=0; j<nz; j++) { 434eeb42bcSBarry Smith x = rtmp+4*ic[ajtmpold[j]]; 444eeb42bcSBarry Smith x[0] = v[0]; x[1] = v[1]; x[2] = v[2]; x[3] = v[3]; 454eeb42bcSBarry Smith v += 4; 464eeb42bcSBarry Smith } 474eeb42bcSBarry Smith row = *ajtmp++; 484eeb42bcSBarry Smith while (row < i) { 494eeb42bcSBarry Smith pc = rtmp + 4*row; 504eeb42bcSBarry Smith p1 = pc[0]; p2 = pc[1]; p3 = pc[2]; p4 = pc[3]; 5188685aaeSLois Curfman McInnes if (p1 != 0.0 || p2 != 0.0 || p3 != 0.0 || p4 != 0.0) { 524078e994SBarry Smith pv = ba + 4*diag_offset[row]; 534078e994SBarry Smith pj = bj + diag_offset[row] + 1; 544eeb42bcSBarry Smith x1 = pv[0]; x2 = pv[1]; x3 = pv[2]; x4 = pv[3]; 554eeb42bcSBarry Smith pc[0] = m1 = p1*x1 + p3*x2; 564eeb42bcSBarry Smith pc[1] = m2 = p2*x1 + p4*x2; 574eeb42bcSBarry Smith pc[2] = m3 = p1*x3 + p3*x4; 584eeb42bcSBarry Smith pc[3] = m4 = p2*x3 + p4*x4; 594078e994SBarry Smith nz = bi[row+1] - diag_offset[row] - 1; 604eeb42bcSBarry Smith pv += 4; 614eeb42bcSBarry Smith for (j=0; j<nz; j++) { 624eeb42bcSBarry Smith x1 = pv[0]; x2 = pv[1]; x3 = pv[2]; x4 = pv[3]; 634eeb42bcSBarry Smith x = rtmp + 4*pj[j]; 644eeb42bcSBarry Smith x[0] -= m1*x1 + m3*x2; 654eeb42bcSBarry Smith x[1] -= m2*x1 + m4*x2; 664eeb42bcSBarry Smith x[2] -= m1*x3 + m3*x4; 674eeb42bcSBarry Smith x[3] -= m2*x3 + m4*x4; 684eeb42bcSBarry Smith pv += 4; 694eeb42bcSBarry Smith } 70b0a32e0cSBarry Smith PetscLogFlops(16*nz+12); 714eeb42bcSBarry Smith } 724eeb42bcSBarry Smith row = *ajtmp++; 734eeb42bcSBarry Smith } 744eeb42bcSBarry Smith /* finished row so stick it into b->a */ 754078e994SBarry Smith pv = ba + 4*bi[i]; 764078e994SBarry Smith pj = bj + bi[i]; 774078e994SBarry Smith nz = bi[i+1] - bi[i]; 784eeb42bcSBarry Smith for (j=0; j<nz; j++) { 794eeb42bcSBarry Smith x = rtmp+4*pj[j]; 804eeb42bcSBarry Smith pv[0] = x[0]; pv[1] = x[1]; pv[2] = x[2]; pv[3] = x[3]; 814eeb42bcSBarry Smith pv += 4; 824eeb42bcSBarry Smith } 834eeb42bcSBarry Smith /* invert diagonal block */ 844078e994SBarry Smith w = ba + 4*diag_offset[i]; 854cd38560SBarry Smith ierr = Kernel_A_gets_inverse_A_2(w);CHKERRQ(ierr); 864eeb42bcSBarry Smith } 874eeb42bcSBarry Smith 88606d414cSSatish Balay ierr = PetscFree(rtmp);CHKERRQ(ierr); 894eeb42bcSBarry Smith ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 904eeb42bcSBarry Smith ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 914eeb42bcSBarry Smith C->factor = FACTOR_LU; 924eeb42bcSBarry Smith C->assembled = PETSC_TRUE; 93b0a32e0cSBarry Smith PetscLogFlops(1.3333*8*b->mbs); /* from inverting diagonal blocks */ 943a40ed3dSBarry Smith PetscFunctionReturn(0); 954eeb42bcSBarry Smith } 964cd38560SBarry Smith /* 974cd38560SBarry Smith Version for when blocks are 2 by 2 Using natural ordering 984cd38560SBarry Smith */ 994a2ae208SSatish Balay #undef __FUNCT__ 1004a2ae208SSatish Balay #define __FUNCT__ "MatLUFactorNumeric_SeqBAIJ_2_NaturalOrdering" 101dfbe8321SBarry Smith PetscErrorCode MatLUFactorNumeric_SeqBAIJ_2_NaturalOrdering(Mat A,Mat *B) 1024cd38560SBarry Smith { 1034cd38560SBarry Smith Mat C = *B; 1044cd38560SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data,*b = (Mat_SeqBAIJ *)C->data; 105dfbe8321SBarry Smith PetscErrorCode ierr; 106dfbe8321SBarry Smith int i,j,n = a->mbs,*bi = b->i,*bj = b->j; 1072515f8d2SSatish Balay int *ajtmpold,*ajtmp,nz,row; 108329f5518SBarry Smith int *diag_offset = b->diag,*ai=a->i,*aj=a->j,*pj; 109329f5518SBarry Smith MatScalar *pv,*v,*rtmp,*pc,*w,*x; 1102515f8d2SSatish Balay MatScalar p1,p2,p3,p4,m1,m2,m3,m4,x1,x2,x3,x4; 1114cd38560SBarry Smith MatScalar *ba = b->a,*aa = a->a; 1124cd38560SBarry Smith 1134cd38560SBarry Smith PetscFunctionBegin; 114b0a32e0cSBarry Smith ierr = PetscMalloc(4*(n+1)*sizeof(MatScalar),&rtmp);CHKERRQ(ierr); 1154cd38560SBarry Smith 1164cd38560SBarry Smith for (i=0; i<n; i++) { 1174cd38560SBarry Smith nz = bi[i+1] - bi[i]; 1184cd38560SBarry Smith ajtmp = bj + bi[i]; 1194cd38560SBarry Smith for (j=0; j<nz; j++) { 1204cd38560SBarry Smith x = rtmp+4*ajtmp[j]; 1214cd38560SBarry Smith x[0] = x[1] = x[2] = x[3] = 0.0; 1224cd38560SBarry Smith } 1234cd38560SBarry Smith /* load in initial (unfactored row) */ 1244cd38560SBarry Smith nz = ai[i+1] - ai[i]; 1254cd38560SBarry Smith ajtmpold = aj + ai[i]; 1264cd38560SBarry Smith v = aa + 4*ai[i]; 1274cd38560SBarry Smith for (j=0; j<nz; j++) { 1284cd38560SBarry Smith x = rtmp+4*ajtmpold[j]; 1294cd38560SBarry Smith x[0] = v[0]; x[1] = v[1]; x[2] = v[2]; x[3] = v[3]; 1304cd38560SBarry Smith v += 4; 1314cd38560SBarry Smith } 1324cd38560SBarry Smith row = *ajtmp++; 1334cd38560SBarry Smith while (row < i) { 1344cd38560SBarry Smith pc = rtmp + 4*row; 1354cd38560SBarry Smith p1 = pc[0]; p2 = pc[1]; p3 = pc[2]; p4 = pc[3]; 1364cd38560SBarry Smith if (p1 != 0.0 || p2 != 0.0 || p3 != 0.0 || p4 != 0.0) { 1374cd38560SBarry Smith pv = ba + 4*diag_offset[row]; 1384cd38560SBarry Smith pj = bj + diag_offset[row] + 1; 1394cd38560SBarry Smith x1 = pv[0]; x2 = pv[1]; x3 = pv[2]; x4 = pv[3]; 1404cd38560SBarry Smith pc[0] = m1 = p1*x1 + p3*x2; 1414cd38560SBarry Smith pc[1] = m2 = p2*x1 + p4*x2; 1424cd38560SBarry Smith pc[2] = m3 = p1*x3 + p3*x4; 1434cd38560SBarry Smith pc[3] = m4 = p2*x3 + p4*x4; 1444cd38560SBarry Smith nz = bi[row+1] - diag_offset[row] - 1; 1454cd38560SBarry Smith pv += 4; 1464cd38560SBarry Smith for (j=0; j<nz; j++) { 1474cd38560SBarry Smith x1 = pv[0]; x2 = pv[1]; x3 = pv[2]; x4 = pv[3]; 1484cd38560SBarry Smith x = rtmp + 4*pj[j]; 1494cd38560SBarry Smith x[0] -= m1*x1 + m3*x2; 1504cd38560SBarry Smith x[1] -= m2*x1 + m4*x2; 1514cd38560SBarry Smith x[2] -= m1*x3 + m3*x4; 1524cd38560SBarry Smith x[3] -= m2*x3 + m4*x4; 1534cd38560SBarry Smith pv += 4; 1544cd38560SBarry Smith } 155b0a32e0cSBarry Smith PetscLogFlops(16*nz+12); 1564cd38560SBarry Smith } 1574cd38560SBarry Smith row = *ajtmp++; 1584cd38560SBarry Smith } 1594cd38560SBarry Smith /* finished row so stick it into b->a */ 1604cd38560SBarry Smith pv = ba + 4*bi[i]; 1614cd38560SBarry Smith pj = bj + bi[i]; 1624cd38560SBarry Smith nz = bi[i+1] - bi[i]; 1634cd38560SBarry Smith for (j=0; j<nz; j++) { 1644cd38560SBarry Smith x = rtmp+4*pj[j]; 1654cd38560SBarry Smith pv[0] = x[0]; pv[1] = x[1]; pv[2] = x[2]; pv[3] = x[3]; 1664cd38560SBarry Smith pv += 4; 1674cd38560SBarry Smith } 1684cd38560SBarry Smith /* invert diagonal block */ 1694cd38560SBarry Smith w = ba + 4*diag_offset[i]; 1704cd38560SBarry Smith ierr = Kernel_A_gets_inverse_A_2(w);CHKERRQ(ierr); 1714cd38560SBarry Smith /*Kernel_A_gets_inverse_A(bs,w,v_pivots,v_work);*/ 1724cd38560SBarry Smith } 1734cd38560SBarry Smith 174606d414cSSatish Balay ierr = PetscFree(rtmp);CHKERRQ(ierr); 1754cd38560SBarry Smith C->factor = FACTOR_LU; 1764cd38560SBarry Smith C->assembled = PETSC_TRUE; 177b0a32e0cSBarry Smith PetscLogFlops(1.3333*8*b->mbs); /* from inverting diagonal blocks */ 1784cd38560SBarry Smith PetscFunctionReturn(0); 1794cd38560SBarry Smith } 1807fc0212eSBarry Smith 1817fc0212eSBarry Smith /* ----------------------------------------------------------- */ 1827fc0212eSBarry Smith /* 1837fc0212eSBarry Smith Version for when blocks are 1 by 1. 1847fc0212eSBarry Smith */ 1854a2ae208SSatish Balay #undef __FUNCT__ 1864a2ae208SSatish Balay #define __FUNCT__ "MatLUFactorNumeric_SeqBAIJ_1" 187dfbe8321SBarry Smith PetscErrorCode MatLUFactorNumeric_SeqBAIJ_1(Mat A,Mat *B) 1887fc0212eSBarry Smith { 1897fc0212eSBarry Smith Mat C = *B; 1907fc0212eSBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data,*b = (Mat_SeqBAIJ *)C->data; 1917cdf2dbbSSatish Balay IS isrow = b->row,isicol = b->icol; 192*6849ba73SBarry Smith PetscErrorCode ierr; 193*6849ba73SBarry Smith int *r,*ic,i,j,n = a->mbs,*bi = b->i,*bj = b->j; 1944078e994SBarry Smith int *ajtmpold,*ajtmp,nz,row,*ai = a->i,*aj = a->j; 195329f5518SBarry Smith int *diag_offset = b->diag,diag,*pj; 196329f5518SBarry Smith MatScalar *pv,*v,*rtmp,multiplier,*pc; 1973f1db9ecSBarry Smith MatScalar *ba = b->a,*aa = a->a; 1987fc0212eSBarry Smith 1993a40ed3dSBarry Smith PetscFunctionBegin; 2007fc0212eSBarry Smith ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 2017fc0212eSBarry Smith ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 202b0a32e0cSBarry Smith ierr = PetscMalloc((n+1)*sizeof(MatScalar),&rtmp);CHKERRQ(ierr); 2037fc0212eSBarry Smith 2047fc0212eSBarry Smith for (i=0; i<n; i++) { 2054078e994SBarry Smith nz = bi[i+1] - bi[i]; 2064078e994SBarry Smith ajtmp = bj + bi[i]; 2077fc0212eSBarry Smith for (j=0; j<nz; j++) rtmp[ajtmp[j]] = 0.0; 2087fc0212eSBarry Smith 2097fc0212eSBarry Smith /* load in initial (unfactored row) */ 2104078e994SBarry Smith nz = ai[r[i]+1] - ai[r[i]]; 2114078e994SBarry Smith ajtmpold = aj + ai[r[i]]; 2124078e994SBarry Smith v = aa + ai[r[i]]; 2137fc0212eSBarry Smith for (j=0; j<nz; j++) rtmp[ic[ajtmpold[j]]] = v[j]; 2147fc0212eSBarry Smith 2157fc0212eSBarry Smith row = *ajtmp++; 2167fc0212eSBarry Smith while (row < i) { 2177fc0212eSBarry Smith pc = rtmp + row; 2187fc0212eSBarry Smith if (*pc != 0.0) { 2194078e994SBarry Smith pv = ba + diag_offset[row]; 2204078e994SBarry Smith pj = bj + diag_offset[row] + 1; 2217fc0212eSBarry Smith multiplier = *pc * *pv++; 2227fc0212eSBarry Smith *pc = multiplier; 2234078e994SBarry Smith nz = bi[row+1] - diag_offset[row] - 1; 2247fc0212eSBarry Smith for (j=0; j<nz; j++) rtmp[pj[j]] -= multiplier * pv[j]; 225b0a32e0cSBarry Smith PetscLogFlops(1+2*nz); 2267fc0212eSBarry Smith } 2277fc0212eSBarry Smith row = *ajtmp++; 2287fc0212eSBarry Smith } 2297fc0212eSBarry Smith /* finished row so stick it into b->a */ 2304078e994SBarry Smith pv = ba + bi[i]; 2314078e994SBarry Smith pj = bj + bi[i]; 2324078e994SBarry Smith nz = bi[i+1] - bi[i]; 2337fc0212eSBarry Smith for (j=0; j<nz; j++) {pv[j] = rtmp[pj[j]];} 2344078e994SBarry Smith diag = diag_offset[i] - bi[i]; 2357fc0212eSBarry Smith /* check pivot entry for current row */ 236a73cf429SBarry Smith if (pv[diag] == 0.0) { 23729bbc08cSBarry Smith SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot"); 2387fc0212eSBarry Smith } 2397fc0212eSBarry Smith pv[diag] = 1.0/pv[diag]; 2407fc0212eSBarry Smith } 2417fc0212eSBarry Smith 242606d414cSSatish Balay ierr = PetscFree(rtmp);CHKERRQ(ierr); 2437fc0212eSBarry Smith ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 2447fc0212eSBarry Smith ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 2457fc0212eSBarry Smith C->factor = FACTOR_LU; 2467fc0212eSBarry Smith C->assembled = PETSC_TRUE; 247b0a32e0cSBarry Smith PetscLogFlops(C->n); 2483a40ed3dSBarry Smith PetscFunctionReturn(0); 2497fc0212eSBarry Smith } 2507fc0212eSBarry Smith 251273d9f13SBarry Smith 2525d517e7eSBarry Smith /* ----------------------------------------------------------- */ 2534a2ae208SSatish Balay #undef __FUNCT__ 2544a2ae208SSatish Balay #define __FUNCT__ "MatLUFactor_SeqBAIJ" 255dfbe8321SBarry Smith PetscErrorCode MatLUFactor_SeqBAIJ(Mat A,IS row,IS col,MatFactorInfo *info) 2565d517e7eSBarry Smith { 257dfbe8321SBarry Smith PetscErrorCode ierr; 2585d517e7eSBarry Smith Mat C; 2595d517e7eSBarry Smith 2603a40ed3dSBarry Smith PetscFunctionBegin; 261b9b97703SBarry Smith ierr = MatLUFactorSymbolic(A,row,col,info,&C);CHKERRQ(ierr); 2627fc0212eSBarry Smith ierr = MatLUFactorNumeric(A,&C);CHKERRQ(ierr); 263273d9f13SBarry Smith ierr = MatHeaderCopy(A,C);CHKERRQ(ierr); 264b0a32e0cSBarry Smith PetscLogObjectParent(A,((Mat_SeqBAIJ*)(A->data))->icol); 2653a40ed3dSBarry Smith PetscFunctionReturn(0); 2665d517e7eSBarry Smith } 2674cd38560SBarry Smith 2684cd38560SBarry Smith 269