1*b0a32e0cSBarry Smith /*$Id: baijfact.c,v 1.87 2001/01/06 15:35:10 bsmith Exp bsmith $*/ 25d517e7eSBarry Smith /* 3ec3a8b7bSBarry Smith Factorization code for BAIJ format. 45d517e7eSBarry Smith */ 570f55243SBarry Smith #include "src/mat/impls/baij/seq/baij.h" 6c16cb8f2SBarry Smith #include "src/vec/vecimpl.h" 74078e994SBarry Smith #include "src/inline/ilu.h" 8ec3a8b7bSBarry Smith 96d84be18SBarry Smith /* ------------------------------------------------------------*/ 106d84be18SBarry Smith /* 114eeb42bcSBarry Smith Version for when blocks are 2 by 2 124eeb42bcSBarry Smith */ 135615d1e5SSatish Balay #undef __FUNC__ 149d082aa4SBarry Smith #define __FUNC__ "MatLUFactorNumeric_SeqBAIJ_2" 154eeb42bcSBarry Smith int MatLUFactorNumeric_SeqBAIJ_2(Mat A,Mat *B) 164eeb42bcSBarry Smith { 174eeb42bcSBarry Smith Mat C = *B; 184eeb42bcSBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data,*b = (Mat_SeqBAIJ *)C->data; 197cdf2dbbSSatish Balay IS isrow = b->row,isicol = b->icol; 204078e994SBarry Smith int *r,*ic,ierr,i,j,n = a->mbs,*bi = b->i,*bj = b->j; 212515f8d2SSatish Balay int *ajtmpold,*ajtmp,nz,row; 22329f5518SBarry Smith int *diag_offset=b->diag,idx,*ai=a->i,*aj=a->j,*pj; 23329f5518SBarry Smith MatScalar *pv,*v,*rtmp,m1,m2,m3,m4,*pc,*w,*x,x1,x2,x3,x4; 242515f8d2SSatish Balay MatScalar p1,p2,p3,p4; 253f1db9ecSBarry Smith MatScalar *ba = b->a,*aa = a->a; 264eeb42bcSBarry Smith 273a40ed3dSBarry Smith PetscFunctionBegin; 284eeb42bcSBarry Smith ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 294eeb42bcSBarry Smith ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 30*b0a32e0cSBarry Smith ierr = PetscMalloc(4*(n+1)*sizeof(MatScalar),& rtmp );CHKERRQ(ierr); 314eeb42bcSBarry Smith 324eeb42bcSBarry Smith for (i=0; i<n; i++) { 334078e994SBarry Smith nz = bi[i+1] - bi[i]; 344078e994SBarry Smith ajtmp = bj + bi[i]; 354eeb42bcSBarry Smith for (j=0; j<nz; j++) { 364eeb42bcSBarry Smith x = rtmp+4*ajtmp[j]; x[0] = x[1] = x[2] = x[3] = 0.0; 374eeb42bcSBarry Smith } 384eeb42bcSBarry Smith /* load in initial (unfactored row) */ 394eeb42bcSBarry Smith idx = r[i]; 404078e994SBarry Smith nz = ai[idx+1] - ai[idx]; 414078e994SBarry Smith ajtmpold = aj + ai[idx]; 424078e994SBarry Smith v = aa + 4*ai[idx]; 434eeb42bcSBarry Smith for (j=0; j<nz; j++) { 444eeb42bcSBarry Smith x = rtmp+4*ic[ajtmpold[j]]; 454eeb42bcSBarry Smith x[0] = v[0]; x[1] = v[1]; x[2] = v[2]; x[3] = v[3]; 464eeb42bcSBarry Smith v += 4; 474eeb42bcSBarry Smith } 484eeb42bcSBarry Smith row = *ajtmp++; 494eeb42bcSBarry Smith while (row < i) { 504eeb42bcSBarry Smith pc = rtmp + 4*row; 514eeb42bcSBarry Smith p1 = pc[0]; p2 = pc[1]; p3 = pc[2]; p4 = pc[3]; 5288685aaeSLois Curfman McInnes if (p1 != 0.0 || p2 != 0.0 || p3 != 0.0 || p4 != 0.0) { 534078e994SBarry Smith pv = ba + 4*diag_offset[row]; 544078e994SBarry Smith pj = bj + diag_offset[row] + 1; 554eeb42bcSBarry Smith x1 = pv[0]; x2 = pv[1]; x3 = pv[2]; x4 = pv[3]; 564eeb42bcSBarry Smith pc[0] = m1 = p1*x1 + p3*x2; 574eeb42bcSBarry Smith pc[1] = m2 = p2*x1 + p4*x2; 584eeb42bcSBarry Smith pc[2] = m3 = p1*x3 + p3*x4; 594eeb42bcSBarry Smith pc[3] = m4 = p2*x3 + p4*x4; 604078e994SBarry Smith nz = bi[row+1] - diag_offset[row] - 1; 614eeb42bcSBarry Smith pv += 4; 624eeb42bcSBarry Smith for (j=0; j<nz; j++) { 634eeb42bcSBarry Smith x1 = pv[0]; x2 = pv[1]; x3 = pv[2]; x4 = pv[3]; 644eeb42bcSBarry Smith x = rtmp + 4*pj[j]; 654eeb42bcSBarry Smith x[0] -= m1*x1 + m3*x2; 664eeb42bcSBarry Smith x[1] -= m2*x1 + m4*x2; 674eeb42bcSBarry Smith x[2] -= m1*x3 + m3*x4; 684eeb42bcSBarry Smith x[3] -= m2*x3 + m4*x4; 694eeb42bcSBarry Smith pv += 4; 704eeb42bcSBarry Smith } 71*b0a32e0cSBarry Smith PetscLogFlops(16*nz+12); 724eeb42bcSBarry Smith } 734eeb42bcSBarry Smith row = *ajtmp++; 744eeb42bcSBarry Smith } 754eeb42bcSBarry Smith /* finished row so stick it into b->a */ 764078e994SBarry Smith pv = ba + 4*bi[i]; 774078e994SBarry Smith pj = bj + bi[i]; 784078e994SBarry Smith nz = bi[i+1] - bi[i]; 794eeb42bcSBarry Smith for (j=0; j<nz; j++) { 804eeb42bcSBarry Smith x = rtmp+4*pj[j]; 814eeb42bcSBarry Smith pv[0] = x[0]; pv[1] = x[1]; pv[2] = x[2]; pv[3] = x[3]; 824eeb42bcSBarry Smith pv += 4; 834eeb42bcSBarry Smith } 844eeb42bcSBarry Smith /* invert diagonal block */ 854078e994SBarry Smith w = ba + 4*diag_offset[i]; 864cd38560SBarry Smith ierr = Kernel_A_gets_inverse_A_2(w);CHKERRQ(ierr); 874eeb42bcSBarry Smith } 884eeb42bcSBarry Smith 89606d414cSSatish Balay ierr = PetscFree(rtmp);CHKERRQ(ierr); 904eeb42bcSBarry Smith ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 914eeb42bcSBarry Smith ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 924eeb42bcSBarry Smith C->factor = FACTOR_LU; 934eeb42bcSBarry Smith C->assembled = PETSC_TRUE; 94*b0a32e0cSBarry Smith PetscLogFlops(1.3333*8*b->mbs); /* from inverting diagonal blocks */ 953a40ed3dSBarry Smith PetscFunctionReturn(0); 964eeb42bcSBarry Smith } 974cd38560SBarry Smith /* 984cd38560SBarry Smith Version for when blocks are 2 by 2 Using natural ordering 994cd38560SBarry Smith */ 1004cd38560SBarry Smith #undef __FUNC__ 1019d082aa4SBarry Smith #define __FUNC__ "MatLUFactorNumeric_SeqBAIJ_2_NaturalOrdering" 1024cd38560SBarry Smith int MatLUFactorNumeric_SeqBAIJ_2_NaturalOrdering(Mat A,Mat *B) 1034cd38560SBarry Smith { 1044cd38560SBarry Smith Mat C = *B; 1054cd38560SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data,*b = (Mat_SeqBAIJ *)C->data; 1064cd38560SBarry Smith int ierr,i,j,n = a->mbs,*bi = b->i,*bj = b->j; 1072515f8d2SSatish Balay int *ajtmpold,*ajtmp,nz,row; 108329f5518SBarry Smith int *diag_offset = b->diag,*ai=a->i,*aj=a->j,*pj; 109329f5518SBarry Smith MatScalar *pv,*v,*rtmp,*pc,*w,*x; 1102515f8d2SSatish Balay MatScalar p1,p2,p3,p4,m1,m2,m3,m4,x1,x2,x3,x4; 1114cd38560SBarry Smith MatScalar *ba = b->a,*aa = a->a; 1124cd38560SBarry Smith 1134cd38560SBarry Smith PetscFunctionBegin; 114*b0a32e0cSBarry Smith ierr = PetscMalloc(4*(n+1)*sizeof(MatScalar),& rtmp );CHKERRQ(ierr); 1154cd38560SBarry Smith 1164cd38560SBarry Smith for (i=0; i<n; i++) { 1174cd38560SBarry Smith nz = bi[i+1] - bi[i]; 1184cd38560SBarry Smith ajtmp = bj + bi[i]; 1194cd38560SBarry Smith for (j=0; j<nz; j++) { 1204cd38560SBarry Smith x = rtmp+4*ajtmp[j]; 1214cd38560SBarry Smith x[0] = x[1] = x[2] = x[3] = 0.0; 1224cd38560SBarry Smith } 1234cd38560SBarry Smith /* load in initial (unfactored row) */ 1244cd38560SBarry Smith nz = ai[i+1] - ai[i]; 1254cd38560SBarry Smith ajtmpold = aj + ai[i]; 1264cd38560SBarry Smith v = aa + 4*ai[i]; 1274cd38560SBarry Smith for (j=0; j<nz; j++) { 1284cd38560SBarry Smith x = rtmp+4*ajtmpold[j]; 1294cd38560SBarry Smith x[0] = v[0]; x[1] = v[1]; x[2] = v[2]; x[3] = v[3]; 1304cd38560SBarry Smith v += 4; 1314cd38560SBarry Smith } 1324cd38560SBarry Smith row = *ajtmp++; 1334cd38560SBarry Smith while (row < i) { 1344cd38560SBarry Smith pc = rtmp + 4*row; 1354cd38560SBarry Smith p1 = pc[0]; p2 = pc[1]; p3 = pc[2]; p4 = pc[3]; 1364cd38560SBarry Smith if (p1 != 0.0 || p2 != 0.0 || p3 != 0.0 || p4 != 0.0) { 1374cd38560SBarry Smith pv = ba + 4*diag_offset[row]; 1384cd38560SBarry Smith pj = bj + diag_offset[row] + 1; 1394cd38560SBarry Smith x1 = pv[0]; x2 = pv[1]; x3 = pv[2]; x4 = pv[3]; 1404cd38560SBarry Smith pc[0] = m1 = p1*x1 + p3*x2; 1414cd38560SBarry Smith pc[1] = m2 = p2*x1 + p4*x2; 1424cd38560SBarry Smith pc[2] = m3 = p1*x3 + p3*x4; 1434cd38560SBarry Smith pc[3] = m4 = p2*x3 + p4*x4; 1444cd38560SBarry Smith nz = bi[row+1] - diag_offset[row] - 1; 1454cd38560SBarry Smith pv += 4; 1464cd38560SBarry Smith for (j=0; j<nz; j++) { 1474cd38560SBarry Smith x1 = pv[0]; x2 = pv[1]; x3 = pv[2]; x4 = pv[3]; 1484cd38560SBarry Smith x = rtmp + 4*pj[j]; 1494cd38560SBarry Smith x[0] -= m1*x1 + m3*x2; 1504cd38560SBarry Smith x[1] -= m2*x1 + m4*x2; 1514cd38560SBarry Smith x[2] -= m1*x3 + m3*x4; 1524cd38560SBarry Smith x[3] -= m2*x3 + m4*x4; 1534cd38560SBarry Smith pv += 4; 1544cd38560SBarry Smith } 155*b0a32e0cSBarry Smith PetscLogFlops(16*nz+12); 1564cd38560SBarry Smith } 1574cd38560SBarry Smith row = *ajtmp++; 1584cd38560SBarry Smith } 1594cd38560SBarry Smith /* finished row so stick it into b->a */ 1604cd38560SBarry Smith pv = ba + 4*bi[i]; 1614cd38560SBarry Smith pj = bj + bi[i]; 1624cd38560SBarry Smith nz = bi[i+1] - bi[i]; 1634cd38560SBarry Smith for (j=0; j<nz; j++) { 1644cd38560SBarry Smith x = rtmp+4*pj[j]; 1654cd38560SBarry Smith pv[0] = x[0]; pv[1] = x[1]; pv[2] = x[2]; pv[3] = x[3]; 1664cd38560SBarry Smith pv += 4; 1674cd38560SBarry Smith } 1684cd38560SBarry Smith /* invert diagonal block */ 1694cd38560SBarry Smith w = ba + 4*diag_offset[i]; 1704cd38560SBarry Smith ierr = Kernel_A_gets_inverse_A_2(w);CHKERRQ(ierr); 1714cd38560SBarry Smith /*Kernel_A_gets_inverse_A(bs,w,v_pivots,v_work);*/ 1724cd38560SBarry Smith } 1734cd38560SBarry Smith 174606d414cSSatish Balay ierr = PetscFree(rtmp);CHKERRQ(ierr); 1754cd38560SBarry Smith C->factor = FACTOR_LU; 1764cd38560SBarry Smith C->assembled = PETSC_TRUE; 177*b0a32e0cSBarry Smith PetscLogFlops(1.3333*8*b->mbs); /* from inverting diagonal blocks */ 1784cd38560SBarry Smith PetscFunctionReturn(0); 1794cd38560SBarry Smith } 1807fc0212eSBarry Smith 1817fc0212eSBarry Smith /* ----------------------------------------------------------- */ 1827fc0212eSBarry Smith /* 1837fc0212eSBarry Smith Version for when blocks are 1 by 1. 1847fc0212eSBarry Smith */ 1855615d1e5SSatish Balay #undef __FUNC__ 1869d082aa4SBarry Smith #define __FUNC__ "MatLUFactorNumeric_SeqBAIJ_1" 1877fc0212eSBarry Smith int MatLUFactorNumeric_SeqBAIJ_1(Mat A,Mat *B) 1887fc0212eSBarry Smith { 1897fc0212eSBarry Smith Mat C = *B; 1907fc0212eSBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data,*b = (Mat_SeqBAIJ *)C->data; 1917cdf2dbbSSatish Balay IS isrow = b->row,isicol = b->icol; 1924078e994SBarry Smith int *r,*ic,ierr,i,j,n = a->mbs,*bi = b->i,*bj = b->j; 1934078e994SBarry Smith int *ajtmpold,*ajtmp,nz,row,*ai = a->i,*aj = a->j; 194329f5518SBarry Smith int *diag_offset = b->diag,diag,*pj; 195329f5518SBarry Smith MatScalar *pv,*v,*rtmp,multiplier,*pc; 1963f1db9ecSBarry Smith MatScalar *ba = b->a,*aa = a->a; 1977fc0212eSBarry Smith 1983a40ed3dSBarry Smith PetscFunctionBegin; 1997fc0212eSBarry Smith ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 2007fc0212eSBarry Smith ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 201*b0a32e0cSBarry Smith ierr = PetscMalloc((n+1)*sizeof(MatScalar),& rtmp );CHKERRQ(ierr); 2027fc0212eSBarry Smith 2037fc0212eSBarry Smith for (i=0; i<n; i++) { 2044078e994SBarry Smith nz = bi[i+1] - bi[i]; 2054078e994SBarry Smith ajtmp = bj + bi[i]; 2067fc0212eSBarry Smith for (j=0; j<nz; j++) rtmp[ajtmp[j]] = 0.0; 2077fc0212eSBarry Smith 2087fc0212eSBarry Smith /* load in initial (unfactored row) */ 2094078e994SBarry Smith nz = ai[r[i]+1] - ai[r[i]]; 2104078e994SBarry Smith ajtmpold = aj + ai[r[i]]; 2114078e994SBarry Smith v = aa + ai[r[i]]; 2127fc0212eSBarry Smith for (j=0; j<nz; j++) rtmp[ic[ajtmpold[j]]] = v[j]; 2137fc0212eSBarry Smith 2147fc0212eSBarry Smith row = *ajtmp++; 2157fc0212eSBarry Smith while (row < i) { 2167fc0212eSBarry Smith pc = rtmp + row; 2177fc0212eSBarry Smith if (*pc != 0.0) { 2184078e994SBarry Smith pv = ba + diag_offset[row]; 2194078e994SBarry Smith pj = bj + diag_offset[row] + 1; 2207fc0212eSBarry Smith multiplier = *pc * *pv++; 2217fc0212eSBarry Smith *pc = multiplier; 2224078e994SBarry Smith nz = bi[row+1] - diag_offset[row] - 1; 2237fc0212eSBarry Smith for (j=0; j<nz; j++) rtmp[pj[j]] -= multiplier * pv[j]; 224*b0a32e0cSBarry Smith PetscLogFlops(1+2*nz); 2257fc0212eSBarry Smith } 2267fc0212eSBarry Smith row = *ajtmp++; 2277fc0212eSBarry Smith } 2287fc0212eSBarry Smith /* finished row so stick it into b->a */ 2294078e994SBarry Smith pv = ba + bi[i]; 2304078e994SBarry Smith pj = bj + bi[i]; 2314078e994SBarry Smith nz = bi[i+1] - bi[i]; 2327fc0212eSBarry Smith for (j=0; j<nz; j++) {pv[j] = rtmp[pj[j]];} 2334078e994SBarry Smith diag = diag_offset[i] - bi[i]; 2347fc0212eSBarry Smith /* check pivot entry for current row */ 2357fc0212eSBarry Smith if (pv[diag] == 0.0) { 23629bbc08cSBarry Smith SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot"); 2377fc0212eSBarry Smith } 2387fc0212eSBarry Smith pv[diag] = 1.0/pv[diag]; 2397fc0212eSBarry Smith } 2407fc0212eSBarry Smith 241606d414cSSatish Balay ierr = PetscFree(rtmp);CHKERRQ(ierr); 2427fc0212eSBarry Smith ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 2437fc0212eSBarry Smith ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 2447fc0212eSBarry Smith C->factor = FACTOR_LU; 2457fc0212eSBarry Smith C->assembled = PETSC_TRUE; 246*b0a32e0cSBarry Smith PetscLogFlops(C->n); 2473a40ed3dSBarry Smith PetscFunctionReturn(0); 2487fc0212eSBarry Smith } 2497fc0212eSBarry Smith 250273d9f13SBarry Smith 2515d517e7eSBarry Smith /* ----------------------------------------------------------- */ 2525615d1e5SSatish Balay #undef __FUNC__ 253*b0a32e0cSBarry Smith #define __FUNC__ "MatLUFactor_SeqBAIJ" 254b9b97703SBarry Smith int MatLUFactor_SeqBAIJ(Mat A,IS row,IS col,MatLUInfo *info) 2555d517e7eSBarry Smith { 256273d9f13SBarry Smith int ierr; 2575d517e7eSBarry Smith Mat C; 2585d517e7eSBarry Smith 2593a40ed3dSBarry Smith PetscFunctionBegin; 260b9b97703SBarry Smith ierr = MatLUFactorSymbolic(A,row,col,info,&C);CHKERRQ(ierr); 2617fc0212eSBarry Smith ierr = MatLUFactorNumeric(A,&C);CHKERRQ(ierr); 262273d9f13SBarry Smith ierr = MatHeaderCopy(A,C);CHKERRQ(ierr); 263*b0a32e0cSBarry Smith PetscLogObjectParent(A,((Mat_SeqBAIJ*)(A->data))->icol); 2643a40ed3dSBarry Smith PetscFunctionReturn(0); 2655d517e7eSBarry Smith } 2664cd38560SBarry Smith 2674cd38560SBarry Smith 268