xref: /petsc/src/mat/impls/baij/seq/baijfact.c (revision b0a32e0c6855ee6a6cd3495fa7da12ea9885bc5d)
1*b0a32e0cSBarry Smith /*$Id: baijfact.c,v 1.87 2001/01/06 15:35:10 bsmith Exp bsmith $*/
25d517e7eSBarry Smith /*
3ec3a8b7bSBarry Smith     Factorization code for BAIJ format.
45d517e7eSBarry Smith */
570f55243SBarry Smith #include "src/mat/impls/baij/seq/baij.h"
6c16cb8f2SBarry Smith #include "src/vec/vecimpl.h"
74078e994SBarry Smith #include "src/inline/ilu.h"
8ec3a8b7bSBarry Smith 
96d84be18SBarry Smith /* ------------------------------------------------------------*/
106d84be18SBarry Smith /*
114eeb42bcSBarry Smith       Version for when blocks are 2 by 2
124eeb42bcSBarry Smith */
135615d1e5SSatish Balay #undef __FUNC__
149d082aa4SBarry Smith #define __FUNC__ "MatLUFactorNumeric_SeqBAIJ_2"
154eeb42bcSBarry Smith int MatLUFactorNumeric_SeqBAIJ_2(Mat A,Mat *B)
164eeb42bcSBarry Smith {
174eeb42bcSBarry Smith   Mat                C = *B;
184eeb42bcSBarry Smith   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data,*b = (Mat_SeqBAIJ *)C->data;
197cdf2dbbSSatish Balay   IS                 isrow = b->row,isicol = b->icol;
204078e994SBarry Smith   int                *r,*ic,ierr,i,j,n = a->mbs,*bi = b->i,*bj = b->j;
212515f8d2SSatish Balay   int                *ajtmpold,*ajtmp,nz,row;
22329f5518SBarry Smith   int                *diag_offset=b->diag,idx,*ai=a->i,*aj=a->j,*pj;
23329f5518SBarry Smith   MatScalar          *pv,*v,*rtmp,m1,m2,m3,m4,*pc,*w,*x,x1,x2,x3,x4;
242515f8d2SSatish Balay   MatScalar          p1,p2,p3,p4;
253f1db9ecSBarry Smith   MatScalar          *ba = b->a,*aa = a->a;
264eeb42bcSBarry Smith 
273a40ed3dSBarry Smith   PetscFunctionBegin;
284eeb42bcSBarry Smith   ierr  = ISGetIndices(isrow,&r);CHKERRQ(ierr);
294eeb42bcSBarry Smith   ierr  = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
30*b0a32e0cSBarry Smith ierr = PetscMalloc(4*(n+1)*sizeof(MatScalar),&  rtmp  );CHKERRQ(ierr);
314eeb42bcSBarry Smith 
324eeb42bcSBarry Smith   for (i=0; i<n; i++) {
334078e994SBarry Smith     nz    = bi[i+1] - bi[i];
344078e994SBarry Smith     ajtmp = bj + bi[i];
354eeb42bcSBarry Smith     for  (j=0; j<nz; j++) {
364eeb42bcSBarry Smith       x = rtmp+4*ajtmp[j]; x[0] = x[1] = x[2] = x[3] = 0.0;
374eeb42bcSBarry Smith     }
384eeb42bcSBarry Smith     /* load in initial (unfactored row) */
394eeb42bcSBarry Smith     idx      = r[i];
404078e994SBarry Smith     nz       = ai[idx+1] - ai[idx];
414078e994SBarry Smith     ajtmpold = aj + ai[idx];
424078e994SBarry Smith     v        = aa + 4*ai[idx];
434eeb42bcSBarry Smith     for (j=0; j<nz; j++) {
444eeb42bcSBarry Smith       x    = rtmp+4*ic[ajtmpold[j]];
454eeb42bcSBarry Smith       x[0] = v[0]; x[1] = v[1]; x[2] = v[2]; x[3] = v[3];
464eeb42bcSBarry Smith       v    += 4;
474eeb42bcSBarry Smith     }
484eeb42bcSBarry Smith     row = *ajtmp++;
494eeb42bcSBarry Smith     while (row < i) {
504eeb42bcSBarry Smith       pc = rtmp + 4*row;
514eeb42bcSBarry Smith       p1 = pc[0]; p2 = pc[1]; p3 = pc[2]; p4 = pc[3];
5288685aaeSLois Curfman McInnes       if (p1 != 0.0 || p2 != 0.0 || p3 != 0.0 || p4 != 0.0) {
534078e994SBarry Smith         pv = ba + 4*diag_offset[row];
544078e994SBarry Smith         pj = bj + diag_offset[row] + 1;
554eeb42bcSBarry Smith         x1 = pv[0]; x2 = pv[1]; x3 = pv[2]; x4 = pv[3];
564eeb42bcSBarry Smith         pc[0] = m1 = p1*x1 + p3*x2;
574eeb42bcSBarry Smith         pc[1] = m2 = p2*x1 + p4*x2;
584eeb42bcSBarry Smith         pc[2] = m3 = p1*x3 + p3*x4;
594eeb42bcSBarry Smith         pc[3] = m4 = p2*x3 + p4*x4;
604078e994SBarry Smith         nz = bi[row+1] - diag_offset[row] - 1;
614eeb42bcSBarry Smith         pv += 4;
624eeb42bcSBarry Smith         for (j=0; j<nz; j++) {
634eeb42bcSBarry Smith           x1   = pv[0]; x2 = pv[1]; x3 = pv[2]; x4 = pv[3];
644eeb42bcSBarry Smith           x    = rtmp + 4*pj[j];
654eeb42bcSBarry Smith           x[0] -= m1*x1 + m3*x2;
664eeb42bcSBarry Smith           x[1] -= m2*x1 + m4*x2;
674eeb42bcSBarry Smith           x[2] -= m1*x3 + m3*x4;
684eeb42bcSBarry Smith           x[3] -= m2*x3 + m4*x4;
694eeb42bcSBarry Smith           pv   += 4;
704eeb42bcSBarry Smith         }
71*b0a32e0cSBarry Smith         PetscLogFlops(16*nz+12);
724eeb42bcSBarry Smith       }
734eeb42bcSBarry Smith       row = *ajtmp++;
744eeb42bcSBarry Smith     }
754eeb42bcSBarry Smith     /* finished row so stick it into b->a */
764078e994SBarry Smith     pv = ba + 4*bi[i];
774078e994SBarry Smith     pj = bj + bi[i];
784078e994SBarry Smith     nz = bi[i+1] - bi[i];
794eeb42bcSBarry Smith     for (j=0; j<nz; j++) {
804eeb42bcSBarry Smith       x     = rtmp+4*pj[j];
814eeb42bcSBarry Smith       pv[0] = x[0]; pv[1] = x[1]; pv[2] = x[2]; pv[3] = x[3];
824eeb42bcSBarry Smith       pv   += 4;
834eeb42bcSBarry Smith     }
844eeb42bcSBarry Smith     /* invert diagonal block */
854078e994SBarry Smith     w = ba + 4*diag_offset[i];
864cd38560SBarry Smith     ierr = Kernel_A_gets_inverse_A_2(w);CHKERRQ(ierr);
874eeb42bcSBarry Smith   }
884eeb42bcSBarry Smith 
89606d414cSSatish Balay   ierr = PetscFree(rtmp);CHKERRQ(ierr);
904eeb42bcSBarry Smith   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
914eeb42bcSBarry Smith   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
924eeb42bcSBarry Smith   C->factor = FACTOR_LU;
934eeb42bcSBarry Smith   C->assembled = PETSC_TRUE;
94*b0a32e0cSBarry Smith   PetscLogFlops(1.3333*8*b->mbs); /* from inverting diagonal blocks */
953a40ed3dSBarry Smith   PetscFunctionReturn(0);
964eeb42bcSBarry Smith }
974cd38560SBarry Smith /*
984cd38560SBarry Smith       Version for when blocks are 2 by 2 Using natural ordering
994cd38560SBarry Smith */
1004cd38560SBarry Smith #undef __FUNC__
1019d082aa4SBarry Smith #define __FUNC__ "MatLUFactorNumeric_SeqBAIJ_2_NaturalOrdering"
1024cd38560SBarry Smith int MatLUFactorNumeric_SeqBAIJ_2_NaturalOrdering(Mat A,Mat *B)
1034cd38560SBarry Smith {
1044cd38560SBarry Smith   Mat                C = *B;
1054cd38560SBarry Smith   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data,*b = (Mat_SeqBAIJ *)C->data;
1064cd38560SBarry Smith   int                ierr,i,j,n = a->mbs,*bi = b->i,*bj = b->j;
1072515f8d2SSatish Balay   int                *ajtmpold,*ajtmp,nz,row;
108329f5518SBarry Smith   int                *diag_offset = b->diag,*ai=a->i,*aj=a->j,*pj;
109329f5518SBarry Smith   MatScalar          *pv,*v,*rtmp,*pc,*w,*x;
1102515f8d2SSatish Balay   MatScalar          p1,p2,p3,p4,m1,m2,m3,m4,x1,x2,x3,x4;
1114cd38560SBarry Smith   MatScalar          *ba = b->a,*aa = a->a;
1124cd38560SBarry Smith 
1134cd38560SBarry Smith   PetscFunctionBegin;
114*b0a32e0cSBarry Smith ierr = PetscMalloc(4*(n+1)*sizeof(MatScalar),&  rtmp  );CHKERRQ(ierr);
1154cd38560SBarry Smith 
1164cd38560SBarry Smith   for (i=0; i<n; i++) {
1174cd38560SBarry Smith     nz    = bi[i+1] - bi[i];
1184cd38560SBarry Smith     ajtmp = bj + bi[i];
1194cd38560SBarry Smith     for  (j=0; j<nz; j++) {
1204cd38560SBarry Smith       x = rtmp+4*ajtmp[j];
1214cd38560SBarry Smith       x[0]  = x[1]  = x[2]  = x[3]  = 0.0;
1224cd38560SBarry Smith     }
1234cd38560SBarry Smith     /* load in initial (unfactored row) */
1244cd38560SBarry Smith     nz       = ai[i+1] - ai[i];
1254cd38560SBarry Smith     ajtmpold = aj + ai[i];
1264cd38560SBarry Smith     v        = aa + 4*ai[i];
1274cd38560SBarry Smith     for (j=0; j<nz; j++) {
1284cd38560SBarry Smith       x    = rtmp+4*ajtmpold[j];
1294cd38560SBarry Smith       x[0]  = v[0];  x[1]  = v[1];  x[2]  = v[2];  x[3]  = v[3];
1304cd38560SBarry Smith       v    += 4;
1314cd38560SBarry Smith     }
1324cd38560SBarry Smith     row = *ajtmp++;
1334cd38560SBarry Smith     while (row < i) {
1344cd38560SBarry Smith       pc  = rtmp + 4*row;
1354cd38560SBarry Smith       p1  = pc[0];  p2  = pc[1];  p3  = pc[2];  p4  = pc[3];
1364cd38560SBarry Smith       if (p1 != 0.0 || p2 != 0.0 || p3 != 0.0 || p4 != 0.0) {
1374cd38560SBarry Smith         pv = ba + 4*diag_offset[row];
1384cd38560SBarry Smith         pj = bj + diag_offset[row] + 1;
1394cd38560SBarry Smith         x1  = pv[0];  x2  = pv[1];  x3  = pv[2];  x4  = pv[3];
1404cd38560SBarry Smith         pc[0] = m1 = p1*x1 + p3*x2;
1414cd38560SBarry Smith         pc[1] = m2 = p2*x1 + p4*x2;
1424cd38560SBarry Smith         pc[2] = m3 = p1*x3 + p3*x4;
1434cd38560SBarry Smith         pc[3] = m4 = p2*x3 + p4*x4;
1444cd38560SBarry Smith         nz = bi[row+1] - diag_offset[row] - 1;
1454cd38560SBarry Smith         pv += 4;
1464cd38560SBarry Smith         for (j=0; j<nz; j++) {
1474cd38560SBarry Smith           x1   = pv[0];  x2  = pv[1];   x3 = pv[2];  x4  = pv[3];
1484cd38560SBarry Smith           x    = rtmp + 4*pj[j];
1494cd38560SBarry Smith           x[0] -= m1*x1 + m3*x2;
1504cd38560SBarry Smith           x[1] -= m2*x1 + m4*x2;
1514cd38560SBarry Smith           x[2] -= m1*x3 + m3*x4;
1524cd38560SBarry Smith           x[3] -= m2*x3 + m4*x4;
1534cd38560SBarry Smith           pv   += 4;
1544cd38560SBarry Smith         }
155*b0a32e0cSBarry Smith         PetscLogFlops(16*nz+12);
1564cd38560SBarry Smith       }
1574cd38560SBarry Smith       row = *ajtmp++;
1584cd38560SBarry Smith     }
1594cd38560SBarry Smith     /* finished row so stick it into b->a */
1604cd38560SBarry Smith     pv = ba + 4*bi[i];
1614cd38560SBarry Smith     pj = bj + bi[i];
1624cd38560SBarry Smith     nz = bi[i+1] - bi[i];
1634cd38560SBarry Smith     for (j=0; j<nz; j++) {
1644cd38560SBarry Smith       x      = rtmp+4*pj[j];
1654cd38560SBarry Smith       pv[0]  = x[0];  pv[1]  = x[1];  pv[2]  = x[2];  pv[3]  = x[3];
1664cd38560SBarry Smith       pv   += 4;
1674cd38560SBarry Smith     }
1684cd38560SBarry Smith     /* invert diagonal block */
1694cd38560SBarry Smith     w = ba + 4*diag_offset[i];
1704cd38560SBarry Smith     ierr = Kernel_A_gets_inverse_A_2(w);CHKERRQ(ierr);
1714cd38560SBarry Smith     /*Kernel_A_gets_inverse_A(bs,w,v_pivots,v_work);*/
1724cd38560SBarry Smith   }
1734cd38560SBarry Smith 
174606d414cSSatish Balay   ierr = PetscFree(rtmp);CHKERRQ(ierr);
1754cd38560SBarry Smith   C->factor    = FACTOR_LU;
1764cd38560SBarry Smith   C->assembled = PETSC_TRUE;
177*b0a32e0cSBarry Smith   PetscLogFlops(1.3333*8*b->mbs); /* from inverting diagonal blocks */
1784cd38560SBarry Smith   PetscFunctionReturn(0);
1794cd38560SBarry Smith }
1807fc0212eSBarry Smith 
1817fc0212eSBarry Smith /* ----------------------------------------------------------- */
1827fc0212eSBarry Smith /*
1837fc0212eSBarry Smith      Version for when blocks are 1 by 1.
1847fc0212eSBarry Smith */
1855615d1e5SSatish Balay #undef __FUNC__
1869d082aa4SBarry Smith #define __FUNC__ "MatLUFactorNumeric_SeqBAIJ_1"
1877fc0212eSBarry Smith int MatLUFactorNumeric_SeqBAIJ_1(Mat A,Mat *B)
1887fc0212eSBarry Smith {
1897fc0212eSBarry Smith   Mat                C = *B;
1907fc0212eSBarry Smith   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data,*b = (Mat_SeqBAIJ *)C->data;
1917cdf2dbbSSatish Balay   IS                 isrow = b->row,isicol = b->icol;
1924078e994SBarry Smith   int                *r,*ic,ierr,i,j,n = a->mbs,*bi = b->i,*bj = b->j;
1934078e994SBarry Smith   int                *ajtmpold,*ajtmp,nz,row,*ai = a->i,*aj = a->j;
194329f5518SBarry Smith   int                *diag_offset = b->diag,diag,*pj;
195329f5518SBarry Smith   MatScalar          *pv,*v,*rtmp,multiplier,*pc;
1963f1db9ecSBarry Smith   MatScalar          *ba = b->a,*aa = a->a;
1977fc0212eSBarry Smith 
1983a40ed3dSBarry Smith   PetscFunctionBegin;
1997fc0212eSBarry Smith   ierr  = ISGetIndices(isrow,&r);CHKERRQ(ierr);
2007fc0212eSBarry Smith   ierr  = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
201*b0a32e0cSBarry Smith ierr = PetscMalloc((n+1)*sizeof(MatScalar),&  rtmp  );CHKERRQ(ierr);
2027fc0212eSBarry Smith 
2037fc0212eSBarry Smith   for (i=0; i<n; i++) {
2044078e994SBarry Smith     nz    = bi[i+1] - bi[i];
2054078e994SBarry Smith     ajtmp = bj + bi[i];
2067fc0212eSBarry Smith     for  (j=0; j<nz; j++) rtmp[ajtmp[j]] = 0.0;
2077fc0212eSBarry Smith 
2087fc0212eSBarry Smith     /* load in initial (unfactored row) */
2094078e994SBarry Smith     nz       = ai[r[i]+1] - ai[r[i]];
2104078e994SBarry Smith     ajtmpold = aj + ai[r[i]];
2114078e994SBarry Smith     v        = aa + ai[r[i]];
2127fc0212eSBarry Smith     for (j=0; j<nz; j++) rtmp[ic[ajtmpold[j]]] =  v[j];
2137fc0212eSBarry Smith 
2147fc0212eSBarry Smith     row = *ajtmp++;
2157fc0212eSBarry Smith     while (row < i) {
2167fc0212eSBarry Smith       pc = rtmp + row;
2177fc0212eSBarry Smith       if (*pc != 0.0) {
2184078e994SBarry Smith         pv         = ba + diag_offset[row];
2194078e994SBarry Smith         pj         = bj + diag_offset[row] + 1;
2207fc0212eSBarry Smith         multiplier = *pc * *pv++;
2217fc0212eSBarry Smith         *pc        = multiplier;
2224078e994SBarry Smith         nz         = bi[row+1] - diag_offset[row] - 1;
2237fc0212eSBarry Smith         for (j=0; j<nz; j++) rtmp[pj[j]] -= multiplier * pv[j];
224*b0a32e0cSBarry Smith         PetscLogFlops(1+2*nz);
2257fc0212eSBarry Smith       }
2267fc0212eSBarry Smith       row = *ajtmp++;
2277fc0212eSBarry Smith     }
2287fc0212eSBarry Smith     /* finished row so stick it into b->a */
2294078e994SBarry Smith     pv = ba + bi[i];
2304078e994SBarry Smith     pj = bj + bi[i];
2314078e994SBarry Smith     nz = bi[i+1] - bi[i];
2327fc0212eSBarry Smith     for (j=0; j<nz; j++) {pv[j] = rtmp[pj[j]];}
2334078e994SBarry Smith     diag = diag_offset[i] - bi[i];
2347fc0212eSBarry Smith     /* check pivot entry for current row */
2357fc0212eSBarry Smith     if (pv[diag] == 0.0) {
23629bbc08cSBarry Smith       SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot");
2377fc0212eSBarry Smith     }
2387fc0212eSBarry Smith     pv[diag] = 1.0/pv[diag];
2397fc0212eSBarry Smith   }
2407fc0212eSBarry Smith 
241606d414cSSatish Balay   ierr = PetscFree(rtmp);CHKERRQ(ierr);
2427fc0212eSBarry Smith   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
2437fc0212eSBarry Smith   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
2447fc0212eSBarry Smith   C->factor    = FACTOR_LU;
2457fc0212eSBarry Smith   C->assembled = PETSC_TRUE;
246*b0a32e0cSBarry Smith   PetscLogFlops(C->n);
2473a40ed3dSBarry Smith   PetscFunctionReturn(0);
2487fc0212eSBarry Smith }
2497fc0212eSBarry Smith 
250273d9f13SBarry Smith 
2515d517e7eSBarry Smith /* ----------------------------------------------------------- */
2525615d1e5SSatish Balay #undef __FUNC__
253*b0a32e0cSBarry Smith #define __FUNC__ "MatLUFactor_SeqBAIJ"
254b9b97703SBarry Smith int MatLUFactor_SeqBAIJ(Mat A,IS row,IS col,MatLUInfo *info)
2555d517e7eSBarry Smith {
256273d9f13SBarry Smith   int         ierr;
2575d517e7eSBarry Smith   Mat         C;
2585d517e7eSBarry Smith 
2593a40ed3dSBarry Smith   PetscFunctionBegin;
260b9b97703SBarry Smith   ierr = MatLUFactorSymbolic(A,row,col,info,&C);CHKERRQ(ierr);
2617fc0212eSBarry Smith   ierr = MatLUFactorNumeric(A,&C);CHKERRQ(ierr);
262273d9f13SBarry Smith   ierr = MatHeaderCopy(A,C);CHKERRQ(ierr);
263*b0a32e0cSBarry Smith   PetscLogObjectParent(A,((Mat_SeqBAIJ*)(A->data))->icol);
2643a40ed3dSBarry Smith   PetscFunctionReturn(0);
2655d517e7eSBarry Smith }
2664cd38560SBarry Smith 
2674cd38560SBarry Smith 
268