xref: /petsc/src/mat/impls/baij/seq/baijfact.c (revision 6849ba73f22fecb8f92ef896a42e4e8bd4cd6965)
15d517e7eSBarry Smith /*
2ec3a8b7bSBarry Smith     Factorization code for BAIJ format.
35d517e7eSBarry Smith */
470f55243SBarry Smith #include "src/mat/impls/baij/seq/baij.h"
54078e994SBarry Smith #include "src/inline/ilu.h"
6ec3a8b7bSBarry Smith 
76d84be18SBarry Smith /* ------------------------------------------------------------*/
86d84be18SBarry Smith /*
94eeb42bcSBarry Smith       Version for when blocks are 2 by 2
104eeb42bcSBarry Smith */
114a2ae208SSatish Balay #undef __FUNCT__
124a2ae208SSatish Balay #define __FUNCT__ "MatLUFactorNumeric_SeqBAIJ_2"
13dfbe8321SBarry Smith PetscErrorCode MatLUFactorNumeric_SeqBAIJ_2(Mat A,Mat *B)
144eeb42bcSBarry Smith {
154eeb42bcSBarry Smith   Mat                C = *B;
164eeb42bcSBarry Smith   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data,*b = (Mat_SeqBAIJ *)C->data;
177cdf2dbbSSatish Balay   IS                 isrow = b->row,isicol = b->icol;
18*6849ba73SBarry Smith   PetscErrorCode ierr;
19*6849ba73SBarry Smith   int                *r,*ic,i,j,n = a->mbs,*bi = b->i,*bj = b->j;
202515f8d2SSatish Balay   int                *ajtmpold,*ajtmp,nz,row;
21329f5518SBarry Smith   int                *diag_offset=b->diag,idx,*ai=a->i,*aj=a->j,*pj;
22329f5518SBarry Smith   MatScalar          *pv,*v,*rtmp,m1,m2,m3,m4,*pc,*w,*x,x1,x2,x3,x4;
232515f8d2SSatish Balay   MatScalar          p1,p2,p3,p4;
243f1db9ecSBarry Smith   MatScalar          *ba = b->a,*aa = a->a;
254eeb42bcSBarry Smith 
263a40ed3dSBarry Smith   PetscFunctionBegin;
274eeb42bcSBarry Smith   ierr  = ISGetIndices(isrow,&r);CHKERRQ(ierr);
284eeb42bcSBarry Smith   ierr  = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
29b0a32e0cSBarry Smith   ierr  = PetscMalloc(4*(n+1)*sizeof(MatScalar),&rtmp);CHKERRQ(ierr);
304eeb42bcSBarry Smith 
314eeb42bcSBarry Smith   for (i=0; i<n; i++) {
324078e994SBarry Smith     nz    = bi[i+1] - bi[i];
334078e994SBarry Smith     ajtmp = bj + bi[i];
344eeb42bcSBarry Smith     for  (j=0; j<nz; j++) {
354eeb42bcSBarry Smith       x = rtmp+4*ajtmp[j]; x[0] = x[1] = x[2] = x[3] = 0.0;
364eeb42bcSBarry Smith     }
374eeb42bcSBarry Smith     /* load in initial (unfactored row) */
384eeb42bcSBarry Smith     idx      = r[i];
394078e994SBarry Smith     nz       = ai[idx+1] - ai[idx];
404078e994SBarry Smith     ajtmpold = aj + ai[idx];
414078e994SBarry Smith     v        = aa + 4*ai[idx];
424eeb42bcSBarry Smith     for (j=0; j<nz; j++) {
434eeb42bcSBarry Smith       x    = rtmp+4*ic[ajtmpold[j]];
444eeb42bcSBarry Smith       x[0] = v[0]; x[1] = v[1]; x[2] = v[2]; x[3] = v[3];
454eeb42bcSBarry Smith       v    += 4;
464eeb42bcSBarry Smith     }
474eeb42bcSBarry Smith     row = *ajtmp++;
484eeb42bcSBarry Smith     while (row < i) {
494eeb42bcSBarry Smith       pc = rtmp + 4*row;
504eeb42bcSBarry Smith       p1 = pc[0]; p2 = pc[1]; p3 = pc[2]; p4 = pc[3];
5188685aaeSLois Curfman McInnes       if (p1 != 0.0 || p2 != 0.0 || p3 != 0.0 || p4 != 0.0) {
524078e994SBarry Smith         pv = ba + 4*diag_offset[row];
534078e994SBarry Smith         pj = bj + diag_offset[row] + 1;
544eeb42bcSBarry Smith         x1 = pv[0]; x2 = pv[1]; x3 = pv[2]; x4 = pv[3];
554eeb42bcSBarry Smith         pc[0] = m1 = p1*x1 + p3*x2;
564eeb42bcSBarry Smith         pc[1] = m2 = p2*x1 + p4*x2;
574eeb42bcSBarry Smith         pc[2] = m3 = p1*x3 + p3*x4;
584eeb42bcSBarry Smith         pc[3] = m4 = p2*x3 + p4*x4;
594078e994SBarry Smith         nz = bi[row+1] - diag_offset[row] - 1;
604eeb42bcSBarry Smith         pv += 4;
614eeb42bcSBarry Smith         for (j=0; j<nz; j++) {
624eeb42bcSBarry Smith           x1   = pv[0]; x2 = pv[1]; x3 = pv[2]; x4 = pv[3];
634eeb42bcSBarry Smith           x    = rtmp + 4*pj[j];
644eeb42bcSBarry Smith           x[0] -= m1*x1 + m3*x2;
654eeb42bcSBarry Smith           x[1] -= m2*x1 + m4*x2;
664eeb42bcSBarry Smith           x[2] -= m1*x3 + m3*x4;
674eeb42bcSBarry Smith           x[3] -= m2*x3 + m4*x4;
684eeb42bcSBarry Smith           pv   += 4;
694eeb42bcSBarry Smith         }
70b0a32e0cSBarry Smith         PetscLogFlops(16*nz+12);
714eeb42bcSBarry Smith       }
724eeb42bcSBarry Smith       row = *ajtmp++;
734eeb42bcSBarry Smith     }
744eeb42bcSBarry Smith     /* finished row so stick it into b->a */
754078e994SBarry Smith     pv = ba + 4*bi[i];
764078e994SBarry Smith     pj = bj + bi[i];
774078e994SBarry Smith     nz = bi[i+1] - bi[i];
784eeb42bcSBarry Smith     for (j=0; j<nz; j++) {
794eeb42bcSBarry Smith       x     = rtmp+4*pj[j];
804eeb42bcSBarry Smith       pv[0] = x[0]; pv[1] = x[1]; pv[2] = x[2]; pv[3] = x[3];
814eeb42bcSBarry Smith       pv   += 4;
824eeb42bcSBarry Smith     }
834eeb42bcSBarry Smith     /* invert diagonal block */
844078e994SBarry Smith     w = ba + 4*diag_offset[i];
854cd38560SBarry Smith     ierr = Kernel_A_gets_inverse_A_2(w);CHKERRQ(ierr);
864eeb42bcSBarry Smith   }
874eeb42bcSBarry Smith 
88606d414cSSatish Balay   ierr = PetscFree(rtmp);CHKERRQ(ierr);
894eeb42bcSBarry Smith   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
904eeb42bcSBarry Smith   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
914eeb42bcSBarry Smith   C->factor = FACTOR_LU;
924eeb42bcSBarry Smith   C->assembled = PETSC_TRUE;
93b0a32e0cSBarry Smith   PetscLogFlops(1.3333*8*b->mbs); /* from inverting diagonal blocks */
943a40ed3dSBarry Smith   PetscFunctionReturn(0);
954eeb42bcSBarry Smith }
964cd38560SBarry Smith /*
974cd38560SBarry Smith       Version for when blocks are 2 by 2 Using natural ordering
984cd38560SBarry Smith */
994a2ae208SSatish Balay #undef __FUNCT__
1004a2ae208SSatish Balay #define __FUNCT__ "MatLUFactorNumeric_SeqBAIJ_2_NaturalOrdering"
101dfbe8321SBarry Smith PetscErrorCode MatLUFactorNumeric_SeqBAIJ_2_NaturalOrdering(Mat A,Mat *B)
1024cd38560SBarry Smith {
1034cd38560SBarry Smith   Mat                C = *B;
1044cd38560SBarry Smith   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data,*b = (Mat_SeqBAIJ *)C->data;
105dfbe8321SBarry Smith   PetscErrorCode ierr;
106dfbe8321SBarry Smith   int  i,j,n = a->mbs,*bi = b->i,*bj = b->j;
1072515f8d2SSatish Balay   int                *ajtmpold,*ajtmp,nz,row;
108329f5518SBarry Smith   int                *diag_offset = b->diag,*ai=a->i,*aj=a->j,*pj;
109329f5518SBarry Smith   MatScalar          *pv,*v,*rtmp,*pc,*w,*x;
1102515f8d2SSatish Balay   MatScalar          p1,p2,p3,p4,m1,m2,m3,m4,x1,x2,x3,x4;
1114cd38560SBarry Smith   MatScalar          *ba = b->a,*aa = a->a;
1124cd38560SBarry Smith 
1134cd38560SBarry Smith   PetscFunctionBegin;
114b0a32e0cSBarry Smith   ierr = PetscMalloc(4*(n+1)*sizeof(MatScalar),&rtmp);CHKERRQ(ierr);
1154cd38560SBarry Smith 
1164cd38560SBarry Smith   for (i=0; i<n; i++) {
1174cd38560SBarry Smith     nz    = bi[i+1] - bi[i];
1184cd38560SBarry Smith     ajtmp = bj + bi[i];
1194cd38560SBarry Smith     for  (j=0; j<nz; j++) {
1204cd38560SBarry Smith       x = rtmp+4*ajtmp[j];
1214cd38560SBarry Smith       x[0]  = x[1]  = x[2]  = x[3]  = 0.0;
1224cd38560SBarry Smith     }
1234cd38560SBarry Smith     /* load in initial (unfactored row) */
1244cd38560SBarry Smith     nz       = ai[i+1] - ai[i];
1254cd38560SBarry Smith     ajtmpold = aj + ai[i];
1264cd38560SBarry Smith     v        = aa + 4*ai[i];
1274cd38560SBarry Smith     for (j=0; j<nz; j++) {
1284cd38560SBarry Smith       x    = rtmp+4*ajtmpold[j];
1294cd38560SBarry Smith       x[0]  = v[0];  x[1]  = v[1];  x[2]  = v[2];  x[3]  = v[3];
1304cd38560SBarry Smith       v    += 4;
1314cd38560SBarry Smith     }
1324cd38560SBarry Smith     row = *ajtmp++;
1334cd38560SBarry Smith     while (row < i) {
1344cd38560SBarry Smith       pc  = rtmp + 4*row;
1354cd38560SBarry Smith       p1  = pc[0];  p2  = pc[1];  p3  = pc[2];  p4  = pc[3];
1364cd38560SBarry Smith       if (p1 != 0.0 || p2 != 0.0 || p3 != 0.0 || p4 != 0.0) {
1374cd38560SBarry Smith         pv = ba + 4*diag_offset[row];
1384cd38560SBarry Smith         pj = bj + diag_offset[row] + 1;
1394cd38560SBarry Smith         x1  = pv[0];  x2  = pv[1];  x3  = pv[2];  x4  = pv[3];
1404cd38560SBarry Smith         pc[0] = m1 = p1*x1 + p3*x2;
1414cd38560SBarry Smith         pc[1] = m2 = p2*x1 + p4*x2;
1424cd38560SBarry Smith         pc[2] = m3 = p1*x3 + p3*x4;
1434cd38560SBarry Smith         pc[3] = m4 = p2*x3 + p4*x4;
1444cd38560SBarry Smith         nz = bi[row+1] - diag_offset[row] - 1;
1454cd38560SBarry Smith         pv += 4;
1464cd38560SBarry Smith         for (j=0; j<nz; j++) {
1474cd38560SBarry Smith           x1   = pv[0];  x2  = pv[1];   x3 = pv[2];  x4  = pv[3];
1484cd38560SBarry Smith           x    = rtmp + 4*pj[j];
1494cd38560SBarry Smith           x[0] -= m1*x1 + m3*x2;
1504cd38560SBarry Smith           x[1] -= m2*x1 + m4*x2;
1514cd38560SBarry Smith           x[2] -= m1*x3 + m3*x4;
1524cd38560SBarry Smith           x[3] -= m2*x3 + m4*x4;
1534cd38560SBarry Smith           pv   += 4;
1544cd38560SBarry Smith         }
155b0a32e0cSBarry Smith         PetscLogFlops(16*nz+12);
1564cd38560SBarry Smith       }
1574cd38560SBarry Smith       row = *ajtmp++;
1584cd38560SBarry Smith     }
1594cd38560SBarry Smith     /* finished row so stick it into b->a */
1604cd38560SBarry Smith     pv = ba + 4*bi[i];
1614cd38560SBarry Smith     pj = bj + bi[i];
1624cd38560SBarry Smith     nz = bi[i+1] - bi[i];
1634cd38560SBarry Smith     for (j=0; j<nz; j++) {
1644cd38560SBarry Smith       x      = rtmp+4*pj[j];
1654cd38560SBarry Smith       pv[0]  = x[0];  pv[1]  = x[1];  pv[2]  = x[2];  pv[3]  = x[3];
1664cd38560SBarry Smith       pv   += 4;
1674cd38560SBarry Smith     }
1684cd38560SBarry Smith     /* invert diagonal block */
1694cd38560SBarry Smith     w = ba + 4*diag_offset[i];
1704cd38560SBarry Smith     ierr = Kernel_A_gets_inverse_A_2(w);CHKERRQ(ierr);
1714cd38560SBarry Smith     /*Kernel_A_gets_inverse_A(bs,w,v_pivots,v_work);*/
1724cd38560SBarry Smith   }
1734cd38560SBarry Smith 
174606d414cSSatish Balay   ierr = PetscFree(rtmp);CHKERRQ(ierr);
1754cd38560SBarry Smith   C->factor    = FACTOR_LU;
1764cd38560SBarry Smith   C->assembled = PETSC_TRUE;
177b0a32e0cSBarry Smith   PetscLogFlops(1.3333*8*b->mbs); /* from inverting diagonal blocks */
1784cd38560SBarry Smith   PetscFunctionReturn(0);
1794cd38560SBarry Smith }
1807fc0212eSBarry Smith 
1817fc0212eSBarry Smith /* ----------------------------------------------------------- */
1827fc0212eSBarry Smith /*
1837fc0212eSBarry Smith      Version for when blocks are 1 by 1.
1847fc0212eSBarry Smith */
1854a2ae208SSatish Balay #undef __FUNCT__
1864a2ae208SSatish Balay #define __FUNCT__ "MatLUFactorNumeric_SeqBAIJ_1"
187dfbe8321SBarry Smith PetscErrorCode MatLUFactorNumeric_SeqBAIJ_1(Mat A,Mat *B)
1887fc0212eSBarry Smith {
1897fc0212eSBarry Smith   Mat                C = *B;
1907fc0212eSBarry Smith   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data,*b = (Mat_SeqBAIJ *)C->data;
1917cdf2dbbSSatish Balay   IS                 isrow = b->row,isicol = b->icol;
192*6849ba73SBarry Smith   PetscErrorCode ierr;
193*6849ba73SBarry Smith   int                *r,*ic,i,j,n = a->mbs,*bi = b->i,*bj = b->j;
1944078e994SBarry Smith   int                *ajtmpold,*ajtmp,nz,row,*ai = a->i,*aj = a->j;
195329f5518SBarry Smith   int                *diag_offset = b->diag,diag,*pj;
196329f5518SBarry Smith   MatScalar          *pv,*v,*rtmp,multiplier,*pc;
1973f1db9ecSBarry Smith   MatScalar          *ba = b->a,*aa = a->a;
1987fc0212eSBarry Smith 
1993a40ed3dSBarry Smith   PetscFunctionBegin;
2007fc0212eSBarry Smith   ierr  = ISGetIndices(isrow,&r);CHKERRQ(ierr);
2017fc0212eSBarry Smith   ierr  = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
202b0a32e0cSBarry Smith   ierr  = PetscMalloc((n+1)*sizeof(MatScalar),&rtmp);CHKERRQ(ierr);
2037fc0212eSBarry Smith 
2047fc0212eSBarry Smith   for (i=0; i<n; i++) {
2054078e994SBarry Smith     nz    = bi[i+1] - bi[i];
2064078e994SBarry Smith     ajtmp = bj + bi[i];
2077fc0212eSBarry Smith     for  (j=0; j<nz; j++) rtmp[ajtmp[j]] = 0.0;
2087fc0212eSBarry Smith 
2097fc0212eSBarry Smith     /* load in initial (unfactored row) */
2104078e994SBarry Smith     nz       = ai[r[i]+1] - ai[r[i]];
2114078e994SBarry Smith     ajtmpold = aj + ai[r[i]];
2124078e994SBarry Smith     v        = aa + ai[r[i]];
2137fc0212eSBarry Smith     for (j=0; j<nz; j++) rtmp[ic[ajtmpold[j]]] =  v[j];
2147fc0212eSBarry Smith 
2157fc0212eSBarry Smith     row = *ajtmp++;
2167fc0212eSBarry Smith     while (row < i) {
2177fc0212eSBarry Smith       pc = rtmp + row;
2187fc0212eSBarry Smith       if (*pc != 0.0) {
2194078e994SBarry Smith         pv         = ba + diag_offset[row];
2204078e994SBarry Smith         pj         = bj + diag_offset[row] + 1;
2217fc0212eSBarry Smith         multiplier = *pc * *pv++;
2227fc0212eSBarry Smith         *pc        = multiplier;
2234078e994SBarry Smith         nz         = bi[row+1] - diag_offset[row] - 1;
2247fc0212eSBarry Smith         for (j=0; j<nz; j++) rtmp[pj[j]] -= multiplier * pv[j];
225b0a32e0cSBarry Smith         PetscLogFlops(1+2*nz);
2267fc0212eSBarry Smith       }
2277fc0212eSBarry Smith       row = *ajtmp++;
2287fc0212eSBarry Smith     }
2297fc0212eSBarry Smith     /* finished row so stick it into b->a */
2304078e994SBarry Smith     pv = ba + bi[i];
2314078e994SBarry Smith     pj = bj + bi[i];
2324078e994SBarry Smith     nz = bi[i+1] - bi[i];
2337fc0212eSBarry Smith     for (j=0; j<nz; j++) {pv[j] = rtmp[pj[j]];}
2344078e994SBarry Smith     diag = diag_offset[i] - bi[i];
2357fc0212eSBarry Smith     /* check pivot entry for current row */
236a73cf429SBarry Smith     if (pv[diag] == 0.0) {
23729bbc08cSBarry Smith       SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot");
2387fc0212eSBarry Smith     }
2397fc0212eSBarry Smith     pv[diag] = 1.0/pv[diag];
2407fc0212eSBarry Smith   }
2417fc0212eSBarry Smith 
242606d414cSSatish Balay   ierr = PetscFree(rtmp);CHKERRQ(ierr);
2437fc0212eSBarry Smith   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
2447fc0212eSBarry Smith   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
2457fc0212eSBarry Smith   C->factor    = FACTOR_LU;
2467fc0212eSBarry Smith   C->assembled = PETSC_TRUE;
247b0a32e0cSBarry Smith   PetscLogFlops(C->n);
2483a40ed3dSBarry Smith   PetscFunctionReturn(0);
2497fc0212eSBarry Smith }
2507fc0212eSBarry Smith 
251273d9f13SBarry Smith 
2525d517e7eSBarry Smith /* ----------------------------------------------------------- */
2534a2ae208SSatish Balay #undef __FUNCT__
2544a2ae208SSatish Balay #define __FUNCT__ "MatLUFactor_SeqBAIJ"
255dfbe8321SBarry Smith PetscErrorCode MatLUFactor_SeqBAIJ(Mat A,IS row,IS col,MatFactorInfo *info)
2565d517e7eSBarry Smith {
257dfbe8321SBarry Smith   PetscErrorCode ierr;
2585d517e7eSBarry Smith   Mat         C;
2595d517e7eSBarry Smith 
2603a40ed3dSBarry Smith   PetscFunctionBegin;
261b9b97703SBarry Smith   ierr = MatLUFactorSymbolic(A,row,col,info,&C);CHKERRQ(ierr);
2627fc0212eSBarry Smith   ierr = MatLUFactorNumeric(A,&C);CHKERRQ(ierr);
263273d9f13SBarry Smith   ierr = MatHeaderCopy(A,C);CHKERRQ(ierr);
264b0a32e0cSBarry Smith   PetscLogObjectParent(A,((Mat_SeqBAIJ*)(A->data))->icol);
2653a40ed3dSBarry Smith   PetscFunctionReturn(0);
2665d517e7eSBarry Smith }
2674cd38560SBarry Smith 
2684cd38560SBarry Smith 
269