xref: /petsc/src/mat/impls/baij/seq/baijfact.c (revision a73cf4291978b9dde6c66f03ea260b274d7a5a16)
15d517e7eSBarry Smith /*
2ec3a8b7bSBarry Smith     Factorization code for BAIJ format.
35d517e7eSBarry Smith */
470f55243SBarry Smith #include "src/mat/impls/baij/seq/baij.h"
54078e994SBarry Smith #include "src/inline/ilu.h"
6ec3a8b7bSBarry Smith 
76d84be18SBarry Smith /* ------------------------------------------------------------*/
86d84be18SBarry Smith /*
94eeb42bcSBarry Smith       Version for when blocks are 2 by 2
104eeb42bcSBarry Smith */
114a2ae208SSatish Balay #undef __FUNCT__
124a2ae208SSatish Balay #define __FUNCT__ "MatLUFactorNumeric_SeqBAIJ_2"
134eeb42bcSBarry Smith int MatLUFactorNumeric_SeqBAIJ_2(Mat A,Mat *B)
144eeb42bcSBarry Smith {
154eeb42bcSBarry Smith   Mat                C = *B;
164eeb42bcSBarry Smith   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data,*b = (Mat_SeqBAIJ *)C->data;
177cdf2dbbSSatish Balay   IS                 isrow = b->row,isicol = b->icol;
184078e994SBarry Smith   int                *r,*ic,ierr,i,j,n = a->mbs,*bi = b->i,*bj = b->j;
192515f8d2SSatish Balay   int                *ajtmpold,*ajtmp,nz,row;
20329f5518SBarry Smith   int                *diag_offset=b->diag,idx,*ai=a->i,*aj=a->j,*pj;
21329f5518SBarry Smith   MatScalar          *pv,*v,*rtmp,m1,m2,m3,m4,*pc,*w,*x,x1,x2,x3,x4;
222515f8d2SSatish Balay   MatScalar          p1,p2,p3,p4;
233f1db9ecSBarry Smith   MatScalar          *ba = b->a,*aa = a->a;
244eeb42bcSBarry Smith 
253a40ed3dSBarry Smith   PetscFunctionBegin;
264eeb42bcSBarry Smith   ierr  = ISGetIndices(isrow,&r);CHKERRQ(ierr);
274eeb42bcSBarry Smith   ierr  = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
28b0a32e0cSBarry Smith   ierr  = PetscMalloc(4*(n+1)*sizeof(MatScalar),&rtmp);CHKERRQ(ierr);
294eeb42bcSBarry Smith 
304eeb42bcSBarry Smith   for (i=0; i<n; i++) {
314078e994SBarry Smith     nz    = bi[i+1] - bi[i];
324078e994SBarry Smith     ajtmp = bj + bi[i];
334eeb42bcSBarry Smith     for  (j=0; j<nz; j++) {
344eeb42bcSBarry Smith       x = rtmp+4*ajtmp[j]; x[0] = x[1] = x[2] = x[3] = 0.0;
354eeb42bcSBarry Smith     }
364eeb42bcSBarry Smith     /* load in initial (unfactored row) */
374eeb42bcSBarry Smith     idx      = r[i];
384078e994SBarry Smith     nz       = ai[idx+1] - ai[idx];
394078e994SBarry Smith     ajtmpold = aj + ai[idx];
404078e994SBarry Smith     v        = aa + 4*ai[idx];
414eeb42bcSBarry Smith     for (j=0; j<nz; j++) {
424eeb42bcSBarry Smith       x    = rtmp+4*ic[ajtmpold[j]];
434eeb42bcSBarry Smith       x[0] = v[0]; x[1] = v[1]; x[2] = v[2]; x[3] = v[3];
444eeb42bcSBarry Smith       v    += 4;
454eeb42bcSBarry Smith     }
464eeb42bcSBarry Smith     row = *ajtmp++;
474eeb42bcSBarry Smith     while (row < i) {
484eeb42bcSBarry Smith       pc = rtmp + 4*row;
494eeb42bcSBarry Smith       p1 = pc[0]; p2 = pc[1]; p3 = pc[2]; p4 = pc[3];
5088685aaeSLois Curfman McInnes       if (p1 != 0.0 || p2 != 0.0 || p3 != 0.0 || p4 != 0.0) {
514078e994SBarry Smith         pv = ba + 4*diag_offset[row];
524078e994SBarry Smith         pj = bj + diag_offset[row] + 1;
534eeb42bcSBarry Smith         x1 = pv[0]; x2 = pv[1]; x3 = pv[2]; x4 = pv[3];
544eeb42bcSBarry Smith         pc[0] = m1 = p1*x1 + p3*x2;
554eeb42bcSBarry Smith         pc[1] = m2 = p2*x1 + p4*x2;
564eeb42bcSBarry Smith         pc[2] = m3 = p1*x3 + p3*x4;
574eeb42bcSBarry Smith         pc[3] = m4 = p2*x3 + p4*x4;
584078e994SBarry Smith         nz = bi[row+1] - diag_offset[row] - 1;
594eeb42bcSBarry Smith         pv += 4;
604eeb42bcSBarry Smith         for (j=0; j<nz; j++) {
614eeb42bcSBarry Smith           x1   = pv[0]; x2 = pv[1]; x3 = pv[2]; x4 = pv[3];
624eeb42bcSBarry Smith           x    = rtmp + 4*pj[j];
634eeb42bcSBarry Smith           x[0] -= m1*x1 + m3*x2;
644eeb42bcSBarry Smith           x[1] -= m2*x1 + m4*x2;
654eeb42bcSBarry Smith           x[2] -= m1*x3 + m3*x4;
664eeb42bcSBarry Smith           x[3] -= m2*x3 + m4*x4;
674eeb42bcSBarry Smith           pv   += 4;
684eeb42bcSBarry Smith         }
69b0a32e0cSBarry Smith         PetscLogFlops(16*nz+12);
704eeb42bcSBarry Smith       }
714eeb42bcSBarry Smith       row = *ajtmp++;
724eeb42bcSBarry Smith     }
734eeb42bcSBarry Smith     /* finished row so stick it into b->a */
744078e994SBarry Smith     pv = ba + 4*bi[i];
754078e994SBarry Smith     pj = bj + bi[i];
764078e994SBarry Smith     nz = bi[i+1] - bi[i];
774eeb42bcSBarry Smith     for (j=0; j<nz; j++) {
784eeb42bcSBarry Smith       x     = rtmp+4*pj[j];
794eeb42bcSBarry Smith       pv[0] = x[0]; pv[1] = x[1]; pv[2] = x[2]; pv[3] = x[3];
804eeb42bcSBarry Smith       pv   += 4;
814eeb42bcSBarry Smith     }
824eeb42bcSBarry Smith     /* invert diagonal block */
834078e994SBarry Smith     w = ba + 4*diag_offset[i];
844cd38560SBarry Smith     ierr = Kernel_A_gets_inverse_A_2(w);CHKERRQ(ierr);
854eeb42bcSBarry Smith   }
864eeb42bcSBarry Smith 
87606d414cSSatish Balay   ierr = PetscFree(rtmp);CHKERRQ(ierr);
884eeb42bcSBarry Smith   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
894eeb42bcSBarry Smith   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
904eeb42bcSBarry Smith   C->factor = FACTOR_LU;
914eeb42bcSBarry Smith   C->assembled = PETSC_TRUE;
92b0a32e0cSBarry Smith   PetscLogFlops(1.3333*8*b->mbs); /* from inverting diagonal blocks */
933a40ed3dSBarry Smith   PetscFunctionReturn(0);
944eeb42bcSBarry Smith }
954cd38560SBarry Smith /*
964cd38560SBarry Smith       Version for when blocks are 2 by 2 Using natural ordering
974cd38560SBarry Smith */
984a2ae208SSatish Balay #undef __FUNCT__
994a2ae208SSatish Balay #define __FUNCT__ "MatLUFactorNumeric_SeqBAIJ_2_NaturalOrdering"
1004cd38560SBarry Smith int MatLUFactorNumeric_SeqBAIJ_2_NaturalOrdering(Mat A,Mat *B)
1014cd38560SBarry Smith {
1024cd38560SBarry Smith   Mat                C = *B;
1034cd38560SBarry Smith   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data,*b = (Mat_SeqBAIJ *)C->data;
1044cd38560SBarry Smith   int                ierr,i,j,n = a->mbs,*bi = b->i,*bj = b->j;
1052515f8d2SSatish Balay   int                *ajtmpold,*ajtmp,nz,row;
106329f5518SBarry Smith   int                *diag_offset = b->diag,*ai=a->i,*aj=a->j,*pj;
107329f5518SBarry Smith   MatScalar          *pv,*v,*rtmp,*pc,*w,*x;
1082515f8d2SSatish Balay   MatScalar          p1,p2,p3,p4,m1,m2,m3,m4,x1,x2,x3,x4;
1094cd38560SBarry Smith   MatScalar          *ba = b->a,*aa = a->a;
1104cd38560SBarry Smith 
1114cd38560SBarry Smith   PetscFunctionBegin;
112b0a32e0cSBarry Smith   ierr = PetscMalloc(4*(n+1)*sizeof(MatScalar),&rtmp);CHKERRQ(ierr);
1134cd38560SBarry Smith 
1144cd38560SBarry Smith   for (i=0; i<n; i++) {
1154cd38560SBarry Smith     nz    = bi[i+1] - bi[i];
1164cd38560SBarry Smith     ajtmp = bj + bi[i];
1174cd38560SBarry Smith     for  (j=0; j<nz; j++) {
1184cd38560SBarry Smith       x = rtmp+4*ajtmp[j];
1194cd38560SBarry Smith       x[0]  = x[1]  = x[2]  = x[3]  = 0.0;
1204cd38560SBarry Smith     }
1214cd38560SBarry Smith     /* load in initial (unfactored row) */
1224cd38560SBarry Smith     nz       = ai[i+1] - ai[i];
1234cd38560SBarry Smith     ajtmpold = aj + ai[i];
1244cd38560SBarry Smith     v        = aa + 4*ai[i];
1254cd38560SBarry Smith     for (j=0; j<nz; j++) {
1264cd38560SBarry Smith       x    = rtmp+4*ajtmpold[j];
1274cd38560SBarry Smith       x[0]  = v[0];  x[1]  = v[1];  x[2]  = v[2];  x[3]  = v[3];
1284cd38560SBarry Smith       v    += 4;
1294cd38560SBarry Smith     }
1304cd38560SBarry Smith     row = *ajtmp++;
1314cd38560SBarry Smith     while (row < i) {
1324cd38560SBarry Smith       pc  = rtmp + 4*row;
1334cd38560SBarry Smith       p1  = pc[0];  p2  = pc[1];  p3  = pc[2];  p4  = pc[3];
1344cd38560SBarry Smith       if (p1 != 0.0 || p2 != 0.0 || p3 != 0.0 || p4 != 0.0) {
1354cd38560SBarry Smith         pv = ba + 4*diag_offset[row];
1364cd38560SBarry Smith         pj = bj + diag_offset[row] + 1;
1374cd38560SBarry Smith         x1  = pv[0];  x2  = pv[1];  x3  = pv[2];  x4  = pv[3];
1384cd38560SBarry Smith         pc[0] = m1 = p1*x1 + p3*x2;
1394cd38560SBarry Smith         pc[1] = m2 = p2*x1 + p4*x2;
1404cd38560SBarry Smith         pc[2] = m3 = p1*x3 + p3*x4;
1414cd38560SBarry Smith         pc[3] = m4 = p2*x3 + p4*x4;
1424cd38560SBarry Smith         nz = bi[row+1] - diag_offset[row] - 1;
1434cd38560SBarry Smith         pv += 4;
1444cd38560SBarry Smith         for (j=0; j<nz; j++) {
1454cd38560SBarry Smith           x1   = pv[0];  x2  = pv[1];   x3 = pv[2];  x4  = pv[3];
1464cd38560SBarry Smith           x    = rtmp + 4*pj[j];
1474cd38560SBarry Smith           x[0] -= m1*x1 + m3*x2;
1484cd38560SBarry Smith           x[1] -= m2*x1 + m4*x2;
1494cd38560SBarry Smith           x[2] -= m1*x3 + m3*x4;
1504cd38560SBarry Smith           x[3] -= m2*x3 + m4*x4;
1514cd38560SBarry Smith           pv   += 4;
1524cd38560SBarry Smith         }
153b0a32e0cSBarry Smith         PetscLogFlops(16*nz+12);
1544cd38560SBarry Smith       }
1554cd38560SBarry Smith       row = *ajtmp++;
1564cd38560SBarry Smith     }
1574cd38560SBarry Smith     /* finished row so stick it into b->a */
1584cd38560SBarry Smith     pv = ba + 4*bi[i];
1594cd38560SBarry Smith     pj = bj + bi[i];
1604cd38560SBarry Smith     nz = bi[i+1] - bi[i];
1614cd38560SBarry Smith     for (j=0; j<nz; j++) {
1624cd38560SBarry Smith       x      = rtmp+4*pj[j];
1634cd38560SBarry Smith       pv[0]  = x[0];  pv[1]  = x[1];  pv[2]  = x[2];  pv[3]  = x[3];
1644cd38560SBarry Smith       pv   += 4;
1654cd38560SBarry Smith     }
1664cd38560SBarry Smith     /* invert diagonal block */
1674cd38560SBarry Smith     w = ba + 4*diag_offset[i];
1684cd38560SBarry Smith     ierr = Kernel_A_gets_inverse_A_2(w);CHKERRQ(ierr);
1694cd38560SBarry Smith     /*Kernel_A_gets_inverse_A(bs,w,v_pivots,v_work);*/
1704cd38560SBarry Smith   }
1714cd38560SBarry Smith 
172606d414cSSatish Balay   ierr = PetscFree(rtmp);CHKERRQ(ierr);
1734cd38560SBarry Smith   C->factor    = FACTOR_LU;
1744cd38560SBarry Smith   C->assembled = PETSC_TRUE;
175b0a32e0cSBarry Smith   PetscLogFlops(1.3333*8*b->mbs); /* from inverting diagonal blocks */
1764cd38560SBarry Smith   PetscFunctionReturn(0);
1774cd38560SBarry Smith }
1787fc0212eSBarry Smith 
1797fc0212eSBarry Smith /* ----------------------------------------------------------- */
1807fc0212eSBarry Smith /*
1817fc0212eSBarry Smith      Version for when blocks are 1 by 1.
1827fc0212eSBarry Smith */
1834a2ae208SSatish Balay #undef __FUNCT__
1844a2ae208SSatish Balay #define __FUNCT__ "MatLUFactorNumeric_SeqBAIJ_1"
1857fc0212eSBarry Smith int MatLUFactorNumeric_SeqBAIJ_1(Mat A,Mat *B)
1867fc0212eSBarry Smith {
1877fc0212eSBarry Smith   Mat                C = *B;
1887fc0212eSBarry Smith   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data,*b = (Mat_SeqBAIJ *)C->data;
1897cdf2dbbSSatish Balay   IS                 isrow = b->row,isicol = b->icol;
1904078e994SBarry Smith   int                *r,*ic,ierr,i,j,n = a->mbs,*bi = b->i,*bj = b->j;
1914078e994SBarry Smith   int                *ajtmpold,*ajtmp,nz,row,*ai = a->i,*aj = a->j;
192329f5518SBarry Smith   int                *diag_offset = b->diag,diag,*pj;
193329f5518SBarry Smith   MatScalar          *pv,*v,*rtmp,multiplier,*pc;
1943f1db9ecSBarry Smith   MatScalar          *ba = b->a,*aa = a->a;
1957fc0212eSBarry Smith 
1963a40ed3dSBarry Smith   PetscFunctionBegin;
1977fc0212eSBarry Smith   ierr  = ISGetIndices(isrow,&r);CHKERRQ(ierr);
1987fc0212eSBarry Smith   ierr  = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
199b0a32e0cSBarry Smith   ierr  = PetscMalloc((n+1)*sizeof(MatScalar),&rtmp);CHKERRQ(ierr);
2007fc0212eSBarry Smith 
2017fc0212eSBarry Smith   for (i=0; i<n; i++) {
2024078e994SBarry Smith     nz    = bi[i+1] - bi[i];
2034078e994SBarry Smith     ajtmp = bj + bi[i];
2047fc0212eSBarry Smith     for  (j=0; j<nz; j++) rtmp[ajtmp[j]] = 0.0;
2057fc0212eSBarry Smith 
2067fc0212eSBarry Smith     /* load in initial (unfactored row) */
2074078e994SBarry Smith     nz       = ai[r[i]+1] - ai[r[i]];
2084078e994SBarry Smith     ajtmpold = aj + ai[r[i]];
2094078e994SBarry Smith     v        = aa + ai[r[i]];
2107fc0212eSBarry Smith     for (j=0; j<nz; j++) rtmp[ic[ajtmpold[j]]] =  v[j];
2117fc0212eSBarry Smith 
2127fc0212eSBarry Smith     row = *ajtmp++;
2137fc0212eSBarry Smith     while (row < i) {
2147fc0212eSBarry Smith       pc = rtmp + row;
2157fc0212eSBarry Smith       if (*pc != 0.0) {
2164078e994SBarry Smith         pv         = ba + diag_offset[row];
2174078e994SBarry Smith         pj         = bj + diag_offset[row] + 1;
2187fc0212eSBarry Smith         multiplier = *pc * *pv++;
2197fc0212eSBarry Smith         *pc        = multiplier;
2204078e994SBarry Smith         nz         = bi[row+1] - diag_offset[row] - 1;
2217fc0212eSBarry Smith         for (j=0; j<nz; j++) rtmp[pj[j]] -= multiplier * pv[j];
222b0a32e0cSBarry Smith         PetscLogFlops(1+2*nz);
2237fc0212eSBarry Smith       }
2247fc0212eSBarry Smith       row = *ajtmp++;
2257fc0212eSBarry Smith     }
2267fc0212eSBarry Smith     /* finished row so stick it into b->a */
2274078e994SBarry Smith     pv = ba + bi[i];
2284078e994SBarry Smith     pj = bj + bi[i];
2294078e994SBarry Smith     nz = bi[i+1] - bi[i];
2307fc0212eSBarry Smith     for (j=0; j<nz; j++) {pv[j] = rtmp[pj[j]];}
2314078e994SBarry Smith     diag = diag_offset[i] - bi[i];
2327fc0212eSBarry Smith     /* check pivot entry for current row */
233*a73cf429SBarry Smith     if (pv[diag] == 0.0) {
23429bbc08cSBarry Smith       SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot");
2357fc0212eSBarry Smith     }
2367fc0212eSBarry Smith     pv[diag] = 1.0/pv[diag];
2377fc0212eSBarry Smith   }
2387fc0212eSBarry Smith 
239606d414cSSatish Balay   ierr = PetscFree(rtmp);CHKERRQ(ierr);
2407fc0212eSBarry Smith   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
2417fc0212eSBarry Smith   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
2427fc0212eSBarry Smith   C->factor    = FACTOR_LU;
2437fc0212eSBarry Smith   C->assembled = PETSC_TRUE;
244b0a32e0cSBarry Smith   PetscLogFlops(C->n);
2453a40ed3dSBarry Smith   PetscFunctionReturn(0);
2467fc0212eSBarry Smith }
2477fc0212eSBarry Smith 
248273d9f13SBarry Smith 
2495d517e7eSBarry Smith /* ----------------------------------------------------------- */
2504a2ae208SSatish Balay #undef __FUNCT__
2514a2ae208SSatish Balay #define __FUNCT__ "MatLUFactor_SeqBAIJ"
252b380c88cSHong Zhang int MatLUFactor_SeqBAIJ(Mat A,IS row,IS col,MatFactorInfo *info)
2535d517e7eSBarry Smith {
254273d9f13SBarry Smith   int         ierr;
2555d517e7eSBarry Smith   Mat         C;
2565d517e7eSBarry Smith 
2573a40ed3dSBarry Smith   PetscFunctionBegin;
258b9b97703SBarry Smith   ierr = MatLUFactorSymbolic(A,row,col,info,&C);CHKERRQ(ierr);
2597fc0212eSBarry Smith   ierr = MatLUFactorNumeric(A,&C);CHKERRQ(ierr);
260273d9f13SBarry Smith   ierr = MatHeaderCopy(A,C);CHKERRQ(ierr);
261b0a32e0cSBarry Smith   PetscLogObjectParent(A,((Mat_SeqBAIJ*)(A->data))->icol);
2623a40ed3dSBarry Smith   PetscFunctionReturn(0);
2635d517e7eSBarry Smith }
2644cd38560SBarry Smith 
2654cd38560SBarry Smith 
266