xref: /petsc/src/mat/impls/baij/seq/baijfact4.c (revision 9566063d113dddea24716c546802770db7481bc0)
1be1d678aSKris Buschelman 
283287d42SBarry Smith /*
383287d42SBarry Smith     Factorization code for BAIJ format.
483287d42SBarry Smith */
5c6db04a5SJed Brown #include <../src/mat/impls/baij/seq/baij.h>
6af0996ceSBarry Smith #include <petsc/private/kernels/blockinvert.h>
783287d42SBarry Smith 
883287d42SBarry Smith /* ----------------------------------------------------------- */
906e38f1dSHong Zhang PetscErrorCode MatLUFactorNumeric_SeqBAIJ_N_inplace(Mat C,Mat A,const MatFactorInfo *info)
1083287d42SBarry Smith {
1183287d42SBarry Smith   Mat_SeqBAIJ    *a    = (Mat_SeqBAIJ*)A->data,*b = (Mat_SeqBAIJ*)C->data;
1283287d42SBarry Smith   IS             isrow = b->row,isicol = b->icol;
135d0c19d7SBarry Smith   const PetscInt *r,*ic;
145d0c19d7SBarry Smith   PetscInt       i,j,n = a->mbs,*bi = b->i,*bj = b->j;
153bc0b13bSBarry Smith   PetscInt       *ajtmpold,*ajtmp,nz,row,*ai=a->i,*aj=a->j,k,flg;
16d0f46423SBarry Smith   PetscInt       *diag_offset=b->diag,diag,bs=A->rmap->bs,bs2 = a->bs2,*pj,*v_pivots;
1783287d42SBarry Smith   MatScalar      *ba = b->a,*aa = a->a,*pv,*v,*rtmp,*multiplier,*v_work,*pc,*w;
185f8bbccaSHong Zhang   PetscBool      allowzeropivot,zeropivotdetected;
1983287d42SBarry Smith 
2083287d42SBarry Smith   PetscFunctionBegin;
21*9566063dSJacob Faibussowitsch   PetscCall(ISGetIndices(isrow,&r));
22*9566063dSJacob Faibussowitsch   PetscCall(ISGetIndices(isicol,&ic));
235f8bbccaSHong Zhang   allowzeropivot = PetscNot(A->erroriffailure);
245f8bbccaSHong Zhang 
25*9566063dSJacob Faibussowitsch   PetscCall(PetscCalloc1(bs2*(n+1),&rtmp));
2683287d42SBarry Smith   /* generate work space needed by dense LU factorization */
27*9566063dSJacob Faibussowitsch   PetscCall(PetscMalloc3(bs,&v_work,bs2,&multiplier,bs,&v_pivots));
2883287d42SBarry Smith 
2983287d42SBarry Smith   for (i=0; i<n; i++) {
3083287d42SBarry Smith     nz    = bi[i+1] - bi[i];
3183287d42SBarry Smith     ajtmp = bj + bi[i];
3283287d42SBarry Smith     for  (j=0; j<nz; j++) {
33*9566063dSJacob Faibussowitsch       PetscCall(PetscArrayzero(rtmp+bs2*ajtmp[j],bs2));
3483287d42SBarry Smith     }
3583287d42SBarry Smith     /* load in initial (unfactored row) */
3683287d42SBarry Smith     nz       = ai[r[i]+1] - ai[r[i]];
3783287d42SBarry Smith     ajtmpold = aj + ai[r[i]];
3883287d42SBarry Smith     v        = aa + bs2*ai[r[i]];
3983287d42SBarry Smith     for (j=0; j<nz; j++) {
40*9566063dSJacob Faibussowitsch       PetscCall(PetscArraycpy(rtmp+bs2*ic[ajtmpold[j]],v+bs2*j,bs2));
4183287d42SBarry Smith     }
4283287d42SBarry Smith     row = *ajtmp++;
4383287d42SBarry Smith     while (row < i) {
4483287d42SBarry Smith       pc = rtmp + bs2*row;
4583287d42SBarry Smith /*      if (*pc) { */
46c35f09e5SBarry Smith       for (flg=0,k=0; k<bs2; k++) {
47c35f09e5SBarry Smith         if (pc[k]!=0.0) {
48c35f09e5SBarry Smith           flg = 1;
49c35f09e5SBarry Smith           break;
50c35f09e5SBarry Smith         }
51c35f09e5SBarry Smith       }
5283287d42SBarry Smith       if (flg) {
5383287d42SBarry Smith         pv = ba + bs2*diag_offset[row];
5483287d42SBarry Smith         pj = bj + diag_offset[row] + 1;
5596b95a6bSBarry Smith         PetscKernel_A_gets_A_times_B(bs,pc,pv,multiplier);
5683287d42SBarry Smith         nz  = bi[row+1] - diag_offset[row] - 1;
5783287d42SBarry Smith         pv += bs2;
5883287d42SBarry Smith         for (j=0; j<nz; j++) {
5996b95a6bSBarry Smith           PetscKernel_A_gets_A_minus_B_times_C(bs,rtmp+bs2*pj[j],pc,pv+bs2*j);
6083287d42SBarry Smith         }
61*9566063dSJacob Faibussowitsch         PetscCall(PetscLogFlops(2.0*bs*bs2*(nz+1.0)-bs));
6283287d42SBarry Smith       }
6383287d42SBarry Smith       row = *ajtmp++;
6483287d42SBarry Smith     }
6583287d42SBarry Smith     /* finished row so stick it into b->a */
6683287d42SBarry Smith     pv = ba + bs2*bi[i];
6783287d42SBarry Smith     pj = bj + bi[i];
6883287d42SBarry Smith     nz = bi[i+1] - bi[i];
6983287d42SBarry Smith     for (j=0; j<nz; j++) {
70*9566063dSJacob Faibussowitsch       PetscCall(PetscArraycpy(pv+bs2*j,rtmp+bs2*pj[j],bs2));
7183287d42SBarry Smith     }
7283287d42SBarry Smith     diag = diag_offset[i] - bi[i];
7383287d42SBarry Smith     /* invert diagonal block */
7483287d42SBarry Smith     w    = pv + bs2*diag;
755f8bbccaSHong Zhang 
76*9566063dSJacob Faibussowitsch     PetscCall(PetscKernel_A_gets_inverse_A(bs,w,v_pivots,v_work,allowzeropivot,&zeropivotdetected));
777b6c816cSBarry Smith     if (zeropivotdetected) C->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
7883287d42SBarry Smith   }
7983287d42SBarry Smith 
80*9566063dSJacob Faibussowitsch   PetscCall(PetscFree(rtmp));
81*9566063dSJacob Faibussowitsch   PetscCall(PetscFree3(v_work,multiplier,v_pivots));
82*9566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(isicol,&ic));
83*9566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(isrow,&r));
8426fbe8dcSKarl Rupp 
8506e38f1dSHong Zhang   C->ops->solve          = MatSolve_SeqBAIJ_N_inplace;
8606e38f1dSHong Zhang   C->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_N_inplace;
8783287d42SBarry Smith   C->assembled           = PETSC_TRUE;
8826fbe8dcSKarl Rupp 
89*9566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(1.333333333333*bs*bs2*b->mbs)); /* from inverting diagonal blocks */
9083287d42SBarry Smith   PetscFunctionReturn(0);
9183287d42SBarry Smith }
92