xref: /petsc/src/mat/impls/baij/seq/baijfact.c (revision 958c9bcce270cdc364c8832dcd56bbb1c4a38e24)
1 /*
2     Factorization code for BAIJ format.
3 */
4 #include "src/mat/impls/baij/seq/baij.h"
5 #include "src/inline/ilu.h"
6 
7 /* ------------------------------------------------------------*/
8 /*
9       Version for when blocks are 2 by 2
10 */
11 #undef __FUNCT__
12 #define __FUNCT__ "MatLUFactorNumeric_SeqBAIJ_2"
13 int MatLUFactorNumeric_SeqBAIJ_2(Mat A,Mat *B)
14 {
15   Mat                C = *B;
16   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data,*b = (Mat_SeqBAIJ *)C->data;
17   IS                 isrow = b->row,isicol = b->icol;
18   int                *r,*ic,ierr,i,j,n = a->mbs,*bi = b->i,*bj = b->j;
19   int                *ajtmpold,*ajtmp,nz,row;
20   int                *diag_offset=b->diag,idx,*ai=a->i,*aj=a->j,*pj;
21   MatScalar          *pv,*v,*rtmp,m1,m2,m3,m4,*pc,*w,*x,x1,x2,x3,x4;
22   MatScalar          p1,p2,p3,p4;
23   MatScalar          *ba = b->a,*aa = a->a;
24 
25   PetscFunctionBegin;
26   ierr  = ISGetIndices(isrow,&r);CHKERRQ(ierr);
27   ierr  = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
28   ierr  = PetscMalloc(4*(n+1)*sizeof(MatScalar),&rtmp);CHKERRQ(ierr);
29 
30   for (i=0; i<n; i++) {
31     nz    = bi[i+1] - bi[i];
32     ajtmp = bj + bi[i];
33     for  (j=0; j<nz; j++) {
34       x = rtmp+4*ajtmp[j]; x[0] = x[1] = x[2] = x[3] = 0.0;
35     }
36     /* load in initial (unfactored row) */
37     idx      = r[i];
38     nz       = ai[idx+1] - ai[idx];
39     ajtmpold = aj + ai[idx];
40     v        = aa + 4*ai[idx];
41     for (j=0; j<nz; j++) {
42       x    = rtmp+4*ic[ajtmpold[j]];
43       x[0] = v[0]; x[1] = v[1]; x[2] = v[2]; x[3] = v[3];
44       v    += 4;
45     }
46     row = *ajtmp++;
47     while (row < i) {
48       pc = rtmp + 4*row;
49       p1 = pc[0]; p2 = pc[1]; p3 = pc[2]; p4 = pc[3];
50       if (p1 != 0.0 || p2 != 0.0 || p3 != 0.0 || p4 != 0.0) {
51         pv = ba + 4*diag_offset[row];
52         pj = bj + diag_offset[row] + 1;
53         x1 = pv[0]; x2 = pv[1]; x3 = pv[2]; x4 = pv[3];
54         pc[0] = m1 = p1*x1 + p3*x2;
55         pc[1] = m2 = p2*x1 + p4*x2;
56         pc[2] = m3 = p1*x3 + p3*x4;
57         pc[3] = m4 = p2*x3 + p4*x4;
58         nz = bi[row+1] - diag_offset[row] - 1;
59         pv += 4;
60         for (j=0; j<nz; j++) {
61           x1   = pv[0]; x2 = pv[1]; x3 = pv[2]; x4 = pv[3];
62           x    = rtmp + 4*pj[j];
63           x[0] -= m1*x1 + m3*x2;
64           x[1] -= m2*x1 + m4*x2;
65           x[2] -= m1*x3 + m3*x4;
66           x[3] -= m2*x3 + m4*x4;
67           pv   += 4;
68         }
69         PetscLogFlops(16*nz+12);
70       }
71       row = *ajtmp++;
72     }
73     /* finished row so stick it into b->a */
74     pv = ba + 4*bi[i];
75     pj = bj + bi[i];
76     nz = bi[i+1] - bi[i];
77     for (j=0; j<nz; j++) {
78       x     = rtmp+4*pj[j];
79       pv[0] = x[0]; pv[1] = x[1]; pv[2] = x[2]; pv[3] = x[3];
80       pv   += 4;
81     }
82     /* invert diagonal block */
83     w = ba + 4*diag_offset[i];
84     ierr = Kernel_A_gets_inverse_A_2(w);CHKERRQ(ierr);
85   }
86 
87   ierr = PetscFree(rtmp);CHKERRQ(ierr);
88   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
89   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
90   C->factor = FACTOR_LU;
91   C->assembled = PETSC_TRUE;
92   PetscLogFlops(1.3333*8*b->mbs); /* from inverting diagonal blocks */
93   PetscFunctionReturn(0);
94 }
95 /*
96       Version for when blocks are 2 by 2 Using natural ordering
97 */
98 #undef __FUNCT__
99 #define __FUNCT__ "MatLUFactorNumeric_SeqBAIJ_2_NaturalOrdering"
100 int MatLUFactorNumeric_SeqBAIJ_2_NaturalOrdering(Mat A,Mat *B)
101 {
102   Mat                C = *B;
103   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data,*b = (Mat_SeqBAIJ *)C->data;
104   int                ierr,i,j,n = a->mbs,*bi = b->i,*bj = b->j;
105   int                *ajtmpold,*ajtmp,nz,row;
106   int                *diag_offset = b->diag,*ai=a->i,*aj=a->j,*pj;
107   MatScalar          *pv,*v,*rtmp,*pc,*w,*x;
108   MatScalar          p1,p2,p3,p4,m1,m2,m3,m4,x1,x2,x3,x4;
109   MatScalar          *ba = b->a,*aa = a->a;
110 
111   PetscFunctionBegin;
112   ierr = PetscMalloc(4*(n+1)*sizeof(MatScalar),&rtmp);CHKERRQ(ierr);
113 
114   for (i=0; i<n; i++) {
115     nz    = bi[i+1] - bi[i];
116     ajtmp = bj + bi[i];
117     for  (j=0; j<nz; j++) {
118       x = rtmp+4*ajtmp[j];
119       x[0]  = x[1]  = x[2]  = x[3]  = 0.0;
120     }
121     /* load in initial (unfactored row) */
122     nz       = ai[i+1] - ai[i];
123     ajtmpold = aj + ai[i];
124     v        = aa + 4*ai[i];
125     for (j=0; j<nz; j++) {
126       x    = rtmp+4*ajtmpold[j];
127       x[0]  = v[0];  x[1]  = v[1];  x[2]  = v[2];  x[3]  = v[3];
128       v    += 4;
129     }
130     row = *ajtmp++;
131     while (row < i) {
132       pc  = rtmp + 4*row;
133       p1  = pc[0];  p2  = pc[1];  p3  = pc[2];  p4  = pc[3];
134       if (p1 != 0.0 || p2 != 0.0 || p3 != 0.0 || p4 != 0.0) {
135         pv = ba + 4*diag_offset[row];
136         pj = bj + diag_offset[row] + 1;
137         x1  = pv[0];  x2  = pv[1];  x3  = pv[2];  x4  = pv[3];
138         pc[0] = m1 = p1*x1 + p3*x2;
139         pc[1] = m2 = p2*x1 + p4*x2;
140         pc[2] = m3 = p1*x3 + p3*x4;
141         pc[3] = m4 = p2*x3 + p4*x4;
142         nz = bi[row+1] - diag_offset[row] - 1;
143         pv += 4;
144         for (j=0; j<nz; j++) {
145           x1   = pv[0];  x2  = pv[1];   x3 = pv[2];  x4  = pv[3];
146           x    = rtmp + 4*pj[j];
147           x[0] -= m1*x1 + m3*x2;
148           x[1] -= m2*x1 + m4*x2;
149           x[2] -= m1*x3 + m3*x4;
150           x[3] -= m2*x3 + m4*x4;
151           pv   += 4;
152         }
153         PetscLogFlops(16*nz+12);
154       }
155       row = *ajtmp++;
156     }
157     /* finished row so stick it into b->a */
158     pv = ba + 4*bi[i];
159     pj = bj + bi[i];
160     nz = bi[i+1] - bi[i];
161     for (j=0; j<nz; j++) {
162       x      = rtmp+4*pj[j];
163       pv[0]  = x[0];  pv[1]  = x[1];  pv[2]  = x[2];  pv[3]  = x[3];
164       pv   += 4;
165     }
166     /* invert diagonal block */
167     w = ba + 4*diag_offset[i];
168     ierr = Kernel_A_gets_inverse_A_2(w);CHKERRQ(ierr);
169     /*Kernel_A_gets_inverse_A(bs,w,v_pivots,v_work);*/
170   }
171 
172   ierr = PetscFree(rtmp);CHKERRQ(ierr);
173   C->factor    = FACTOR_LU;
174   C->assembled = PETSC_TRUE;
175   PetscLogFlops(1.3333*8*b->mbs); /* from inverting diagonal blocks */
176   PetscFunctionReturn(0);
177 }
178 
179 /* ----------------------------------------------------------- */
180 /*
181      Version for when blocks are 1 by 1.
182 */
183 #undef __FUNCT__
184 #define __FUNCT__ "MatLUFactorNumeric_SeqBAIJ_1"
185 int MatLUFactorNumeric_SeqBAIJ_1(Mat A,Mat *B)
186 {
187   Mat                C = *B;
188   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data,*b = (Mat_SeqBAIJ *)C->data;
189   IS                 isrow = b->row,isicol = b->icol;
190   int                *r,*ic,ierr,i,j,n = a->mbs,*bi = b->i,*bj = b->j;
191   int                *ajtmpold,*ajtmp,nz,row,*ai = a->i,*aj = a->j;
192   int                *diag_offset = b->diag,diag,*pj;
193   MatScalar          *pv,*v,*rtmp,multiplier,*pc;
194   MatScalar          *ba = b->a,*aa = a->a;
195 
196   PetscFunctionBegin;
197   ierr  = ISGetIndices(isrow,&r);CHKERRQ(ierr);
198   ierr  = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
199   ierr  = PetscMalloc((n+1)*sizeof(MatScalar),&rtmp);CHKERRQ(ierr);
200 
201   for (i=0; i<n; i++) {
202     nz    = bi[i+1] - bi[i];
203     ajtmp = bj + bi[i];
204     for  (j=0; j<nz; j++) rtmp[ajtmp[j]] = 0.0;
205 
206     /* load in initial (unfactored row) */
207     nz       = ai[r[i]+1] - ai[r[i]];
208     ajtmpold = aj + ai[r[i]];
209     v        = aa + ai[r[i]];
210     for (j=0; j<nz; j++) rtmp[ic[ajtmpold[j]]] =  v[j];
211 
212     row = *ajtmp++;
213     while (row < i) {
214       pc = rtmp + row;
215       if (*pc != 0.0) {
216         pv         = ba + diag_offset[row];
217         pj         = bj + diag_offset[row] + 1;
218         multiplier = *pc * *pv++;
219         *pc        = multiplier;
220         nz         = bi[row+1] - diag_offset[row] - 1;
221         for (j=0; j<nz; j++) rtmp[pj[j]] -= multiplier * pv[j];
222         PetscLogFlops(1+2*nz);
223       }
224       row = *ajtmp++;
225     }
226     /* finished row so stick it into b->a */
227     pv = ba + bi[i];
228     pj = bj + bi[i];
229     nz = bi[i+1] - bi[i];
230     for (j=0; j<nz; j++) {pv[j] = rtmp[pj[j]];}
231     diag = diag_offset[i] - bi[i];
232     /* check pivot entry for current row */
233     if (pv[diag] != 0.0) {
234       SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot");
235     }
236     pv[diag] = 1.0/pv[diag];
237   }
238 
239   ierr = PetscFree(rtmp);CHKERRQ(ierr);
240   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
241   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
242   C->factor    = FACTOR_LU;
243   C->assembled = PETSC_TRUE;
244   PetscLogFlops(C->n);
245   PetscFunctionReturn(0);
246 }
247 
248 
249 /* ----------------------------------------------------------- */
250 #undef __FUNCT__
251 #define __FUNCT__ "MatLUFactor_SeqBAIJ"
252 int MatLUFactor_SeqBAIJ(Mat A,IS row,IS col,MatFactorInfo *info)
253 {
254   int         ierr;
255   Mat         C;
256 
257   PetscFunctionBegin;
258   ierr = MatLUFactorSymbolic(A,row,col,info,&C);CHKERRQ(ierr);
259   ierr = MatLUFactorNumeric(A,&C);CHKERRQ(ierr);
260   ierr = MatHeaderCopy(A,C);CHKERRQ(ierr);
261   PetscLogObjectParent(A,((Mat_SeqBAIJ*)(A->data))->icol);
262   PetscFunctionReturn(0);
263 }
264 
265 
266