xref: /petsc/src/mat/impls/baij/seq/baijfact.c (revision d4e9f3b6443ec535fe5e409f785fa3cf019ceb6c)
1 /*$Id: baijfact.c,v 1.90 2001/03/23 23:22:07 balay Exp $*/
2 /*
3     Factorization code for BAIJ format.
4 */
5 #include "src/mat/impls/baij/seq/baij.h"
6 #include "src/vec/vecimpl.h"
7 #include "src/inline/ilu.h"
8 
9 /* ------------------------------------------------------------*/
10 /*
11       Version for when blocks are 2 by 2
12 */
13 #undef __FUNCT__
14 #define __FUNCT__ "MatLUFactorNumeric_SeqBAIJ_2"
15 int MatLUFactorNumeric_SeqBAIJ_2(Mat A,Mat *B)
16 {
17   Mat                C = *B;
18   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data,*b = (Mat_SeqBAIJ *)C->data;
19   IS                 isrow = b->row,isicol = b->icol;
20   int                *r,*ic,ierr,i,j,n = a->mbs,*bi = b->i,*bj = b->j;
21   int                *ajtmpold,*ajtmp,nz,row;
22   int                *diag_offset=b->diag,idx,*ai=a->i,*aj=a->j,*pj;
23   MatScalar          *pv,*v,*rtmp,m1,m2,m3,m4,*pc,*w,*x,x1,x2,x3,x4;
24   MatScalar          p1,p2,p3,p4;
25   MatScalar          *ba = b->a,*aa = a->a;
26 
27   PetscFunctionBegin;
28   ierr  = ISGetIndices(isrow,&r);CHKERRQ(ierr);
29   ierr  = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
30   ierr  = PetscMalloc(4*(n+1)*sizeof(MatScalar),&rtmp);CHKERRQ(ierr);
31 
32   for (i=0; i<n; i++) {
33     nz    = bi[i+1] - bi[i];
34     ajtmp = bj + bi[i];
35     for  (j=0; j<nz; j++) {
36       x = rtmp+4*ajtmp[j]; x[0] = x[1] = x[2] = x[3] = 0.0;
37     }
38     /* load in initial (unfactored row) */
39     idx      = r[i];
40     nz       = ai[idx+1] - ai[idx];
41     ajtmpold = aj + ai[idx];
42     v        = aa + 4*ai[idx];
43     for (j=0; j<nz; j++) {
44       x    = rtmp+4*ic[ajtmpold[j]];
45       x[0] = v[0]; x[1] = v[1]; x[2] = v[2]; x[3] = v[3];
46       v    += 4;
47     }
48     row = *ajtmp++;
49     while (row < i) {
50       pc = rtmp + 4*row;
51       p1 = pc[0]; p2 = pc[1]; p3 = pc[2]; p4 = pc[3];
52       if (p1 != 0.0 || p2 != 0.0 || p3 != 0.0 || p4 != 0.0) {
53         pv = ba + 4*diag_offset[row];
54         pj = bj + diag_offset[row] + 1;
55         x1 = pv[0]; x2 = pv[1]; x3 = pv[2]; x4 = pv[3];
56         pc[0] = m1 = p1*x1 + p3*x2;
57         pc[1] = m2 = p2*x1 + p4*x2;
58         pc[2] = m3 = p1*x3 + p3*x4;
59         pc[3] = m4 = p2*x3 + p4*x4;
60         nz = bi[row+1] - diag_offset[row] - 1;
61         pv += 4;
62         for (j=0; j<nz; j++) {
63           x1   = pv[0]; x2 = pv[1]; x3 = pv[2]; x4 = pv[3];
64           x    = rtmp + 4*pj[j];
65           x[0] -= m1*x1 + m3*x2;
66           x[1] -= m2*x1 + m4*x2;
67           x[2] -= m1*x3 + m3*x4;
68           x[3] -= m2*x3 + m4*x4;
69           pv   += 4;
70         }
71         PetscLogFlops(16*nz+12);
72       }
73       row = *ajtmp++;
74     }
75     /* finished row so stick it into b->a */
76     pv = ba + 4*bi[i];
77     pj = bj + bi[i];
78     nz = bi[i+1] - bi[i];
79     for (j=0; j<nz; j++) {
80       x     = rtmp+4*pj[j];
81       pv[0] = x[0]; pv[1] = x[1]; pv[2] = x[2]; pv[3] = x[3];
82       pv   += 4;
83     }
84     /* invert diagonal block */
85     w = ba + 4*diag_offset[i];
86     ierr = Kernel_A_gets_inverse_A_2(w);CHKERRQ(ierr);
87   }
88 
89   ierr = PetscFree(rtmp);CHKERRQ(ierr);
90   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
91   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
92   C->factor = FACTOR_LU;
93   C->assembled = PETSC_TRUE;
94   PetscLogFlops(1.3333*8*b->mbs); /* from inverting diagonal blocks */
95   PetscFunctionReturn(0);
96 }
97 /*
98       Version for when blocks are 2 by 2 Using natural ordering
99 */
100 #undef __FUNCT__
101 #define __FUNCT__ "MatLUFactorNumeric_SeqBAIJ_2_NaturalOrdering"
102 int MatLUFactorNumeric_SeqBAIJ_2_NaturalOrdering(Mat A,Mat *B)
103 {
104   Mat                C = *B;
105   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data,*b = (Mat_SeqBAIJ *)C->data;
106   int                ierr,i,j,n = a->mbs,*bi = b->i,*bj = b->j;
107   int                *ajtmpold,*ajtmp,nz,row;
108   int                *diag_offset = b->diag,*ai=a->i,*aj=a->j,*pj;
109   MatScalar          *pv,*v,*rtmp,*pc,*w,*x;
110   MatScalar          p1,p2,p3,p4,m1,m2,m3,m4,x1,x2,x3,x4;
111   MatScalar          *ba = b->a,*aa = a->a;
112 
113   PetscFunctionBegin;
114   ierr = PetscMalloc(4*(n+1)*sizeof(MatScalar),&rtmp);CHKERRQ(ierr);
115 
116   for (i=0; i<n; i++) {
117     nz    = bi[i+1] - bi[i];
118     ajtmp = bj + bi[i];
119     for  (j=0; j<nz; j++) {
120       x = rtmp+4*ajtmp[j];
121       x[0]  = x[1]  = x[2]  = x[3]  = 0.0;
122     }
123     /* load in initial (unfactored row) */
124     nz       = ai[i+1] - ai[i];
125     ajtmpold = aj + ai[i];
126     v        = aa + 4*ai[i];
127     for (j=0; j<nz; j++) {
128       x    = rtmp+4*ajtmpold[j];
129       x[0]  = v[0];  x[1]  = v[1];  x[2]  = v[2];  x[3]  = v[3];
130       v    += 4;
131     }
132     row = *ajtmp++;
133     while (row < i) {
134       pc  = rtmp + 4*row;
135       p1  = pc[0];  p2  = pc[1];  p3  = pc[2];  p4  = pc[3];
136       if (p1 != 0.0 || p2 != 0.0 || p3 != 0.0 || p4 != 0.0) {
137         pv = ba + 4*diag_offset[row];
138         pj = bj + diag_offset[row] + 1;
139         x1  = pv[0];  x2  = pv[1];  x3  = pv[2];  x4  = pv[3];
140         pc[0] = m1 = p1*x1 + p3*x2;
141         pc[1] = m2 = p2*x1 + p4*x2;
142         pc[2] = m3 = p1*x3 + p3*x4;
143         pc[3] = m4 = p2*x3 + p4*x4;
144         nz = bi[row+1] - diag_offset[row] - 1;
145         pv += 4;
146         for (j=0; j<nz; j++) {
147           x1   = pv[0];  x2  = pv[1];   x3 = pv[2];  x4  = pv[3];
148           x    = rtmp + 4*pj[j];
149           x[0] -= m1*x1 + m3*x2;
150           x[1] -= m2*x1 + m4*x2;
151           x[2] -= m1*x3 + m3*x4;
152           x[3] -= m2*x3 + m4*x4;
153           pv   += 4;
154         }
155         PetscLogFlops(16*nz+12);
156       }
157       row = *ajtmp++;
158     }
159     /* finished row so stick it into b->a */
160     pv = ba + 4*bi[i];
161     pj = bj + bi[i];
162     nz = bi[i+1] - bi[i];
163     for (j=0; j<nz; j++) {
164       x      = rtmp+4*pj[j];
165       pv[0]  = x[0];  pv[1]  = x[1];  pv[2]  = x[2];  pv[3]  = x[3];
166       pv   += 4;
167     }
168     /* invert diagonal block */
169     w = ba + 4*diag_offset[i];
170     ierr = Kernel_A_gets_inverse_A_2(w);CHKERRQ(ierr);
171     /*Kernel_A_gets_inverse_A(bs,w,v_pivots,v_work);*/
172   }
173 
174   ierr = PetscFree(rtmp);CHKERRQ(ierr);
175   C->factor    = FACTOR_LU;
176   C->assembled = PETSC_TRUE;
177   PetscLogFlops(1.3333*8*b->mbs); /* from inverting diagonal blocks */
178   PetscFunctionReturn(0);
179 }
180 
181 /* ----------------------------------------------------------- */
182 /*
183      Version for when blocks are 1 by 1.
184 */
185 #undef __FUNCT__
186 #define __FUNCT__ "MatLUFactorNumeric_SeqBAIJ_1"
187 int MatLUFactorNumeric_SeqBAIJ_1(Mat A,Mat *B)
188 {
189   Mat                C = *B;
190   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data,*b = (Mat_SeqBAIJ *)C->data;
191   IS                 isrow = b->row,isicol = b->icol;
192   int                *r,*ic,ierr,i,j,n = a->mbs,*bi = b->i,*bj = b->j;
193   int                *ajtmpold,*ajtmp,nz,row,*ai = a->i,*aj = a->j;
194   int                *diag_offset = b->diag,diag,*pj;
195   MatScalar          *pv,*v,*rtmp,multiplier,*pc;
196   MatScalar          *ba = b->a,*aa = a->a;
197 
198   PetscFunctionBegin;
199   ierr  = ISGetIndices(isrow,&r);CHKERRQ(ierr);
200   ierr  = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
201   ierr  = PetscMalloc((n+1)*sizeof(MatScalar),&rtmp);CHKERRQ(ierr);
202 
203   for (i=0; i<n; i++) {
204     nz    = bi[i+1] - bi[i];
205     ajtmp = bj + bi[i];
206     for  (j=0; j<nz; j++) rtmp[ajtmp[j]] = 0.0;
207 
208     /* load in initial (unfactored row) */
209     nz       = ai[r[i]+1] - ai[r[i]];
210     ajtmpold = aj + ai[r[i]];
211     v        = aa + ai[r[i]];
212     for (j=0; j<nz; j++) rtmp[ic[ajtmpold[j]]] =  v[j];
213 
214     row = *ajtmp++;
215     while (row < i) {
216       pc = rtmp + row;
217       if (*pc != 0.0) {
218         pv         = ba + diag_offset[row];
219         pj         = bj + diag_offset[row] + 1;
220         multiplier = *pc * *pv++;
221         *pc        = multiplier;
222         nz         = bi[row+1] - diag_offset[row] - 1;
223         for (j=0; j<nz; j++) rtmp[pj[j]] -= multiplier * pv[j];
224         PetscLogFlops(1+2*nz);
225       }
226       row = *ajtmp++;
227     }
228     /* finished row so stick it into b->a */
229     pv = ba + bi[i];
230     pj = bj + bi[i];
231     nz = bi[i+1] - bi[i];
232     for (j=0; j<nz; j++) {pv[j] = rtmp[pj[j]];}
233     diag = diag_offset[i] - bi[i];
234     /* check pivot entry for current row */
235     if (pv[diag] == 0.0) {
236       SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot");
237     }
238     pv[diag] = 1.0/pv[diag];
239   }
240 
241   ierr = PetscFree(rtmp);CHKERRQ(ierr);
242   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
243   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
244   C->factor    = FACTOR_LU;
245   C->assembled = PETSC_TRUE;
246   PetscLogFlops(C->n);
247   PetscFunctionReturn(0);
248 }
249 
250 
251 /* ----------------------------------------------------------- */
252 #undef __FUNCT__
253 #define __FUNCT__ "MatLUFactor_SeqBAIJ"
254 int MatLUFactor_SeqBAIJ(Mat A,IS row,IS col,MatFactorInfo *info)
255 {
256   int         ierr;
257   Mat         C;
258 
259   PetscFunctionBegin;
260   ierr = MatLUFactorSymbolic(A,row,col,info,&C);CHKERRQ(ierr);
261   ierr = MatLUFactorNumeric(A,&C);CHKERRQ(ierr);
262   ierr = MatHeaderCopy(A,C);CHKERRQ(ierr);
263   PetscLogObjectParent(A,((Mat_SeqBAIJ*)(A->data))->icol);
264   PetscFunctionReturn(0);
265 }
266 
267 
268