xref: /petsc/src/mat/impls/baij/seq/baijfact.c (revision ec3a8b7b2b6ff99871c5685f662fcbd7a6178392)
1 #ifndef lint
2 static char vcid[] = "$Id: baijfact.c,v 1.1 1996/02/13 22:33:11 bsmith Exp bsmith $";
3 #endif
4 /*
5     Factorization code for BAIJ format.
6 */
7 
8 #include "baij.h"
9 
10 /*
11     The symbolic factorization code is identical to that for AIJ format,
12   except for very small changes since this is now a SeqBAIJ datastructure.
13   NOT good code reuse.
14 */
15 int MatLUFactorSymbolic_SeqBAIJ(Mat A,IS isrow,IS iscol,double f,Mat *B)
16 {
17   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data, *b;
18   IS          isicol;
19   int         *r,*ic, ierr, i, n = a->mbs, *ai = a->i, *aj = a->j;
20   int         *ainew,*ajnew, jmax,*fill, *ajtmp, nz,shift = a->indexshift;
21   int         *idnew, idx, row,m,fm, nnz, nzi,len, realloc = 0,nzbd,*im;
22   int         bs = a->bs;
23 
24   if (a->m != a->n) SETERRQ(1,"MatLUFactorSymbolic_SeqBAIJ:Must be square");
25   if (!isrow) SETERRQ(1,"MatLUFactorSymbolic_SeqBAIJ:Must have row permutation");
26   if (!iscol) SETERRQ(1,"MatLUFactorSymbolic_SeqBAIJ:Must have col. permutation");
27 
28   ierr = ISInvertPermutation(iscol,&isicol); CHKERRQ(ierr);
29   ISGetIndices(isrow,&r); ISGetIndices(isicol,&ic);
30 
31   /* get new row pointers */
32   ainew = (int *) PetscMalloc( (n+1)*sizeof(int) ); CHKPTRQ(ainew);
33   ainew[0] = -shift;
34   /* don't know how many column pointers are needed so estimate */
35   jmax = (int) (f*ai[n]+(!shift));
36   ajnew = (int *) PetscMalloc( (jmax)*sizeof(int) ); CHKPTRQ(ajnew);
37   /* fill is a linked list of nonzeros in active row */
38   fill = (int *) PetscMalloc( (2*n+1)*sizeof(int)); CHKPTRQ(fill);
39   im = fill + n + 1;
40   /* idnew is location of diagonal in factor */
41   idnew = (int *) PetscMalloc( (n+1)*sizeof(int)); CHKPTRQ(idnew);
42   idnew[0] = -shift;
43 
44   for ( i=0; i<n; i++ ) {
45     /* first copy previous fill into linked list */
46     nnz     = nz    = ai[r[i]+1] - ai[r[i]];
47     ajtmp   = aj + ai[r[i]] + shift;
48     fill[n] = n;
49     while (nz--) {
50       fm  = n;
51       idx = ic[*ajtmp++ + shift];
52       do {
53         m  = fm;
54         fm = fill[m];
55       } while (fm < idx);
56       fill[m]   = idx;
57       fill[idx] = fm;
58     }
59     row = fill[n];
60     while ( row < i ) {
61       ajtmp = ajnew + idnew[row] + (!shift);
62       nzbd  = 1 + idnew[row] - ainew[row];
63       nz    = im[row] - nzbd;
64       fm    = row;
65       while (nz-- > 0) {
66         idx = *ajtmp++ + shift;
67         nzbd++;
68         if (idx == i) im[row] = nzbd;
69         do {
70           m  = fm;
71           fm = fill[m];
72         } while (fm < idx);
73         if (fm != idx) {
74           fill[m]   = idx;
75           fill[idx] = fm;
76           fm        = idx;
77           nnz++;
78         }
79       }
80       row = fill[row];
81     }
82     /* copy new filled row into permanent storage */
83     ainew[i+1] = ainew[i] + nnz;
84     if (ainew[i+1] > jmax+1) {
85       /* allocate a longer ajnew */
86       int maxadd;
87       maxadd = (int) ((f*(ai[n]+(!shift))*(n-i+5))/n);
88       if (maxadd < nnz) maxadd = (n-i)*(nnz+1);
89       jmax += maxadd;
90       ajtmp = (int *) PetscMalloc( jmax*sizeof(int) );CHKPTRQ(ajtmp);
91       PetscMemcpy(ajtmp,ajnew,(ainew[i]+shift)*sizeof(int));
92       PetscFree(ajnew);
93       ajnew = ajtmp;
94       realloc++; /* count how many times we realloc */
95     }
96     ajtmp = ajnew + ainew[i] + shift;
97     fm    = fill[n];
98     nzi   = 0;
99     im[i] = nnz;
100     while (nnz--) {
101       if (fm < i) nzi++;
102       *ajtmp++ = fm - shift;
103       fm       = fill[fm];
104     }
105     idnew[i] = ainew[i] + nzi;
106   }
107 
108   PLogInfo((PetscObject)A,
109     "Info:MatLUFactorSymbolic_SeqBAIJ:Reallocs %d Fill ratio:given %g needed %g\n",
110                              realloc,f,((double)ainew[n])/((double)ai[i]));
111 
112   ierr = ISRestoreIndices(isrow,&r); CHKERRQ(ierr);
113   ierr = ISRestoreIndices(isicol,&ic); CHKERRQ(ierr);
114 
115   PetscFree(fill);
116 
117   /* put together the new matrix */
118   ierr = MatCreateSeqBAIJ(A->comm,bs,bs*n,bs*n,0,PETSC_NULL,B); CHKERRQ(ierr);
119   PLogObjectParent(*B,isicol);
120   ierr = ISDestroy(isicol); CHKERRQ(ierr);
121   b = (Mat_SeqBAIJ *) (*B)->data;
122   PetscFree(b->imax);
123   b->singlemalloc = 0;
124   len             = (ainew[n] + shift)*sizeof(Scalar);
125   /* the next line frees the default space generated by the Create() */
126   PetscFree(b->a); PetscFree(b->ilen);
127   b->a          = (Scalar *) PetscMalloc( len*bs*bs ); CHKPTRQ(b->a);
128   b->j          = ajnew;
129   b->i          = ainew;
130   b->diag       = idnew;
131   b->ilen       = 0;
132   b->imax       = 0;
133   b->row        = isrow;
134   b->col        = iscol;
135   b->solve_work = (Scalar *) PetscMalloc( n*sizeof(Scalar));
136   CHKPTRQ(b->solve_work);
137   /* In b structure:  Free imax, ilen, old a, old j.
138      Allocate idnew, solve_work, new a, new j */
139   PLogObjectMemory(*B,(ainew[n]+shift-n)*(sizeof(int)+sizeof(Scalar)));
140   b->maxnz = b->nz = ainew[n] + shift;
141 
142   return 0;
143 }
144 
145 #include "pinclude/plapack.h"
146 
147 #define BlockZero(v,idx) {PetscMemzero(v+bs2*(idx),bs2*sizeof(Scalar));}
148 
149 #define BlockCopy(v_in,v_out,idx_in,idx_out) \
150   {PetscMemcpy(v_out + bs2*(idx_out),v_in + bs2*(idx_in),bs2*sizeof(Scalar));}
151 
152 #define BlockInvert(vv,idx) \
153   { int _i,info; Scalar *w = vv + bs2*idx; \
154 printf("vv idx %g %d \n",*vv,idx); \
155     LAgetrf_(&bs,&bs,w,&bs,v_pivots,&info); CHKERRQ(info); \
156 printf("bs %d w %g\n",bs,*w);\
157     PetscMemzero(v_work,bs2*sizeof(Scalar));  \
158 printf("vwork %g\n",*v_work);\
159     for ( _i=0; _i<bs; _i++ ) { v_work[_i + bs*_i] = 1.0; } \
160 printf("vwork %g\n",*v_work);\
161     LAgetrs_("N",&bs,&bs,w,&bs,v_pivots,v_work,&bs, &info);CHKERRQ(info);\
162 printf("w %g vwork %g\n",*w,*v_work);\
163     PetscMemcpy(w,v_work,bs2*sizeof(Scalar)); \
164 printf("w %g vwork %g\n",*w,*v_work);\
165   }
166 
167 #define BlockMult(v1,v2,v3
168 /* ----------------------------------------------------------- */
169 int MatLUFactorNumeric_SeqBAIJ(Mat A,Mat *B)
170 {
171   Mat         C = *B;
172   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data, *b = (Mat_SeqBAIJ *)C->data;
173   IS          iscol = b->col, isrow = b->row, isicol;
174   int         *r,*ic, ierr, i, j, n = a->mbs, *ai = b->i, *aj = b->j;
175   int         *ajtmpold, *ajtmp, nz, row, *ics, shift = a->indexshift;
176   int         *diag_offset = b->diag,diag,bs = a->bs,bs2 = bs*bs, *v_pivots;
177   Scalar      *rtmp,*v, *pc, multiplier,*v_work;
178   /* These declarations are for optimizations.  They reduce the number of
179      memory references that are made by locally storing information; the
180      word "register" used here with pointers can be viewed as "private" or
181      "known only to me"
182    */
183   register Scalar *pv, *rtmps;
184   register int    *pj;
185 
186   ierr  = ISInvertPermutation(iscol,&isicol); CHKERRQ(ierr);
187   PLogObjectParent(*B,isicol);
188   ierr  = ISGetIndices(isrow,&r); CHKERRQ(ierr);
189   ierr  = ISGetIndices(isicol,&ic); CHKERRQ(ierr);
190   rtmp  = (Scalar *) PetscMalloc(bs2*(n+1)*sizeof(Scalar));CHKPTRQ(rtmp);
191   rtmps = rtmp + shift; ics = ic + shift;
192 
193   /* generate work space needed by dense LU factorization */
194   v_work   = (Scalar *) PetscMalloc(bs2*sizeof(Scalar)); CHKPTRQ(v_work);
195   v_pivots = (int *) PetscMalloc(bs*sizeof(int)); CHKPTRQ(v_pivots);
196 
197   for ( i=0; i<n; i++ ) {
198     nz    = ai[i+1] - ai[i];
199     ajtmp = aj + ai[i] + shift;
200     for  ( j=0; j<nz; j++ ) BlockZero(rtmps,ajtmp[j]);
201 malloc_verify();
202     /* load in initial (unfactored row) */
203     nz       = a->i[r[i]+1] - a->i[r[i]];
204     ajtmpold = a->j + a->i[r[i]] + shift;
205     v        = a->a + bs2*a->i[r[i]] + shift;
206     for ( j=0; j<nz; j++ ) BlockCopy(v,rtmp,j,ics[ajtmpold[j]]);
207 malloc_verify();
208 
209     row = *ajtmp++ + shift;
210     while (row < i) {
211       pc = rtmp + row;
212       if (*pc != 0.0) {
213         pv         = b->a + bs2*diag_offset[row] + shift;
214         pj         = b->j + diag_offset[row] + (!shift);
215         multiplier = *pc * (*pv);
216         *pc        = multiplier;
217         nz         = ai[row+1] - diag_offset[row] - 1;
218         for (j=0; j<nz; j++) rtmps[pj[j]] -= multiplier * pv[j];
219         pv        += bs2;
220         PLogFlops(2*nz);
221       }
222       row = *ajtmp++ + shift;
223     }
224     /* finished row so stick it into b->a */
225     pv = b->a + bs2*ai[i] + shift;
226     pj = b->j + ai[i] + shift;
227     nz = ai[i+1] - ai[i];
228     for ( j=0; j<nz; j++ ) BlockCopy(rtmps,pv,pj[j],j);
229 malloc_verify();
230     diag = diag_offset[i] - ai[i];
231     /* invert diagonal block */
232     BlockInvert(pv,diag);
233 malloc_verify();
234   }
235 
236   PetscFree(rtmp);
237   ierr = ISRestoreIndices(isicol,&ic); CHKERRQ(ierr);
238   ierr = ISRestoreIndices(isrow,&r); CHKERRQ(ierr);
239   ierr = ISDestroy(isicol); CHKERRQ(ierr);
240   C->factor = FACTOR_LU;
241   C->assembled = PETSC_TRUE;
242   PLogFlops(b->n);
243   return 0;
244 }
245 /* ----------------------------------------------------------- */
246 int MatLUFactor_SeqBAIJ(Mat A,IS row,IS col,double f)
247 {
248   Mat_SeqBAIJ *mat = (Mat_SeqBAIJ *) A->data;
249   int         ierr;
250   Mat         C;
251 
252   ierr = MatLUFactorSymbolic_SeqBAIJ(A,row,col,f,&C); CHKERRQ(ierr);
253   ierr = MatLUFactorNumeric_SeqBAIJ(A,&C); CHKERRQ(ierr);
254 
255   /* free all the data structures from mat */
256   PetscFree(mat->a);
257   if (!mat->singlemalloc) {PetscFree(mat->i); PetscFree(mat->j);}
258   if (mat->diag) PetscFree(mat->diag);
259   if (mat->ilen) PetscFree(mat->ilen);
260   if (mat->imax) PetscFree(mat->imax);
261   if (mat->solve_work) PetscFree(mat->solve_work);
262   PetscFree(mat);
263 
264   PetscMemcpy(A,C,sizeof(struct _Mat));
265   PetscHeaderDestroy(C);
266   return 0;
267 }
268 /* ----------------------------------------------------------- */
269 int MatSolve_SeqBAIJ(Mat A,Vec bb, Vec xx)
270 {
271   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
272   IS          iscol = a->col, isrow = a->row;
273   int         *r,*c, ierr, i,  n = a->m, *vi, *ai = a->i, *aj = a->j;
274   int         nz,shift = a->indexshift;
275   Scalar      *x,*b,*tmp, *tmps, *aa = a->a, sum, *v;
276 
277   if (A->factor != FACTOR_LU) SETERRQ(1,"MatSolve_SeqBAIJ:Not for unfactored matrix");
278 
279   ierr = VecGetArray(bb,&b); CHKERRQ(ierr);
280   ierr = VecGetArray(xx,&x); CHKERRQ(ierr);
281   tmp  = a->solve_work;
282 
283   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
284   ierr = ISGetIndices(iscol,&c);CHKERRQ(ierr); c = c + (n-1);
285 
286   /* forward solve the lower triangular */
287   tmp[0] = b[*r++];
288   tmps   = tmp + shift;
289   for ( i=1; i<n; i++ ) {
290     v   = aa + ai[i] + shift;
291     vi  = aj + ai[i] + shift;
292     nz  = a->diag[i] - ai[i];
293     sum = b[*r++];
294     while (nz--) sum -= *v++ * tmps[*vi++];
295     tmp[i] = sum;
296   }
297 
298   /* backward solve the upper triangular */
299   for ( i=n-1; i>=0; i-- ){
300     v   = aa + a->diag[i] + (!shift);
301     vi  = aj + a->diag[i] + (!shift);
302     nz  = ai[i+1] - a->diag[i] - 1;
303     sum = tmp[i];
304     while (nz--) sum -= *v++ * tmps[*vi++];
305     x[*c--] = tmp[i] = sum*aa[a->diag[i]+shift];
306   }
307 
308   ierr = ISRestoreIndices(isrow,&r); CHKERRQ(ierr);
309   ierr = ISRestoreIndices(iscol,&c); CHKERRQ(ierr);
310   PLogFlops(2*a->nz - a->n);
311   return 0;
312 }
313 
314 /* ----------------------------------------------------------------*/
315 /*
316      This code is virtually identical to MatILUFactorSymbolic_SeqAIJ
317    except that the data structure of Mat_SeqAIJ is slightly different.
318    Not a good example of code reuse.
319    */
320 int MatILUFactorSymbolic_SeqBAIJ(Mat A,IS isrow,IS iscol,double f,int levels,
321                                  Mat *fact)
322 {
323   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data, *b;
324   IS          isicol;
325   int         *r,*ic, ierr, prow, n = a->mbs, *ai = a->i, *aj = a->j;
326   int         *ainew,*ajnew, jmax,*fill, *xi, nz, *im,*ajfill,*flev;
327   int         *dloc, idx, row,m,fm, nzf, nzi,len,  realloc = 0;
328   int         incrlev,nnz,i,shift = a->indexshift,bs = a->bs;
329 
330   if (a->m != a->n) SETERRQ(1,"MatILUFactorSymbolic_SeqBAIJ:Matrix must be square");
331   if (!isrow) SETERRQ(1,"MatILUFactorSymbolic_SeqBAIJ:Must have row permutation");
332   if (!iscol) SETERRQ(1,"MatILUFactorSymbolic_SeqBAIJ:Must have column permutation");
333 
334   /* special case that simply copies fill pattern */
335   if (levels == 0 && ISIsIdentity(isrow) && ISIsIdentity(iscol)) {
336     ierr = MatConvertSameType_SeqBAIJ(A,fact,DO_NOT_COPY_VALUES); CHKERRQ(ierr);
337     (*fact)->factor = FACTOR_LU;
338     b               = (Mat_SeqBAIJ *) (*fact)->data;
339     if (!b->diag) {
340       ierr = MatMarkDiag_SeqBAIJ(*fact); CHKERRQ(ierr);
341     }
342     b->row        = isrow;
343     b->col        = iscol;
344     b->solve_work = (Scalar *) PetscMalloc((b->m+1)*sizeof(Scalar));CHKPTRQ(b->solve_work);
345     return 0;
346   }
347 
348   ierr = ISInvertPermutation(iscol,&isicol); CHKERRQ(ierr);
349   ierr = ISGetIndices(isrow,&r); CHKERRQ(ierr);
350   ierr = ISGetIndices(isicol,&ic); CHKERRQ(ierr);
351 
352   /* get new row pointers */
353   ainew = (int *) PetscMalloc( (n+1)*sizeof(int) ); CHKPTRQ(ainew);
354   ainew[0] = -shift;
355   /* don't know how many column pointers are needed so estimate */
356   jmax = (int) (f*(ai[n]+!shift));
357   ajnew = (int *) PetscMalloc( (jmax)*sizeof(int) ); CHKPTRQ(ajnew);
358   /* ajfill is level of fill for each fill entry */
359   ajfill = (int *) PetscMalloc( (jmax)*sizeof(int) ); CHKPTRQ(ajfill);
360   /* fill is a linked list of nonzeros in active row */
361   fill = (int *) PetscMalloc( (n+1)*sizeof(int)); CHKPTRQ(fill);
362   /* im is level for each filled value */
363   im = (int *) PetscMalloc( (n+1)*sizeof(int)); CHKPTRQ(im);
364   /* dloc is location of diagonal in factor */
365   dloc = (int *) PetscMalloc( (n+1)*sizeof(int)); CHKPTRQ(dloc);
366   dloc[0]  = 0;
367   for ( prow=0; prow<n; prow++ ) {
368     /* first copy previous fill into linked list */
369     nzf     = nz  = ai[r[prow]+1] - ai[r[prow]];
370     xi      = aj + ai[r[prow]] + shift;
371     fill[n] = n;
372     while (nz--) {
373       fm  = n;
374       idx = ic[*xi++ + shift];
375       do {
376         m  = fm;
377         fm = fill[m];
378       } while (fm < idx);
379       fill[m]   = idx;
380       fill[idx] = fm;
381       im[idx]   = 0;
382     }
383     nzi = 0;
384     row = fill[n];
385     while ( row < prow ) {
386       incrlev = im[row] + 1;
387       nz      = dloc[row];
388       xi      = ajnew  + ainew[row] + shift + nz;
389       flev    = ajfill + ainew[row] + shift + nz + 1;
390       nnz     = ainew[row+1] - ainew[row] - nz - 1;
391       if (*xi++ + shift != row) {
392         SETERRQ(1,"MatILUFactorSymbolic_SeqBAIJ:zero pivot");
393       }
394       fm      = row;
395       while (nnz-- > 0) {
396         idx = *xi++ + shift;
397         if (*flev + incrlev > levels) {
398           flev++;
399           continue;
400         }
401         do {
402           m  = fm;
403           fm = fill[m];
404         } while (fm < idx);
405         if (fm != idx) {
406           im[idx]   = *flev + incrlev;
407           fill[m]   = idx;
408           fill[idx] = fm;
409           fm        = idx;
410           nzf++;
411         }
412         else {
413           if (im[idx] > *flev + incrlev) im[idx] = *flev+incrlev;
414         }
415         flev++;
416       }
417       row = fill[row];
418       nzi++;
419     }
420     /* copy new filled row into permanent storage */
421     ainew[prow+1] = ainew[prow] + nzf;
422     if (ainew[prow+1] > jmax-shift) {
423       /* allocate a longer ajnew */
424       int maxadd;
425       maxadd = (int) ((f*(ai[n]+!shift)*(n-prow+5))/n);
426       if (maxadd < nzf) maxadd = (n-prow)*(nzf+1);
427       jmax += maxadd;
428       xi = (int *) PetscMalloc( jmax*sizeof(int) );CHKPTRQ(xi);
429       PetscMemcpy(xi,ajnew,(ainew[prow]+shift)*sizeof(int));
430       PetscFree(ajnew);
431       ajnew = xi;
432       /* allocate a longer ajfill */
433       xi = (int *) PetscMalloc( jmax*sizeof(int) );CHKPTRQ(xi);
434       PetscMemcpy(xi,ajfill,(ainew[prow]+shift)*sizeof(int));
435       PetscFree(ajfill);
436       ajfill = xi;
437       realloc++;
438     }
439     xi          = ajnew + ainew[prow] + shift;
440     flev        = ajfill + ainew[prow] + shift;
441     dloc[prow]  = nzi;
442     fm          = fill[n];
443     while (nzf--) {
444       *xi++   = fm - shift;
445       *flev++ = im[fm];
446       fm      = fill[fm];
447     }
448   }
449   PetscFree(ajfill);
450   ierr = ISRestoreIndices(isrow,&r); CHKERRQ(ierr);
451   ierr = ISRestoreIndices(isicol,&ic); CHKERRQ(ierr);
452   ierr = ISDestroy(isicol); CHKERRQ(ierr);
453   PetscFree(fill); PetscFree(im);
454 
455   PLogInfo((PetscObject)A,
456     "Info:MatILUFactorSymbolic_SeqBAIJ:Realloc %d Fill ratio:given %g needed %g\n",
457                              realloc,f,((double)ainew[n])/((double)ai[prow]));
458 
459   /* put together the new matrix */
460   ierr = MatCreateSeqBAIJ(A->comm,bs,bs*n,bs*n,0,PETSC_NULL,fact);CHKERRQ(ierr);
461   b = (Mat_SeqBAIJ *) (*fact)->data;
462   PetscFree(b->imax);
463   b->singlemalloc = 0;
464   len = (ainew[n] + shift)*sizeof(Scalar);
465   /* the next line frees the default space generated by the Create() */
466   PetscFree(b->a); PetscFree(b->ilen);
467   b->a          = (Scalar *) PetscMalloc( len ); CHKPTRQ(b->a);
468   b->j          = ajnew;
469   b->i          = ainew;
470   for ( i=0; i<n; i++ ) dloc[i] += ainew[i];
471   b->diag       = dloc;
472   b->ilen       = 0;
473   b->imax       = 0;
474   b->row        = isrow;
475   b->col        = iscol;
476   b->solve_work = (Scalar *) PetscMalloc( (n+1)*sizeof(Scalar));
477   CHKPTRQ(b->solve_work);
478   /* In b structure:  Free imax, ilen, old a, old j.
479      Allocate dloc, solve_work, new a, new j */
480   PLogObjectMemory(*fact,(ainew[n]+shift-n) * (sizeof(int)+sizeof(Scalar)));
481   b->maxnz          = b->nz = ainew[n] + shift;
482   (*fact)->factor   = FACTOR_LU;
483   return 0;
484 }
485 
486 
487 
488 
489