xref: /petsc/src/mat/impls/aij/seq/aijfact.c (revision eef2e97b0e9898b0e651e73e445c8c25945fe1ae)
1 #ifndef lint
2 static char vcid[] = "$Id: aijfact.c,v 1.15 1995/04/25 18:03:03 bsmith Exp bsmith $";
3 #endif
4 
5 
6 #include "aij.h"
7 #include "inline/spops.h"
8 /*
9     Factorization code for AIJ format.
10 */
11 
12 int MatLUFactorSymbolic_AIJ(Mat mat,IS isrow,IS iscol,Mat *fact)
13 {
14   Mat_AIJ *aij = (Mat_AIJ *) mat->data, *aijnew;
15   IS      isicol;
16   int     *r,*ic, ierr, i, n = aij->m, *ai = aij->i, *aj = aij->j;
17   int     *ainew,*ajnew, jmax,*fill, *ajtmp, nz;
18   int     *idnew, idx, row,m,fm, nnz, nzi,len;
19 
20   if (n != aij->n) SETERR(1,"Mat must be square");
21   if (!isrow) {SETERR(1,"Must have row permutation");}
22   if (!iscol) {SETERR(1,"Must have column permutation");}
23 
24   ierr = ISInvertPermutation(iscol,&isicol); CHKERR(ierr);
25   ISGetIndices(isrow,&r); ISGetIndices(isicol,&ic);
26 
27   /* get new row pointers */
28   ainew = (int *) MALLOC( (n+1)*sizeof(int) ); CHKPTR(ainew);
29   ainew[0] = 1;
30   /* don't know how many column pointers are needed so estimate */
31   jmax = 2*ai[n];
32   ajnew = (int *) MALLOC( (jmax)*sizeof(int) ); CHKPTR(ajnew);
33   /* fill is a linked list of nonzeros in active row */
34   fill = (int *) MALLOC( (n+1)*sizeof(int)); CHKPTR(fill);
35   /* idnew is location of diagonal in factor */
36   idnew = (int *) MALLOC( (n+1)*sizeof(int)); CHKPTR(idnew);
37   idnew[0] = 1;
38 
39   for ( i=0; i<n; i++ ) {
40     /* first copy previous fill into linked list */
41     nnz = nz    = ai[r[i]+1] - ai[r[i]];
42     ajtmp = aj + ai[r[i]] - 1;
43     fill[n] = n;
44     while (nz--) {
45       fm = n;
46       idx = ic[*ajtmp++ - 1];
47       do {
48         m = fm;
49         fm = fill[m];
50       } while (fm < idx);
51       fill[m] = idx;
52       fill[idx] = fm;
53     }
54     row = fill[n];
55     while ( row < i ) {
56       ajtmp = ajnew + idnew[row] - 1;
57       nz = ainew[row+1] - idnew[row];
58       fm = row;
59       while (nz--) {
60         fm = n;
61         idx = *ajtmp++ - 1;
62         do {
63           m = fm;
64           fm = fill[m];
65         } while (fm < idx);
66         if (fm != idx) {
67           fill[m] = idx;
68           fill[idx] = fm;
69           fm = idx;
70           nnz++;
71         }
72       }
73       row = fill[row];
74     }
75     /* copy new filled row into permanent storage */
76     ainew[i+1] = ainew[i] + nnz;
77     if (ainew[i+1] > jmax+1) {
78       /* allocate a longer ajnew */
79       jmax += nnz*(n-i);
80       ajtmp = (int *) MALLOC( jmax*sizeof(int) );CHKPTR(ajtmp);
81       MEMCPY(ajtmp,ajnew,(ainew[i]-1)*sizeof(int));
82       FREE(ajnew);
83       ajnew = ajtmp;
84     }
85     ajtmp = ajnew + ainew[i] - 1;
86     fm = fill[n];
87     nzi = 0;
88     while (nnz--) {
89       if (fm < i) nzi++;
90       *ajtmp++ = fm + 1;
91       fm = fill[fm];
92     }
93     idnew[i] = ainew[i] + nzi;
94   }
95 
96   ISDestroy(isicol); FREE(fill);
97 
98   /* put together the new matrix */
99   ierr = MatCreateSequentialAIJ(mat->comm,n, n, 0, 0, fact); CHKERR(ierr);
100   aijnew = (Mat_AIJ *) (*fact)->data;
101   FREE(aijnew->imax);
102   aijnew->singlemalloc = 0;
103   len = (ainew[n] - 1)*sizeof(Scalar);
104   /* the next line frees the default space generated by the Create() */
105   FREE(aijnew->a); FREE(aijnew->ilen);
106   aijnew->a         = (Scalar *) MALLOC( len ); CHKPTR(aijnew->a);
107   aijnew->j         = ajnew;
108   aijnew->i         = ainew;
109   aijnew->diag      = idnew;
110   aijnew->ilen      = 0;
111   aijnew->imax      = 0;
112   aijnew->row       = isrow;
113   aijnew->col       = iscol;
114   (*fact)->factor   = FACTOR_LU;
115   aijnew->solve_work = (Scalar *) MALLOC( n*sizeof(Scalar));
116   CHKPTR(aijnew->solve_work);
117   /* Cannot do this here because child is destroyed before parent created
118      PLogObjectParent(*fact,isicol); */
119   return 0;
120 }
121 
122 int MatLUFactorNumeric_AIJ(Mat mat,Mat *infact)
123 {
124   Mat     fact = *infact;
125   Mat_AIJ *aij = (Mat_AIJ *) mat->data, *aijnew = (Mat_AIJ *)fact->data;
126   IS      iscol = aijnew->col, isrow = aijnew->row, isicol;
127   int     *r,*ic, ierr, i, j, n = aij->m, *ai = aijnew->i, *aj = aijnew->j;
128   int     *ajtmpold, *ajtmp, nz, row,*pj;
129   Scalar  *rtmp,*v, *pv, *pc, multiplier;
130 
131   ierr = ISInvertPermutation(iscol,&isicol); CHKERR(ierr);
132   PLogObjectParent(*infact,isicol);
133   ierr = ISGetIndices(isrow,&r); CHKERR(ierr);
134   ierr = ISGetIndices(isicol,&ic); CHKERR(ierr);
135   rtmp = (Scalar *) MALLOC( (n+1)*sizeof(Scalar) ); CHKPTR(rtmp);
136 
137   for ( i=0; i<n; i++ ) {
138     nz = ai[i+1] - ai[i];
139     ajtmp = aj + ai[i] - 1;
140     for  ( j=0; j<nz; j++ ) rtmp[ajtmp[j]-1] = 0.0;
141 
142     /* load in initial (unfactored row) */
143     nz = aij->i[r[i]+1] - aij->i[r[i]];
144     ajtmpold = aij->j + aij->i[r[i]] - 1;
145     v  = aij->a + aij->i[r[i]] - 1;
146     for ( j=0; j<nz; j++ ) rtmp[ic[ajtmpold[j]-1]] =  v[j];
147 
148     row = *ajtmp++ - 1;
149     while (row < i) {
150       pc = rtmp + row;
151       if (*pc != 0.0) {
152         nz = aijnew->diag[row] - ai[row];
153         pv = aijnew->a + aijnew->diag[row] - 1;
154         pj = aijnew->j + aijnew->diag[row];
155         multiplier = *pc * *pv++;
156         *pc = multiplier;
157         nz = ai[row+1] - ai[row] - 1 - nz;
158         PLogFlops(2*nz);
159         while (nz-->0) rtmp[*pj++ - 1] -= multiplier* *pv++;
160       }
161       row = *ajtmp++ - 1;
162     }
163     /* finished row so stick it into aijnew->a */
164     pv = aijnew->a + ai[i] - 1;
165     pj = aijnew->j + ai[i] - 1;
166     nz = ai[i+1] - ai[i];
167     rtmp[i] = 1.0/rtmp[i];
168     for ( j=0; j<nz; j++ ) {pv[j] = rtmp[pj[j]-1];}
169   }
170   FREE(rtmp);
171   ierr = ISRestoreIndices(isicol,&ic); CHKERR(ierr);
172   ierr = ISRestoreIndices(isrow,&r); CHKERR(ierr);
173   ierr = ISDestroy(isicol); CHKERR(ierr);
174   fact->factor      = FACTOR_LU;
175   aijnew->assembled = 1;
176   aijnew->nz           = ai[aijnew->n];
177   PLogFlops(aijnew->n);
178   return 0;
179 }
180 int MatLUFactor_AIJ(Mat matin,IS row,IS col)
181 {
182   Mat_AIJ *mat = (Mat_AIJ *) matin->data;
183   int     ierr;
184   Mat     fact;
185   ierr = MatLUFactorSymbolic_AIJ(matin,row,col,&fact); CHKERR(ierr);
186   ierr = MatLUFactorNumeric_AIJ(matin,&fact); CHKERR(ierr);
187 
188   /* free all the data structures from mat */
189   FREE(mat->a);
190   if (!mat->singlemalloc) {FREE(mat->i); FREE(mat->j);}
191   if (mat->diag) FREE(mat->diag);
192   if (mat->ilen) FREE(mat->ilen);
193   if (mat->imax) FREE(mat->imax);
194   if (mat->row && mat->col && mat->row != mat->col) {
195     ISDestroy(mat->row);
196   }
197   if (mat->col) ISDestroy(mat->col);
198   FREE(mat);
199 
200   MEMCPY(matin,fact,sizeof(struct _Mat));
201   FREE(fact);
202   return 0;
203 }
204 
205 int MatSolve_AIJ(Mat mat,Vec bb, Vec xx)
206 {
207   Mat_AIJ *aij = (Mat_AIJ *) mat->data;
208   IS      iscol = aij->col, isrow = aij->row;
209   int     *r,*c, ierr, i,  n = aij->m, *vi, *ai = aij->i, *aj = aij->j;
210   int     nz;
211   Scalar  *x,*b,*tmp, *aa = aij->a, sum, *v;
212 
213   if (mat->factor != FACTOR_LU) SETERR(1,"Cannot solve with factor");
214 
215   ierr = VecGetArray(bb,&b); CHKERR(ierr);
216   ierr = VecGetArray(xx,&x); CHKERR(ierr);
217   tmp = aij->solve_work;
218 
219   ierr = ISGetIndices(isrow,&r);CHKERR(ierr);
220   ierr = ISGetIndices(iscol,&c);CHKERR(ierr); c = c + (n-1);
221 
222   /* forward solve the lower triangular */
223   tmp[0] = b[*r++];
224   for ( i=1; i<n; i++ ) {
225     v   = aa + ai[i] - 1;
226     vi  = aj + ai[i] - 1;
227     nz  = aij->diag[i] - ai[i];
228     sum = b[*r++];
229     while (nz--) sum -= *v++ * tmp[*vi++ - 1];
230     tmp[i] = sum;
231   }
232 
233   /* backward solve the upper triangular */
234   for ( i=n-1; i>=0; i-- ){
235     v   = aa + aij->diag[i];
236     vi  = aj + aij->diag[i];
237     nz  = ai[i+1] - aij->diag[i] - 1;
238     sum = tmp[i];
239     while (nz--) sum -= *v++ * tmp[*vi++ - 1];
240     x[*c--] = tmp[i] = sum*aa[aij->diag[i]-1];
241   }
242 
243   PLogFlops(2*aij->nz - aij->n);
244   return 0;
245 }
246 int MatSolveAdd_AIJ(Mat mat,Vec bb, Vec yy, Vec xx)
247 {
248   Mat_AIJ *aij = (Mat_AIJ *) mat->data;
249   IS      iscol = aij->col, isrow = aij->row;
250   int     *r,*c, ierr, i,  n = aij->m, *vi, *ai = aij->i, *aj = aij->j;
251   int     nz;
252   Scalar  *x,*b,*tmp, *aa = aij->a, sum, *v;
253 
254   if (mat->factor != FACTOR_LU) SETERR(1,"Cannot solve with factor");
255   if (yy != xx) {ierr = VecCopy(yy,xx); CHKERR(ierr);}
256 
257   ierr = VecGetArray(bb,&b); CHKERR(ierr);
258   ierr = VecGetArray(xx,&x); CHKERR(ierr);
259   tmp = aij->solve_work;
260 
261   ierr = ISGetIndices(isrow,&r); CHKERR(ierr);
262   ierr = ISGetIndices(iscol,&c); CHKERR(ierr); c = c + (n-1);
263 
264   /* forward solve the lower triangular */
265   tmp[0] = b[*r++];
266   for ( i=1; i<n; i++ ) {
267     v   = aa + ai[i] - 1;
268     vi  = aj + ai[i] - 1;
269     nz  = aij->diag[i] - ai[i];
270     sum = b[*r++];
271     while (nz--) sum -= *v++ * tmp[*vi++ - 1];
272     tmp[i] = sum;
273   }
274 
275   /* backward solve the upper triangular */
276   for ( i=n-1; i>=0; i-- ){
277     v   = aa + aij->diag[i];
278     vi  = aj + aij->diag[i];
279     nz  = ai[i+1] - aij->diag[i] - 1;
280     sum = tmp[i];
281     while (nz--) sum -= *v++ * tmp[*vi++ - 1];
282     tmp[i] = sum*aa[aij->diag[i]-1];
283     x[*c--] += tmp[i];
284   }
285 
286   PLogFlops(2*aij->nz);
287   return 0;
288 }
289 /* -------------------------------------------------------------------*/
290 int MatSolveTrans_AIJ(Mat mat,Vec bb, Vec xx)
291 {
292   Mat_AIJ *aij = (Mat_AIJ *) mat->data;
293   IS      iscol = aij->col, isrow = aij->row, invisrow,inviscol;
294   int     *r,*c, ierr, i, n = aij->m, *vi, *ai = aij->i, *aj = aij->j;
295   int     nz;
296   Scalar  *x,*b,*tmp, *aa = aij->a, *v;
297 
298   if (mat->factor != FACTOR_LU) SETERR(1,"Cannot solve with factor");
299   ierr = VecGetArray(bb,&b); CHKERR(ierr);
300   ierr = VecGetArray(xx,&x); CHKERR(ierr);
301   tmp = aij->solve_work;
302 
303   /* invert the permutations */
304   ierr = ISInvertPermutation(isrow,&invisrow); CHKERR(ierr);
305   ierr = ISInvertPermutation(iscol,&inviscol); CHKERR(ierr);
306 
307 
308   ierr = ISGetIndices(invisrow,&r); CHKERR(ierr);
309   ierr = ISGetIndices(inviscol,&c); CHKERR(ierr);
310 
311   /* copy the b into temp work space according to permutation */
312   for ( i=0; i<n; i++ ) tmp[c[i]] = b[i];
313 
314   /* forward solve the U^T */
315   for ( i=0; i<n; i++ ) {
316     v   = aa + aij->diag[i] - 1;
317     vi  = aj + aij->diag[i];
318     nz  = ai[i+1] - aij->diag[i] - 1;
319     tmp[i] *= *v++;
320     while (nz--) {
321       tmp[*vi++ - 1] -= (*v++)*tmp[i];
322     }
323   }
324 
325   /* backward solve the L^T */
326   for ( i=n-1; i>=0; i-- ){
327     v   = aa + aij->diag[i] - 2;
328     vi  = aj + aij->diag[i] - 2;
329     nz  = aij->diag[i] - ai[i];
330     while (nz--) {
331       tmp[*vi-- - 1] -= (*v--)*tmp[i];
332     }
333   }
334 
335   /* copy tmp into x according to permutation */
336   for ( i=0; i<n; i++ ) x[r[i]] = tmp[i];
337 
338   ISDestroy(invisrow); ISDestroy(inviscol);
339 
340   PLogFlops(2*aij->nz-aij->n);
341   return 0;
342 }
343 
344 int MatSolveTransAdd_AIJ(Mat mat,Vec bb, Vec zz,Vec xx)
345 {
346   Mat_AIJ *aij = (Mat_AIJ *) mat->data;
347   IS      iscol = aij->col, isrow = aij->row, invisrow,inviscol;
348   int     *r,*c, ierr, i, n = aij->m, *vi, *ai = aij->i, *aj = aij->j;
349   int     nz;
350   Scalar  *x,*b,*tmp, *aa = aij->a, *v;
351 
352   if (mat->factor != FACTOR_LU) SETERR(1,"Cannot solve with factor");
353   if (zz != xx) VecCopy(zz,xx);
354 
355   ierr = VecGetArray(bb,&b); CHKERR(ierr);
356   ierr = VecGetArray(xx,&x); CHKERR(ierr);
357   tmp = aij->solve_work;
358 
359   /* invert the permutations */
360   ierr = ISInvertPermutation(isrow,&invisrow); CHKERR(ierr);
361   ierr = ISInvertPermutation(iscol,&inviscol); CHKERR(ierr);
362 
363 
364   ierr = ISGetIndices(invisrow,&r); CHKERR(ierr);
365   ierr = ISGetIndices(inviscol,&c); CHKERR(ierr);
366 
367   /* copy the b into temp work space according to permutation */
368   for ( i=0; i<n; i++ ) tmp[c[i]] = b[i];
369 
370   /* forward solve the U^T */
371   for ( i=0; i<n; i++ ) {
372     v   = aa + aij->diag[i] - 1;
373     vi  = aj + aij->diag[i];
374     nz  = ai[i+1] - aij->diag[i] - 1;
375     tmp[i] *= *v++;
376     while (nz--) {
377       tmp[*vi++ - 1] -= (*v++)*tmp[i];
378     }
379   }
380 
381   /* backward solve the L^T */
382   for ( i=n-1; i>=0; i-- ){
383     v   = aa + aij->diag[i] - 2;
384     vi  = aj + aij->diag[i] - 2;
385     nz  = aij->diag[i] - ai[i];
386     while (nz--) {
387       tmp[*vi-- - 1] -= (*v--)*tmp[i];
388     }
389   }
390 
391   /* copy tmp into x according to permutation */
392   for ( i=0; i<n; i++ ) x[r[i]] += tmp[i];
393 
394   ISDestroy(invisrow); ISDestroy(inviscol);
395 
396   PLogFlops(2*aij->nz);
397   return 0;
398 }
399 /* ----------------------------------------------------------------*/
400 int MatILU_AIJ(Mat mat,IS isrow,IS iscol,int levels,Mat *fact)
401 {
402   Mat_AIJ *aij = (Mat_AIJ *) mat->data, *aijnew;
403   IS      isicol;
404   int     *r,*ic, ierr, i, n = aij->m, *ai = aij->i, *aj = aij->j;
405   int     *ainew,*ajnew, jmax,*fill, *ajtmp, nz, *lfill,*ajfill,*ajtmpf;
406   int     *idnew, idx, row,m,fm, nnz, nzi,len;
407 
408   if (n != aij->n) SETERR(1,"Mat must be square");
409   if (!isrow) {SETERR(1,"Must have row permutation");}
410   if (!iscol) {SETERR(1,"Must have column permutation");}
411 
412   ierr = ISInvertPermutation(iscol,&isicol); CHKERR(ierr);
413   ISGetIndices(isrow,&r); ISGetIndices(isicol,&ic);
414 
415   /* get new row pointers */
416   ainew = (int *) MALLOC( (n+1)*sizeof(int) ); CHKPTR(ainew);
417   ainew[0] = 1;
418   /* don't know how many column pointers are needed so estimate */
419   jmax = 2*ai[n];
420   ajnew = (int *) MALLOC( (jmax)*sizeof(int) ); CHKPTR(ajnew);
421   /* ajfill is level of fill for each fill entry */
422   ajfill = (int *) MALLOC( (jmax)*sizeof(int) ); CHKPTR(ajfill);
423   /* fill is a linked list of nonzeros in active row */
424   fill = (int *) MALLOC( (n+1)*sizeof(int)); CHKPTR(fill);
425   /* lfill is level for each filled value */
426   lfill = (int *) MALLOC( (n+1)*sizeof(int)); CHKPTR(lfill);
427   /* idnew is location of diagonal in factor */
428   idnew = (int *) MALLOC( (n+1)*sizeof(int)); CHKPTR(idnew);
429   idnew[0] = 1;
430 
431   for ( i=0; i<n; i++ ) {
432     /* first copy previous fill into linked list */
433     nnz = nz    = ai[r[i]+1] - ai[r[i]];
434     ajtmp = aj + ai[r[i]] - 1;
435     fill[n] = n;
436     while (nz--) {
437       fm = n;
438       idx = ic[*ajtmp++ - 1];
439       do {
440         m = fm;
441         fm = fill[m];
442       } while (fm < idx);
443       fill[m] = idx;
444       fill[idx] = fm;
445       lfill[idx] = -1;
446     }
447     row = fill[n];
448     while ( row < i ) {
449       ajtmp  = ajnew + idnew[row] - 1;
450       ajtmpf = ajfill + idnew[row] - 1;
451       nz = ainew[row+1] - idnew[row];
452       fm = row;
453       while (nz--) {
454         fm = n;
455         idx = *ajtmp++ - 1;
456         do {
457           m = fm;
458           fm = fill[m];
459         } while (fm < idx);
460         if (fm != idx) {
461           lfill[idx] = *ajtmpf + 1;
462           if (lfill[idx] < levels) {
463             fill[m] = idx;
464             fill[idx] = fm;
465             fm = idx;
466             nnz++;
467           }
468         }
469         ajtmpf++;
470       }
471       row = fill[row];
472     }
473     /* copy new filled row into permanent storage */
474     ainew[i+1] = ainew[i] + nnz;
475     if (ainew[i+1] > jmax+1) {
476       /* allocate a longer ajnew */
477       jmax += nnz*(n-i);
478       ajtmp = (int *) MALLOC( jmax*sizeof(int) );CHKPTR(ajtmp);
479       MEMCPY(ajtmp,ajnew,(ainew[i]-1)*sizeof(int));
480       FREE(ajnew);
481       ajnew = ajtmp;
482       /* allocate a longer ajfill */
483       ajtmp = (int *) MALLOC( jmax*sizeof(int) );CHKPTR(ajtmp);
484       MEMCPY(ajtmp,ajfill,(ainew[i]-1)*sizeof(int));
485       FREE(ajfill);
486       ajfill = ajtmp;
487     }
488     ajtmp  = ajnew + ainew[i] - 1;
489     ajtmpf = ajfill + ainew[i] - 1;
490     fm = fill[n];
491     nzi = 0;
492     while (nnz--) {
493       if (fm < i) nzi++;
494       *ajtmp++  = fm + 1;
495       *ajtmpf++ = lfill[fm];
496       fm = fill[fm];
497     }
498     idnew[i] = ainew[i] + nzi;
499   }
500   FREE(ajfill);
501   ISDestroy(isicol); FREE(fill); FREE(lfill);
502 
503   /* put together the new matrix */
504   ierr = MatCreateSequentialAIJ(mat->comm,n, n, 0, 0, fact); CHKERR(ierr);
505   aijnew = (Mat_AIJ *) (*fact)->data;
506   FREE(aijnew->imax);
507   aijnew->singlemalloc = 0;
508   len = (ainew[n] - 1)*sizeof(Scalar);
509   /* the next line frees the default space generated by the Create() */
510   FREE(aijnew->a); FREE(aijnew->ilen);
511   aijnew->a         = (Scalar *) MALLOC( len ); CHKPTR(aijnew->a);
512   aijnew->j         = ajnew;
513   aijnew->i         = ainew;
514   aijnew->diag      = idnew;
515   aijnew->ilen      = 0;
516   aijnew->imax      = 0;
517   aijnew->row       = isrow;
518   aijnew->col       = iscol;
519   aijnew->solve_work = (Scalar *) MALLOC( n*sizeof(Scalar));
520   CHKPTR(aijnew->solve_work);
521   (*fact)->factor   = FACTOR_LU;
522   return 0;
523 }
524