xref: /petsc/src/mat/impls/aij/seq/aijfact.c (revision 3a40ed3dce77c081171d005ae1a6ff4bb9d13b6f)
1 #ifdef PETSC_RCS_HEADER
2 static char vcid[] = "$Id: aijfact.c,v 1.89 1997/09/18 16:47:24 balay Exp bsmith $";
3 #endif
4 
5 #include "src/mat/impls/aij/seq/aij.h"
6 #include "src/vec/vecimpl.h"
7 #include "src/inline/dot.h"
8 
9 #undef __FUNC__
10 #define __FUNC__ "MatOrder_Flow_SeqAIJ"
11 int MatOrder_Flow_SeqAIJ(Mat mat,MatReordering type,IS *irow,IS *icol)
12 {
13   PetscFunctionBegin;
14 
15   SETERRQ(PETSC_ERR_SUP,0,"Code not written");
16 }
17 
18 /*
19     Factorization code for AIJ format.
20 */
21 #undef __FUNC__
22 #define __FUNC__ "MatLUFactorSymbolic_SeqAIJ"
23 int MatLUFactorSymbolic_SeqAIJ(Mat A,IS isrow,IS iscol,double f,Mat *B)
24 {
25   Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data, *b;
26   IS         isicol;
27   int        *r,*ic, ierr, i, n = a->m, *ai = a->i, *aj = a->j;
28   int        *ainew,*ajnew, jmax,*fill, *ajtmp, nz,shift = a->indexshift;
29   int        *idnew, idx, row,m,fm, nnz, nzi, realloc = 0,nzbd,*im;
30 
31   PetscFunctionBegin;
32   PetscValidHeaderSpecific(isrow,IS_COOKIE);
33   PetscValidHeaderSpecific(iscol,IS_COOKIE);
34 
35   ierr = ISInvertPermutation(iscol,&isicol); CHKERRQ(ierr);
36   ISGetIndices(isrow,&r); ISGetIndices(isicol,&ic);
37 
38   /* get new row pointers */
39   ainew = (int *) PetscMalloc( (n+1)*sizeof(int) ); CHKPTRQ(ainew);
40   ainew[0] = -shift;
41   /* don't know how many column pointers are needed so estimate */
42   jmax = (int) (f*ai[n]+(!shift));
43   ajnew = (int *) PetscMalloc( (jmax)*sizeof(int) ); CHKPTRQ(ajnew);
44   /* fill is a linked list of nonzeros in active row */
45   fill = (int *) PetscMalloc( (2*n+1)*sizeof(int)); CHKPTRQ(fill);
46   im = fill + n + 1;
47   /* idnew is location of diagonal in factor */
48   idnew = (int *) PetscMalloc( (n+1)*sizeof(int)); CHKPTRQ(idnew);
49   idnew[0] = -shift;
50 
51   for ( i=0; i<n; i++ ) {
52     /* first copy previous fill into linked list */
53     nnz     = nz    = ai[r[i]+1] - ai[r[i]];
54     if (!nz) SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,1,"Empty row in matrix");
55     ajtmp   = aj + ai[r[i]] + shift;
56     fill[n] = n;
57     while (nz--) {
58       fm  = n;
59       idx = ic[*ajtmp++ + shift];
60       do {
61         m  = fm;
62         fm = fill[m];
63       } while (fm < idx);
64       fill[m]   = idx;
65       fill[idx] = fm;
66     }
67     row = fill[n];
68     while ( row < i ) {
69       ajtmp = ajnew + idnew[row] + (!shift);
70       nzbd  = 1 + idnew[row] - ainew[row];
71       nz    = im[row] - nzbd;
72       fm    = row;
73       while (nz-- > 0) {
74         idx = *ajtmp++ + shift;
75         nzbd++;
76         if (idx == i) im[row] = nzbd;
77         do {
78           m  = fm;
79           fm = fill[m];
80         } while (fm < idx);
81         if (fm != idx) {
82           fill[m]   = idx;
83           fill[idx] = fm;
84           fm        = idx;
85           nnz++;
86         }
87       }
88       row = fill[row];
89     }
90     /* copy new filled row into permanent storage */
91     ainew[i+1] = ainew[i] + nnz;
92     if (ainew[i+1] > jmax) {
93       /* allocate a longer ajnew */
94       int maxadd;
95       maxadd = (int) ((f*(ai[n]+(!shift))*(n-i+5))/n);
96       if (maxadd < nnz) maxadd = (n-i)*(nnz+1);
97       jmax += maxadd;
98       ajtmp = (int *) PetscMalloc( jmax*sizeof(int) );CHKPTRQ(ajtmp);
99       PetscMemcpy(ajtmp,ajnew,(ainew[i]+shift)*sizeof(int));
100       PetscFree(ajnew);
101       ajnew = ajtmp;
102       realloc++; /* count how many times we realloc */
103     }
104     ajtmp = ajnew + ainew[i] + shift;
105     fm    = fill[n];
106     nzi   = 0;
107     im[i] = nnz;
108     while (nnz--) {
109       if (fm < i) nzi++;
110       *ajtmp++ = fm - shift;
111       fm       = fill[fm];
112     }
113     idnew[i] = ainew[i] + nzi;
114   }
115   if (ai[n] != 0) {
116     double af = ((double)ainew[n])/((double)ai[n]);
117     PLogInfo(A,"Info:MatLUFactorSymbolic_SeqAIJ:Reallocs %d Fill ratio:given %g needed %g\n",
118              realloc,f,af);
119     PLogInfo(A,"Info:MatLUFactorSymbolic_SeqAIJ:Run with -pc_lu_fill %g or use \n",af);
120     PLogInfo(A,"Info:MatLUFactorSymbolic_SeqAIJ:PCLUSetFill(pc,%g);\n",af);
121     PLogInfo(A,"Info:MatLUFactorSymbolic_SeqAIJ:for best performance.\n");
122   } else {
123     PLogInfo(A,"Info:MatLUFactorSymbolic_SeqAIJ: Empty matrix\n");
124   }
125 
126   ierr = ISRestoreIndices(isrow,&r); CHKERRQ(ierr);
127   ierr = ISRestoreIndices(isicol,&ic); CHKERRQ(ierr);
128 
129   PetscFree(fill);
130 
131   /* put together the new matrix */
132   ierr = MatCreateSeqAIJ(A->comm,n,n,0,PETSC_NULL,B); CHKERRQ(ierr);
133   PLogObjectParent(*B,isicol);
134   ierr = ISDestroy(isicol); CHKERRQ(ierr);
135   b = (Mat_SeqAIJ *) (*B)->data;
136   PetscFree(b->imax);
137   b->singlemalloc = 0;
138   /* the next line frees the default space generated by the Create() */
139   PetscFree(b->a); PetscFree(b->ilen);
140   b->a          = (Scalar *) PetscMalloc((ainew[n]+shift+1)*sizeof(Scalar));CHKPTRQ(b->a);
141   b->j          = ajnew;
142   b->i          = ainew;
143   b->diag       = idnew;
144   b->ilen       = 0;
145   b->imax       = 0;
146   b->row        = isrow;
147   b->col        = iscol;
148   b->solve_work = (Scalar *) PetscMalloc( (n+1)*sizeof(Scalar));CHKPTRQ(b->solve_work);
149   /* In b structure:  Free imax, ilen, old a, old j.
150      Allocate idnew, solve_work, new a, new j */
151   PLogObjectMemory(*B,(ainew[n]+shift-n)*(sizeof(int)+sizeof(Scalar)));
152   b->maxnz = b->nz = ainew[n] + shift;
153 
154   (*B)->info.factor_mallocs    = realloc;
155   (*B)->info.fill_ratio_given  = f;
156   if (ai[i] != 0) {
157     (*B)->info.fill_ratio_needed = ((double)ainew[n])/((double)ai[i]);
158   } else {
159     (*B)->info.fill_ratio_needed = 0.0;
160   }
161 
162   PetscFunctionReturn(0);
163 }
164 /* ----------------------------------------------------------- */
165 int Mat_AIJ_CheckInode(Mat);
166 
167 #undef __FUNC__
168 #define __FUNC__ "MatLUFactorNumeric_SeqAIJ"
169 int MatLUFactorNumeric_SeqAIJ(Mat A,Mat *B)
170 {
171   Mat        C = *B;
172   Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data, *b = (Mat_SeqAIJ *)C->data;
173   IS         iscol = b->col, isrow = b->row, isicol;
174   int        *r,*ic, ierr, i, j, n = a->m, *ai = b->i, *aj = b->j;
175   int        *ajtmpold, *ajtmp, nz, row, *ics, shift = a->indexshift;
176   int        *diag_offset = b->diag,diag,k;
177   int        preserve_row_sums = (int) a->ilu_preserve_row_sums;
178   register   int    *pj;
179   Scalar     *rtmp,*v, *pc, multiplier,sum,inner_sum,*rowsums = 0;
180   double     ssum;
181   register   Scalar *pv, *rtmps,*u_values;
182 
183   PetscFunctionBegin;
184   ierr  = ISInvertPermutation(iscol,&isicol); CHKERRQ(ierr);
185   PLogObjectParent(*B,isicol);
186   ierr  = ISGetIndices(isrow,&r); CHKERRQ(ierr);
187   ierr  = ISGetIndices(isicol,&ic); CHKERRQ(ierr);
188   rtmp  = (Scalar *) PetscMalloc( (n+1)*sizeof(Scalar) ); CHKPTRQ(rtmp);
189   PetscMemzero(rtmp,(n+1)*sizeof(Scalar));
190   rtmps = rtmp + shift; ics = ic + shift;
191 
192   /* precalcuate row sums */
193   if (preserve_row_sums) {
194     rowsums = (Scalar *) PetscMalloc( n*sizeof(Scalar) ); CHKPTRQ(rowsums);
195     for ( i=0; i<n; i++ ) {
196       nz  = a->i[r[i]+1] - a->i[r[i]];
197       v   = a->a + a->i[r[i]] + shift;
198       sum = 0.0;
199       for ( j=0; j<nz; j++ ) sum += v[j];
200       rowsums[i] = sum;
201     }
202   }
203 
204   for ( i=0; i<n; i++ ) {
205     nz    = ai[i+1] - ai[i];
206     ajtmp = aj + ai[i] + shift;
207     for  ( j=0; j<nz; j++ ) rtmps[ajtmp[j]] = 0.0;
208 
209     /* load in initial (unfactored row) */
210     nz       = a->i[r[i]+1] - a->i[r[i]];
211     ajtmpold = a->j + a->i[r[i]] + shift;
212     v        = a->a + a->i[r[i]] + shift;
213     for ( j=0; j<nz; j++ ) rtmp[ics[ajtmpold[j]]] =  v[j];
214 
215     row = *ajtmp++ + shift;
216       while  (row < i ) {
217       pc = rtmp + row;
218       if (*pc != 0.0) {
219         pv         = b->a + diag_offset[row] + shift;
220         pj         = b->j + diag_offset[row] + (!shift);
221         multiplier = *pc / *pv++;
222         *pc        = multiplier;
223         nz         = ai[row+1] - diag_offset[row] - 1;
224         for (j=0; j<nz; j++) rtmps[pj[j]] -= multiplier * pv[j];
225         PLogFlops(2*nz);
226       }
227       row = *ajtmp++ + shift;
228     }
229     /* finished row so stick it into b->a */
230     pv = b->a + ai[i] + shift;
231     pj = b->j + ai[i] + shift;
232     nz = ai[i+1] - ai[i];
233     for ( j=0; j<nz; j++ ) {pv[j] = rtmps[pj[j]];}
234     diag = diag_offset[i] - ai[i];
235     /*
236           Possibly adjust diagonal entry on current row to force
237         LU matrix to have same row sum as initial matrix.
238     */
239     if (pv[diag] == 0.0) {
240       SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,0,"Zero pivot");
241     }
242     if (preserve_row_sums) {
243       pj  = b->j + ai[i] + shift;
244       sum = rowsums[i];
245       for ( j=0; j<diag; j++ ) {
246         u_values  = b->a + diag_offset[pj[j]] + shift;
247         nz        = ai[pj[j]+1] - diag_offset[pj[j]];
248         inner_sum = 0.0;
249         for ( k=0; k<nz; k++ ) {
250           inner_sum += u_values[k];
251         }
252         sum -= pv[j]*inner_sum;
253 
254       }
255       nz       = ai[i+1] - diag_offset[i] - 1;
256       u_values = b->a + diag_offset[i] + 1 + shift;
257       for ( k=0; k<nz; k++ ) {
258         sum -= u_values[k];
259       }
260       ssum = PetscAbsScalar(sum/pv[diag]);
261       if (ssum < 1000. && ssum > .001) pv[diag] = sum;
262     }
263     /* check pivot entry for current row */
264   }
265 
266   /* invert diagonal entries for simplier triangular solves */
267   for ( i=0; i<n; i++ ) {
268     b->a[diag_offset[i]+shift] = 1.0/b->a[diag_offset[i]+shift];
269   }
270 
271   if (preserve_row_sums) PetscFree(rowsums);
272   PetscFree(rtmp);
273   ierr = ISRestoreIndices(isicol,&ic); CHKERRQ(ierr);
274   ierr = ISRestoreIndices(isrow,&r); CHKERRQ(ierr);
275   ierr = ISDestroy(isicol); CHKERRQ(ierr);
276   C->factor = FACTOR_LU;
277   ierr = Mat_AIJ_CheckInode(C); CHKERRQ(ierr);
278   C->assembled = PETSC_TRUE;
279   PLogFlops(b->n);
280   PetscFunctionReturn(0);
281 }
282 /* ----------------------------------------------------------- */
283 #undef __FUNC__
284 #define __FUNC__ "MatLUFactor_SeqAIJ"
285 int MatLUFactor_SeqAIJ(Mat A,IS row,IS col,double f)
286 {
287   Mat_SeqAIJ *mat = (Mat_SeqAIJ *) A->data;
288   int        ierr;
289   Mat        C;
290 
291   PetscFunctionBegin;
292   ierr = MatLUFactorSymbolic(A,row,col,f,&C); CHKERRQ(ierr);
293   ierr = MatLUFactorNumeric(A,&C); CHKERRQ(ierr);
294 
295   /* free all the data structures from mat */
296   PetscFree(mat->a);
297   if (!mat->singlemalloc) {PetscFree(mat->i); PetscFree(mat->j);}
298   if (mat->diag) PetscFree(mat->diag);
299   if (mat->ilen) PetscFree(mat->ilen);
300   if (mat->imax) PetscFree(mat->imax);
301   if (mat->solve_work) PetscFree(mat->solve_work);
302   if (mat->inode.size) PetscFree(mat->inode.size);
303   PetscFree(mat);
304 
305   PetscMemcpy(A,C,sizeof(struct _p_Mat));
306   PetscHeaderDestroy(C);
307   PetscFunctionReturn(0);
308 }
309 /* ----------------------------------------------------------- */
310 #undef __FUNC__
311 #define __FUNC__ "MatSolve_SeqAIJ"
312 int MatSolve_SeqAIJ(Mat A,Vec bb, Vec xx)
313 {
314   Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data;
315   IS         iscol = a->col, isrow = a->row;
316   int        *r,*c, ierr, i,  n = a->m, *vi, *ai = a->i, *aj = a->j;
317   int        nz,shift = a->indexshift,*rout,*cout;
318   Scalar     *x,*b,*tmp, *tmps, *aa = a->a, sum, *v;
319 
320   PetscFunctionBegin;
321   if (!n) PetscFunctionReturn(0);
322 
323   VecGetArray_Fast(bb,b);
324   VecGetArray_Fast(xx,x);
325   tmp  = a->solve_work;
326 
327   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
328   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1);
329 
330   /* forward solve the lower triangular */
331   tmp[0] = b[*r++];
332   tmps   = tmp + shift;
333   for ( i=1; i<n; i++ ) {
334     v   = aa + ai[i] + shift;
335     vi  = aj + ai[i] + shift;
336     nz  = a->diag[i] - ai[i];
337     sum = b[*r++];
338     while (nz--) sum -= *v++ * tmps[*vi++];
339     tmp[i] = sum;
340   }
341 
342   /* backward solve the upper triangular */
343   for ( i=n-1; i>=0; i-- ){
344     v   = aa + a->diag[i] + (!shift);
345     vi  = aj + a->diag[i] + (!shift);
346     nz  = ai[i+1] - a->diag[i] - 1;
347     sum = tmp[i];
348     while (nz--) sum -= *v++ * tmps[*vi++];
349     x[*c--] = tmp[i] = sum*aa[a->diag[i]+shift];
350   }
351 
352   ierr = ISRestoreIndices(isrow,&rout); CHKERRQ(ierr);
353   ierr = ISRestoreIndices(iscol,&cout); CHKERRQ(ierr);
354   PLogFlops(2*a->nz - a->n);
355   PetscFunctionReturn(0);
356 }
357 
358 /* ----------------------------------------------------------- */
359 #undef __FUNC__
360 #define __FUNC__ "MatSolve_SeqAIJ_NaturalOrdering"
361 int MatSolve_SeqAIJ_NaturalOrdering(Mat A,Vec bb, Vec xx)
362 {
363   Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data;
364   int        i,  n = a->m, *vi, *ai = a->i, *aj = a->j,nz, *adiag = a->diag;
365   int        ai_i, adiag_i,ierr;
366   Scalar     *x,*b, *aa = a->a, sum, *v;
367 
368   PetscFunctionBegin;
369   if (!n) PetscFunctionReturn(0);
370   if (a->indexshift) {
371      ierr = MatSolve_SeqAIJ(A,bb,xx);CHKERRQ(ierr);
372      PetscFunctionReturn(0);
373   }
374 
375   VecGetArray_Fast(bb,b);
376   VecGetArray_Fast(xx,x);
377 
378 #if defined(USE_FORTRAN_KERNELS)
379   fortransolveaij_(&n,x,ai,aj,adiag,aa,b);
380 #else
381   /* forward solve the lower triangular */
382   x[0] = b[0];
383   for ( i=1; i<n; i++ ) {
384     ai_i = ai[i];
385     v    = aa + ai_i;
386     vi   = aj + ai_i;
387     nz   = adiag[i] - ai_i;
388     sum  = b[i];
389     while (nz--) sum -= *v++ * x[*vi++];
390     x[i] = sum;
391   }
392 
393   /* backward solve the upper triangular */
394   for ( i=n-1; i>=0; i-- ){
395     adiag_i = adiag[i];
396     v       = aa + adiag_i + 1;
397     vi      = aj + adiag_i + 1;
398     nz      = ai[i+1] - adiag_i - 1;
399     sum     = x[i];
400     while (nz--) sum -= *v++ * x[*vi++];
401     x[i]    = sum*aa[adiag_i];
402   }
403 #endif
404   PLogFlops(2*a->nz - a->n);
405   PetscFunctionReturn(0);
406 }
407 
408 #undef __FUNC__
409 #define __FUNC__ "MatSolveAdd_SeqAIJ"
410 int MatSolveAdd_SeqAIJ(Mat A,Vec bb, Vec yy, Vec xx)
411 {
412   Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data;
413   IS         iscol = a->col, isrow = a->row;
414   int        *r,*c, ierr, i,  n = a->m, *vi, *ai = a->i, *aj = a->j;
415   int        nz, shift = a->indexshift,*rout,*cout;
416   Scalar     *x,*b,*tmp, *aa = a->a, sum, *v;
417 
418   PetscFunctionBegin;
419   if (yy != xx) {ierr = VecCopy(yy,xx); CHKERRQ(ierr);}
420 
421   VecGetArray_Fast(bb,b);
422   VecGetArray_Fast(xx,x);
423   tmp  = a->solve_work;
424 
425   ierr = ISGetIndices(isrow,&rout); CHKERRQ(ierr); r = rout;
426   ierr = ISGetIndices(iscol,&cout); CHKERRQ(ierr); c = cout + (n-1);
427 
428   /* forward solve the lower triangular */
429   tmp[0] = b[*r++];
430   for ( i=1; i<n; i++ ) {
431     v   = aa + ai[i] + shift;
432     vi  = aj + ai[i] + shift;
433     nz  = a->diag[i] - ai[i];
434     sum = b[*r++];
435     while (nz--) sum -= *v++ * tmp[*vi++ + shift];
436     tmp[i] = sum;
437   }
438 
439   /* backward solve the upper triangular */
440   for ( i=n-1; i>=0; i-- ){
441     v   = aa + a->diag[i] + (!shift);
442     vi  = aj + a->diag[i] + (!shift);
443     nz  = ai[i+1] - a->diag[i] - 1;
444     sum = tmp[i];
445     while (nz--) sum -= *v++ * tmp[*vi++ + shift];
446     tmp[i] = sum*aa[a->diag[i]+shift];
447     x[*c--] += tmp[i];
448   }
449 
450   ierr = ISRestoreIndices(isrow,&rout); CHKERRQ(ierr);
451   ierr = ISRestoreIndices(iscol,&cout); CHKERRQ(ierr);
452   PLogFlops(2*a->nz);
453 
454   PetscFunctionReturn(0);
455 }
456 /* -------------------------------------------------------------------*/
457 #undef __FUNC__
458 #define __FUNC__ "MatSolveTrans_SeqAIJ"
459 int MatSolveTrans_SeqAIJ(Mat A,Vec bb, Vec xx)
460 {
461   Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data;
462   IS         iscol = a->col, isrow = a->row, invisrow,inviscol;
463   int        *r,*c, ierr, i, n = a->m, *vi, *ai = a->i, *aj = a->j;
464   int        nz,shift = a->indexshift,*rout,*cout;
465   Scalar     *x,*b,*tmp, *aa = a->a, *v;
466 
467   PetscFunctionBegin;
468   VecGetArray_Fast(bb,b);
469   VecGetArray_Fast(xx,x);
470   tmp  = a->solve_work;
471 
472   /* invert the permutations */
473   ierr = ISInvertPermutation(isrow,&invisrow); CHKERRQ(ierr);
474   ierr = ISInvertPermutation(iscol,&inviscol); CHKERRQ(ierr);
475 
476   ierr = ISGetIndices(invisrow,&rout); CHKERRQ(ierr); r = rout;
477   ierr = ISGetIndices(inviscol,&cout); CHKERRQ(ierr); c = cout;
478 
479   /* copy the b into temp work space according to permutation */
480   for ( i=0; i<n; i++ ) tmp[c[i]] = b[i];
481 
482   /* forward solve the U^T */
483   for ( i=0; i<n; i++ ) {
484     v   = aa + a->diag[i] + shift;
485     vi  = aj + a->diag[i] + (!shift);
486     nz  = ai[i+1] - a->diag[i] - 1;
487     tmp[i] *= *v++;
488     while (nz--) {
489       tmp[*vi++ + shift] -= (*v++)*tmp[i];
490     }
491   }
492 
493   /* backward solve the L^T */
494   for ( i=n-1; i>=0; i-- ){
495     v   = aa + a->diag[i] - 1 + shift;
496     vi  = aj + a->diag[i] - 1 + shift;
497     nz  = a->diag[i] - ai[i];
498     while (nz--) {
499       tmp[*vi-- + shift] -= (*v--)*tmp[i];
500     }
501   }
502 
503   /* copy tmp into x according to permutation */
504   for ( i=0; i<n; i++ ) x[r[i]] = tmp[i];
505 
506   ierr = ISRestoreIndices(invisrow,&rout); CHKERRQ(ierr);
507   ierr = ISRestoreIndices(inviscol,&cout); CHKERRQ(ierr);
508   ierr = ISDestroy(invisrow); CHKERRQ(ierr);
509   ierr = ISDestroy(inviscol); CHKERRQ(ierr);
510 
511   PLogFlops(2*a->nz-a->n);
512   PetscFunctionReturn(0);
513 }
514 
515 #undef __FUNC__
516 #define __FUNC__ "MatSolveTransAdd_SeqAIJ"
517 int MatSolveTransAdd_SeqAIJ(Mat A,Vec bb, Vec zz,Vec xx)
518 {
519   Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data;
520   IS         iscol = a->col, isrow = a->row, invisrow,inviscol;
521   int        *r,*c, ierr, i, n = a->m, *vi, *ai = a->i, *aj = a->j;
522   int        nz,shift = a->indexshift, *rout, *cout;
523   Scalar     *x,*b,*tmp, *aa = a->a, *v;
524 
525   PetscFunctionBegin;
526   if (zz != xx) VecCopy(zz,xx);
527 
528   VecGetArray_Fast(bb,b);
529   VecGetArray_Fast(xx,x);
530   tmp = a->solve_work;
531 
532   /* invert the permutations */
533   ierr = ISInvertPermutation(isrow,&invisrow); CHKERRQ(ierr);
534   ierr = ISInvertPermutation(iscol,&inviscol); CHKERRQ(ierr);
535   ierr = ISGetIndices(invisrow,&rout); CHKERRQ(ierr); r = rout;
536   ierr = ISGetIndices(inviscol,&cout); CHKERRQ(ierr); c = cout;
537 
538   /* copy the b into temp work space according to permutation */
539   for ( i=0; i<n; i++ ) tmp[c[i]] = b[i];
540 
541   /* forward solve the U^T */
542   for ( i=0; i<n; i++ ) {
543     v   = aa + a->diag[i] + shift;
544     vi  = aj + a->diag[i] + (!shift);
545     nz  = ai[i+1] - a->diag[i] - 1;
546     tmp[i] *= *v++;
547     while (nz--) {
548       tmp[*vi++ + shift] -= (*v++)*tmp[i];
549     }
550   }
551 
552   /* backward solve the L^T */
553   for ( i=n-1; i>=0; i-- ){
554     v   = aa + a->diag[i] - 1 + shift;
555     vi  = aj + a->diag[i] - 1 + shift;
556     nz  = a->diag[i] - ai[i];
557     while (nz--) {
558       tmp[*vi-- + shift] -= (*v--)*tmp[i];
559     }
560   }
561 
562   /* copy tmp into x according to permutation */
563   for ( i=0; i<n; i++ ) x[r[i]] += tmp[i];
564 
565   ierr = ISRestoreIndices(invisrow,&rout); CHKERRQ(ierr);
566   ierr = ISRestoreIndices(inviscol,&cout); CHKERRQ(ierr);
567   ierr = ISDestroy(invisrow); CHKERRQ(ierr);
568   ierr = ISDestroy(inviscol); CHKERRQ(ierr);
569 
570   PLogFlops(2*a->nz);
571   PetscFunctionReturn(0);
572 }
573 /* ----------------------------------------------------------------*/
574 
575 #undef __FUNC__
576 #define __FUNC__ "MatILUFactorSymbolic_SeqAIJ"
577 int MatILUFactorSymbolic_SeqAIJ(Mat A,IS isrow,IS iscol,double f,int levels,Mat *fact)
578 {
579   Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data, *b;
580   IS         isicol;
581   int        *r,*ic, ierr, prow, n = a->m, *ai = a->i, *aj = a->j;
582   int        *ainew,*ajnew, jmax,*fill, *xi, nz, *im,*ajfill,*flev;
583   int        *dloc, idx, row,m,fm, nzf, nzi,len,  realloc = 0;
584   int        incrlev,nnz,i,shift = a->indexshift;
585   PetscTruth col_identity, row_identity;
586 
587   PetscFunctionBegin;
588   /* special case that simply copies fill pattern */
589   ISIdentity(isrow,&row_identity); ISIdentity(iscol,&col_identity);
590   if (levels == 0 && row_identity && col_identity) {
591     ierr = MatConvertSameType_SeqAIJ(A,fact,DO_NOT_COPY_VALUES); CHKERRQ(ierr);
592     (*fact)->factor = FACTOR_LU;
593     b               = (Mat_SeqAIJ *) (*fact)->data;
594     if (!b->diag) {
595       ierr = MatMarkDiag_SeqAIJ(*fact); CHKERRQ(ierr);
596     }
597     b->row             = isrow;
598     b->col             = iscol;
599     b->solve_work      = (Scalar *) PetscMalloc((b->m+1)*sizeof(Scalar));CHKPTRQ(b->solve_work);
600     (*fact)->ops.solve = MatSolve_SeqAIJ_NaturalOrdering;
601     PetscFunctionReturn(0);
602   }
603 
604   ierr = ISInvertPermutation(iscol,&isicol); CHKERRQ(ierr);
605   ierr = ISGetIndices(isrow,&r); CHKERRQ(ierr);
606   ierr = ISGetIndices(isicol,&ic); CHKERRQ(ierr);
607 
608   /* get new row pointers */
609   ainew = (int *) PetscMalloc( (n+1)*sizeof(int) ); CHKPTRQ(ainew);
610   ainew[0] = -shift;
611   /* don't know how many column pointers are needed so estimate */
612   jmax = (int) (f*(ai[n]+!shift));
613   ajnew = (int *) PetscMalloc( (jmax)*sizeof(int) ); CHKPTRQ(ajnew);
614   /* ajfill is level of fill for each fill entry */
615   ajfill = (int *) PetscMalloc( (jmax)*sizeof(int) ); CHKPTRQ(ajfill);
616   /* fill is a linked list of nonzeros in active row */
617   fill = (int *) PetscMalloc( (n+1)*sizeof(int)); CHKPTRQ(fill);
618   /* im is level for each filled value */
619   im = (int *) PetscMalloc( (n+1)*sizeof(int)); CHKPTRQ(im);
620   /* dloc is location of diagonal in factor */
621   dloc = (int *) PetscMalloc( (n+1)*sizeof(int)); CHKPTRQ(dloc);
622   dloc[0]  = 0;
623   for ( prow=0; prow<n; prow++ ) {
624     /* first copy previous fill into linked list */
625     nzf     = nz  = ai[r[prow]+1] - ai[r[prow]];
626     if (!nz) SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,1,"Empty row in matrix");
627     xi      = aj + ai[r[prow]] + shift;
628     fill[n] = n;
629     while (nz--) {
630       fm  = n;
631       idx = ic[*xi++ + shift];
632       do {
633         m  = fm;
634         fm = fill[m];
635       } while (fm < idx);
636       fill[m]   = idx;
637       fill[idx] = fm;
638       im[idx]   = 0;
639     }
640     nzi = 0;
641     row = fill[n];
642     while ( row < prow ) {
643       incrlev = im[row] + 1;
644       nz      = dloc[row];
645       xi      = ajnew  + ainew[row] + shift + nz;
646       flev    = ajfill + ainew[row] + shift + nz + 1;
647       nnz     = ainew[row+1] - ainew[row] - nz - 1;
648       if (*xi++ + shift != row) {
649         SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,0,"Zero pivot: try running with -pc_ilu_nonzeros_along_diagonal");
650       }
651       fm      = row;
652       while (nnz-- > 0) {
653         idx = *xi++ + shift;
654         if (*flev + incrlev > levels) {
655           flev++;
656           continue;
657         }
658         do {
659           m  = fm;
660           fm = fill[m];
661         } while (fm < idx);
662         if (fm != idx) {
663           im[idx]   = *flev + incrlev;
664           fill[m]   = idx;
665           fill[idx] = fm;
666           fm        = idx;
667           nzf++;
668         }
669         else {
670           if (im[idx] > *flev + incrlev) im[idx] = *flev+incrlev;
671         }
672         flev++;
673       }
674       row = fill[row];
675       nzi++;
676     }
677     /* copy new filled row into permanent storage */
678     ainew[prow+1] = ainew[prow] + nzf;
679     if (ainew[prow+1] > jmax-shift) {
680       /* allocate a longer ajnew */
681       int maxadd;
682       maxadd = (int) ((f*(ai[n]+!shift)*(n-prow+5))/n);
683       if (maxadd < nzf) maxadd = (n-prow)*(nzf+1);
684       jmax += maxadd;
685       xi = (int *) PetscMalloc( jmax*sizeof(int) );CHKPTRQ(xi);
686       PetscMemcpy(xi,ajnew,(ainew[prow]+shift)*sizeof(int));
687       PetscFree(ajnew);
688       ajnew = xi;
689       /* allocate a longer ajfill */
690       xi = (int *) PetscMalloc( jmax*sizeof(int) );CHKPTRQ(xi);
691       PetscMemcpy(xi,ajfill,(ainew[prow]+shift)*sizeof(int));
692       PetscFree(ajfill);
693       ajfill = xi;
694       realloc++;
695     }
696     xi          = ajnew + ainew[prow] + shift;
697     flev        = ajfill + ainew[prow] + shift;
698     dloc[prow]  = nzi;
699     fm          = fill[n];
700     while (nzf--) {
701       *xi++   = fm - shift;
702       *flev++ = im[fm];
703       fm      = fill[fm];
704     }
705   }
706   PetscFree(ajfill);
707   ierr = ISRestoreIndices(isrow,&r); CHKERRQ(ierr);
708   ierr = ISRestoreIndices(isicol,&ic); CHKERRQ(ierr);
709   ierr = ISDestroy(isicol); CHKERRQ(ierr);
710   PetscFree(fill); PetscFree(im);
711 
712   {
713     double af = ((double)ainew[n])/((double)ai[n]);
714     PLogInfo(A,"Info:MatILUFactorSymbolic_SeqAIJ:Reallocs %d Fill ratio:given %g needed %g\n",
715              realloc,f,af);
716     PLogInfo(A,"Info:MatILUFactorSymbolic_SeqAIJ:Run with -pc_ilu_fill %g or use \n",af);
717     PLogInfo(A,"Info:MatILUFactorSymbolic_SeqAIJ:PCILUSetFill(pc,%g);\n",af);
718     PLogInfo(A,"Info:MatILUFactorSymbolic_SeqAIJ:for best performance.\n");
719   }
720 
721   /* put together the new matrix */
722   ierr = MatCreateSeqAIJ(A->comm,n,n,0,PETSC_NULL,fact); CHKERRQ(ierr);
723   b = (Mat_SeqAIJ *) (*fact)->data;
724   PetscFree(b->imax);
725   b->singlemalloc = 0;
726   len = (ainew[n] + shift)*sizeof(Scalar);
727   /* the next line frees the default space generated by the Create() */
728   PetscFree(b->a); PetscFree(b->ilen);
729   b->a          = (Scalar *) PetscMalloc( len+1 ); CHKPTRQ(b->a);
730   b->j          = ajnew;
731   b->i          = ainew;
732   for ( i=0; i<n; i++ ) dloc[i] += ainew[i];
733   b->diag       = dloc;
734   b->ilen       = 0;
735   b->imax       = 0;
736   b->row        = isrow;
737   b->col        = iscol;
738   b->solve_work = (Scalar *) PetscMalloc( (n+1)*sizeof(Scalar));
739   CHKPTRQ(b->solve_work);
740   /* In b structure:  Free imax, ilen, old a, old j.
741      Allocate dloc, solve_work, new a, new j */
742   PLogObjectMemory(*fact,(ainew[n]+shift-n) * (sizeof(int)+sizeof(Scalar)));
743   b->maxnz          = b->nz = ainew[n] + shift;
744   (*fact)->factor   = FACTOR_LU;
745 
746   (*fact)->info.factor_mallocs    = realloc;
747   (*fact)->info.fill_ratio_given  = f;
748   (*fact)->info.fill_ratio_needed = ((double)ainew[n])/((double)ai[prow]);
749 
750   PetscFunctionReturn(0);
751 }
752 
753 
754 
755 
756