xref: /petsc/src/mat/impls/aij/seq/aijfact.c (revision 026e39d04bcf2a445c0a9332c845c324154b2e8f)
1 #ifndef lint
2 static char vcid[] = "$Id: aijfact.c,v 1.69 1996/12/08 20:07:38 bsmith Exp balay $";
3 #endif
4 
5 #include "src/mat/impls/aij/seq/aij.h"
6 #include "src/vec/vecimpl.h"
7 
8 #undef __FUNCTION__
9 #define __FUNCTION__ "MatOrder_Flow_SeqAIJ"
10 int MatOrder_Flow_SeqAIJ(Mat mat,MatReordering type,IS *irow,IS *icol)
11 {
12   SETERRQ(PETSC_ERR_SUP,"MatOrder_Flow_SeqAIJ:Code not written");
13 }
14 
15 /*
16     Factorization code for AIJ format.
17 */
18 #undef __FUNCTION__
19 #define __FUNCTION__ "MatLUFactorSymbolic_SeqAIJ"
20 int MatLUFactorSymbolic_SeqAIJ(Mat A,IS isrow,IS iscol,double f,Mat *B)
21 {
22   Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data, *b;
23   IS         isicol;
24   int        *r,*ic, ierr, i, n = a->m, *ai = a->i, *aj = a->j;
25   int        *ainew,*ajnew, jmax,*fill, *ajtmp, nz,shift = a->indexshift;
26   int        *idnew, idx, row,m,fm, nnz, nzi, realloc = 0,nzbd,*im;
27 
28   PetscValidHeaderSpecific(isrow,IS_COOKIE);
29   PetscValidHeaderSpecific(iscol,IS_COOKIE);
30 
31   ierr = ISInvertPermutation(iscol,&isicol); CHKERRQ(ierr);
32   ISGetIndices(isrow,&r); ISGetIndices(isicol,&ic);
33 
34   /* get new row pointers */
35   ainew = (int *) PetscMalloc( (n+1)*sizeof(int) ); CHKPTRQ(ainew);
36   ainew[0] = -shift;
37   /* don't know how many column pointers are needed so estimate */
38   jmax = (int) (f*ai[n]+(!shift));
39   ajnew = (int *) PetscMalloc( (jmax)*sizeof(int) ); CHKPTRQ(ajnew);
40   /* fill is a linked list of nonzeros in active row */
41   fill = (int *) PetscMalloc( (2*n+1)*sizeof(int)); CHKPTRQ(fill);
42   im = fill + n + 1;
43   /* idnew is location of diagonal in factor */
44   idnew = (int *) PetscMalloc( (n+1)*sizeof(int)); CHKPTRQ(idnew);
45   idnew[0] = -shift;
46 
47   for ( i=0; i<n; i++ ) {
48     /* first copy previous fill into linked list */
49     nnz     = nz    = ai[r[i]+1] - ai[r[i]];
50     ajtmp   = aj + ai[r[i]] + shift;
51     fill[n] = n;
52     while (nz--) {
53       fm  = n;
54       idx = ic[*ajtmp++ + shift];
55       do {
56         m  = fm;
57         fm = fill[m];
58       } while (fm < idx);
59       fill[m]   = idx;
60       fill[idx] = fm;
61     }
62     row = fill[n];
63     while ( row < i ) {
64       ajtmp = ajnew + idnew[row] + (!shift);
65       nzbd  = 1 + idnew[row] - ainew[row];
66       nz    = im[row] - nzbd;
67       fm    = row;
68       while (nz-- > 0) {
69         idx = *ajtmp++ + shift;
70         nzbd++;
71         if (idx == i) im[row] = nzbd;
72         do {
73           m  = fm;
74           fm = fill[m];
75         } while (fm < idx);
76         if (fm != idx) {
77           fill[m]   = idx;
78           fill[idx] = fm;
79           fm        = idx;
80           nnz++;
81         }
82       }
83       row = fill[row];
84     }
85     /* copy new filled row into permanent storage */
86     ainew[i+1] = ainew[i] + nnz;
87     if (ainew[i+1] > jmax) {
88       /* allocate a longer ajnew */
89       int maxadd;
90       maxadd = (int) ((f*(ai[n]+(!shift))*(n-i+5))/n);
91       if (maxadd < nnz) maxadd = (n-i)*(nnz+1);
92       jmax += maxadd;
93       ajtmp = (int *) PetscMalloc( jmax*sizeof(int) );CHKPTRQ(ajtmp);
94       PetscMemcpy(ajtmp,ajnew,(ainew[i]+shift)*sizeof(int));
95       PetscFree(ajnew);
96       ajnew = ajtmp;
97       realloc++; /* count how many times we realloc */
98     }
99     ajtmp = ajnew + ainew[i] + shift;
100     fm    = fill[n];
101     nzi   = 0;
102     im[i] = nnz;
103     while (nnz--) {
104       if (fm < i) nzi++;
105       *ajtmp++ = fm - shift;
106       fm       = fill[fm];
107     }
108     idnew[i] = ainew[i] + nzi;
109   }
110 
111   PLogInfo(A,
112     "Info:MatLUFactorSymbolic_SeqAIJ:Reallocs %d Fill ratio:given %g needed %g\n",
113                              realloc,f,((double)ainew[n])/((double)ai[i]));
114 
115   ierr = ISRestoreIndices(isrow,&r); CHKERRQ(ierr);
116   ierr = ISRestoreIndices(isicol,&ic); CHKERRQ(ierr);
117 
118   PetscFree(fill);
119 
120   /* put together the new matrix */
121   ierr = MatCreateSeqAIJ(A->comm,n,n,0,PETSC_NULL,B); CHKERRQ(ierr);
122   PLogObjectParent(*B,isicol);
123   ierr = ISDestroy(isicol); CHKERRQ(ierr);
124   b = (Mat_SeqAIJ *) (*B)->data;
125   PetscFree(b->imax);
126   b->singlemalloc = 0;
127   /* the next line frees the default space generated by the Create() */
128   PetscFree(b->a); PetscFree(b->ilen);
129   b->a          = (Scalar *) PetscMalloc((ainew[n]+shift+1)*sizeof(Scalar));CHKPTRQ(b->a);
130   b->j          = ajnew;
131   b->i          = ainew;
132   b->diag       = idnew;
133   b->ilen       = 0;
134   b->imax       = 0;
135   b->row        = isrow;
136   b->col        = iscol;
137   b->solve_work = (Scalar *) PetscMalloc( (n+1)*sizeof(Scalar));CHKPTRQ(b->solve_work);
138   /* In b structure:  Free imax, ilen, old a, old j.
139      Allocate idnew, solve_work, new a, new j */
140   PLogObjectMemory(*B,(ainew[n]+shift-n)*(sizeof(int)+sizeof(Scalar)));
141   b->maxnz = b->nz = ainew[n] + shift;
142 
143   (*B)->info.factor_mallocs    = realloc;
144   (*B)->info.fill_ratio_given  = f;
145   (*B)->info.fill_ratio_needed = ((double)ainew[n])/((double)ai[i]);
146 
147   return 0;
148 }
149 /* ----------------------------------------------------------- */
150 int Mat_AIJ_CheckInode(Mat);
151 
152 #undef __FUNCTION__
153 #define __FUNCTION__ "MatLUFactorNumeric_SeqAIJ"
154 int MatLUFactorNumeric_SeqAIJ(Mat A,Mat *B)
155 {
156   Mat        C = *B;
157   Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data, *b = (Mat_SeqAIJ *)C->data;
158   IS         iscol = b->col, isrow = b->row, isicol;
159   int        *r,*ic, ierr, i, j, n = a->m, *ai = b->i, *aj = b->j;
160   int        *ajtmpold, *ajtmp, nz, row, *ics, shift = a->indexshift;
161   int        *diag_offset = b->diag,diag,k;
162   int        preserve_row_sums = (int) a->ilu_preserve_row_sums;
163   Scalar     *rtmp,*v, *pc, multiplier,sum,inner_sum,*rowsums = 0;
164   double     ssum;
165   /* These declarations are for optimizations.  They reduce the number of
166      memory references that are made by locally storing information; the
167      word "register" used here with pointers can be viewed as "private" or
168      "known only to me"
169    */
170   register Scalar *pv, *rtmps,*u_values;
171   register int    *pj;
172 
173   ierr  = ISInvertPermutation(iscol,&isicol); CHKERRQ(ierr);
174   PLogObjectParent(*B,isicol);
175   ierr  = ISGetIndices(isrow,&r); CHKERRQ(ierr);
176   ierr  = ISGetIndices(isicol,&ic); CHKERRQ(ierr);
177   rtmp  = (Scalar *) PetscMalloc( (n+1)*sizeof(Scalar) ); CHKPTRQ(rtmp);
178   PetscMemzero(rtmp,(n+1)*sizeof(Scalar));
179   rtmps = rtmp + shift; ics = ic + shift;
180 
181   /* precalcuate row sums */
182   if (preserve_row_sums) {
183     rowsums = (Scalar *) PetscMalloc( n*sizeof(Scalar) ); CHKPTRQ(rowsums);
184     for ( i=0; i<n; i++ ) {
185       nz  = a->i[r[i]+1] - a->i[r[i]];
186       v   = a->a + a->i[r[i]] + shift;
187       sum = 0.0;
188       for ( j=0; j<nz; j++ ) sum += v[j];
189       rowsums[i] = sum;
190     }
191   }
192 
193   for ( i=0; i<n; i++ ) {
194     nz    = ai[i+1] - ai[i];
195     ajtmp = aj + ai[i] + shift;
196     for  ( j=0; j<nz; j++ ) rtmps[ajtmp[j]] = 0.0;
197 
198     /* load in initial (unfactored row) */
199     nz       = a->i[r[i]+1] - a->i[r[i]];
200     ajtmpold = a->j + a->i[r[i]] + shift;
201     v        = a->a + a->i[r[i]] + shift;
202     for ( j=0; j<nz; j++ ) rtmp[ics[ajtmpold[j]]] =  v[j];
203 
204     row = *ajtmp++ + shift;
205     while (row < i) {
206       pc = rtmp + row;
207       if (*pc != 0.0) {
208         pv         = b->a + diag_offset[row] + shift;
209         pj         = b->j + diag_offset[row] + (!shift);
210         multiplier = *pc / *pv++;
211         *pc        = multiplier;
212         nz         = ai[row+1] - diag_offset[row] - 1;
213         for (j=0; j<nz; j++) rtmps[pj[j]] -= multiplier * pv[j];
214         PLogFlops(2*nz);
215       }
216       row = *ajtmp++ + shift;
217     }
218     /* finished row so stick it into b->a */
219     pv = b->a + ai[i] + shift;
220     pj = b->j + ai[i] + shift;
221     nz = ai[i+1] - ai[i];
222     for ( j=0; j<nz; j++ ) {pv[j] = rtmps[pj[j]];}
223     diag = diag_offset[i] - ai[i];
224     /*
225           Possibly adjust diagonal entry on current row to force
226         LU matrix to have same row sum as initial matrix.
227     */
228     if (preserve_row_sums) {
229       pj  = b->j + ai[i] + shift;
230       sum = rowsums[i];
231       for ( j=0; j<diag; j++ ) {
232         u_values  = b->a + diag_offset[pj[j]] + shift;
233         nz        = ai[pj[j]+1] - diag_offset[pj[j]];
234         inner_sum = 0.0;
235         for ( k=0; k<nz; k++ ) {
236           inner_sum += u_values[k];
237         }
238         sum -= pv[j]*inner_sum;
239 
240       }
241       nz       = ai[i+1] - diag_offset[i] - 1;
242       u_values = b->a + diag_offset[i] + 1 + shift;
243       for ( k=0; k<nz; k++ ) {
244         sum -= u_values[k];
245       }
246       ssum = PetscAbsScalar(sum/pv[diag]);
247       if (ssum < 1000. && ssum > .001) pv[diag] = sum;
248     }
249     /* check pivot entry for current row */
250     if (pv[diag] == 0.0) {
251       SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"MatLUFactorNumeric_SeqAIJ:Zero pivot");
252     }
253   }
254 
255   /* invert diagonal entries for simplier triangular solves */
256   for ( i=0; i<n; i++ ) {
257     b->a[diag_offset[i]+shift] = 1.0/b->a[diag_offset[i]+shift];
258   }
259 
260   if (preserve_row_sums) PetscFree(rowsums);
261   PetscFree(rtmp);
262   ierr = ISRestoreIndices(isicol,&ic); CHKERRQ(ierr);
263   ierr = ISRestoreIndices(isrow,&r); CHKERRQ(ierr);
264   ierr = ISDestroy(isicol); CHKERRQ(ierr);
265   C->factor = FACTOR_LU;
266   ierr = Mat_AIJ_CheckInode(C); CHKERRQ(ierr);
267   C->assembled = PETSC_TRUE;
268   PLogFlops(b->n);
269   return 0;
270 }
271 /* ----------------------------------------------------------- */
272 #undef __FUNCTION__
273 #define __FUNCTION__ "MatLUFactor_SeqAIJ"
274 int MatLUFactor_SeqAIJ(Mat A,IS row,IS col,double f)
275 {
276   Mat_SeqAIJ *mat = (Mat_SeqAIJ *) A->data;
277   int        ierr;
278   Mat        C;
279 
280   ierr = MatLUFactorSymbolic_SeqAIJ(A,row,col,f,&C); CHKERRQ(ierr);
281   ierr = MatLUFactorNumeric_SeqAIJ(A,&C); CHKERRQ(ierr);
282 
283   /* free all the data structures from mat */
284   PetscFree(mat->a);
285   if (!mat->singlemalloc) {PetscFree(mat->i); PetscFree(mat->j);}
286   if (mat->diag) PetscFree(mat->diag);
287   if (mat->ilen) PetscFree(mat->ilen);
288   if (mat->imax) PetscFree(mat->imax);
289   if (mat->solve_work) PetscFree(mat->solve_work);
290   if (mat->inode.size) PetscFree(mat->inode.size);
291   PetscFree(mat);
292 
293   PetscMemcpy(A,C,sizeof(struct _Mat));
294   PetscHeaderDestroy(C);
295   return 0;
296 }
297 /* ----------------------------------------------------------- */
298 #undef __FUNCTION__
299 #define __FUNCTION__ "MatSolve_SeqAIJ"
300 int MatSolve_SeqAIJ(Mat A,Vec bb, Vec xx)
301 {
302   Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data;
303   IS         iscol = a->col, isrow = a->row;
304   int        *r,*c, ierr, i,  n = a->m, *vi, *ai = a->i, *aj = a->j;
305   int        nz,shift = a->indexshift,*rout,*cout;
306   Scalar     *x,*b,*tmp, *tmps, *aa = a->a, sum, *v;
307 
308   if (!n) return 0;
309 
310   VecGetArray_Fast(bb,b);
311   VecGetArray_Fast(xx,x);
312   tmp  = a->solve_work;
313 
314   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
315   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1);
316 
317   /* forward solve the lower triangular */
318   tmp[0] = b[*r++];
319   tmps   = tmp + shift;
320   for ( i=1; i<n; i++ ) {
321     v   = aa + ai[i] + shift;
322     vi  = aj + ai[i] + shift;
323     nz  = a->diag[i] - ai[i];
324     sum = b[*r++];
325     while (nz--) sum -= *v++ * tmps[*vi++];
326     tmp[i] = sum;
327   }
328 
329   /* backward solve the upper triangular */
330   for ( i=n-1; i>=0; i-- ){
331     v   = aa + a->diag[i] + (!shift);
332     vi  = aj + a->diag[i] + (!shift);
333     nz  = ai[i+1] - a->diag[i] - 1;
334     sum = tmp[i];
335     while (nz--) sum -= *v++ * tmps[*vi++];
336     x[*c--] = tmp[i] = sum*aa[a->diag[i]+shift];
337   }
338 
339   ierr = ISRestoreIndices(isrow,&rout); CHKERRQ(ierr);
340   ierr = ISRestoreIndices(iscol,&cout); CHKERRQ(ierr);
341   PLogFlops(2*a->nz - a->n);
342   return 0;
343 }
344 
345 #undef __FUNCTION__
346 #define __FUNCTION__ "MatSolveAdd_SeqAIJ"
347 int MatSolveAdd_SeqAIJ(Mat A,Vec bb, Vec yy, Vec xx)
348 {
349   Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data;
350   IS         iscol = a->col, isrow = a->row;
351   int        *r,*c, ierr, i,  n = a->m, *vi, *ai = a->i, *aj = a->j;
352   int        nz, shift = a->indexshift,*rout,*cout;
353   Scalar     *x,*b,*tmp, *aa = a->a, sum, *v;
354 
355   if (yy != xx) {ierr = VecCopy(yy,xx); CHKERRQ(ierr);}
356 
357   VecGetArray_Fast(bb,b);
358   VecGetArray_Fast(xx,x);
359   tmp  = a->solve_work;
360 
361   ierr = ISGetIndices(isrow,&rout); CHKERRQ(ierr); r = rout;
362   ierr = ISGetIndices(iscol,&cout); CHKERRQ(ierr); c = cout + (n-1);
363 
364   /* forward solve the lower triangular */
365   tmp[0] = b[*r++];
366   for ( i=1; i<n; i++ ) {
367     v   = aa + ai[i] + shift;
368     vi  = aj + ai[i] + shift;
369     nz  = a->diag[i] - ai[i];
370     sum = b[*r++];
371     while (nz--) sum -= *v++ * tmp[*vi++ + shift];
372     tmp[i] = sum;
373   }
374 
375   /* backward solve the upper triangular */
376   for ( i=n-1; i>=0; i-- ){
377     v   = aa + a->diag[i] + (!shift);
378     vi  = aj + a->diag[i] + (!shift);
379     nz  = ai[i+1] - a->diag[i] - 1;
380     sum = tmp[i];
381     while (nz--) sum -= *v++ * tmp[*vi++ + shift];
382     tmp[i] = sum*aa[a->diag[i]+shift];
383     x[*c--] += tmp[i];
384   }
385 
386   ierr = ISRestoreIndices(isrow,&rout); CHKERRQ(ierr);
387   ierr = ISRestoreIndices(iscol,&cout); CHKERRQ(ierr);
388   PLogFlops(2*a->nz);
389 
390   return 0;
391 }
392 /* -------------------------------------------------------------------*/
393 #undef __FUNCTION__
394 #define __FUNCTION__ "MatSolveTrans_SeqAIJ"
395 int MatSolveTrans_SeqAIJ(Mat A,Vec bb, Vec xx)
396 {
397   Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data;
398   IS         iscol = a->col, isrow = a->row, invisrow,inviscol;
399   int        *r,*c, ierr, i, n = a->m, *vi, *ai = a->i, *aj = a->j;
400   int        nz,shift = a->indexshift,*rout,*cout;
401   Scalar     *x,*b,*tmp, *aa = a->a, *v;
402 
403   VecGetArray_Fast(bb,b);
404   VecGetArray_Fast(xx,x);
405   tmp  = a->solve_work;
406 
407   /* invert the permutations */
408   ierr = ISInvertPermutation(isrow,&invisrow); CHKERRQ(ierr);
409   ierr = ISInvertPermutation(iscol,&inviscol); CHKERRQ(ierr);
410 
411   ierr = ISGetIndices(invisrow,&rout); CHKERRQ(ierr); r = rout;
412   ierr = ISGetIndices(inviscol,&cout); CHKERRQ(ierr); c = cout;
413 
414   /* copy the b into temp work space according to permutation */
415   for ( i=0; i<n; i++ ) tmp[c[i]] = b[i];
416 
417   /* forward solve the U^T */
418   for ( i=0; i<n; i++ ) {
419     v   = aa + a->diag[i] + shift;
420     vi  = aj + a->diag[i] + (!shift);
421     nz  = ai[i+1] - a->diag[i] - 1;
422     tmp[i] *= *v++;
423     while (nz--) {
424       tmp[*vi++ + shift] -= (*v++)*tmp[i];
425     }
426   }
427 
428   /* backward solve the L^T */
429   for ( i=n-1; i>=0; i-- ){
430     v   = aa + a->diag[i] - 1 + shift;
431     vi  = aj + a->diag[i] - 1 + shift;
432     nz  = a->diag[i] - ai[i];
433     while (nz--) {
434       tmp[*vi-- + shift] -= (*v--)*tmp[i];
435     }
436   }
437 
438   /* copy tmp into x according to permutation */
439   for ( i=0; i<n; i++ ) x[r[i]] = tmp[i];
440 
441   ierr = ISRestoreIndices(invisrow,&rout); CHKERRQ(ierr);
442   ierr = ISRestoreIndices(inviscol,&cout); CHKERRQ(ierr);
443   ierr = ISDestroy(invisrow); CHKERRQ(ierr);
444   ierr = ISDestroy(inviscol); CHKERRQ(ierr);
445 
446   PLogFlops(2*a->nz-a->n);
447   return 0;
448 }
449 
450 #undef __FUNCTION__
451 #define __FUNCTION__ "MatSolveTransAdd_SeqAIJ"
452 int MatSolveTransAdd_SeqAIJ(Mat A,Vec bb, Vec zz,Vec xx)
453 {
454   Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data;
455   IS         iscol = a->col, isrow = a->row, invisrow,inviscol;
456   int        *r,*c, ierr, i, n = a->m, *vi, *ai = a->i, *aj = a->j;
457   int        nz,shift = a->indexshift, *rout, *cout;
458   Scalar     *x,*b,*tmp, *aa = a->a, *v;
459 
460   if (zz != xx) VecCopy(zz,xx);
461 
462   VecGetArray_Fast(bb,b);
463   VecGetArray_Fast(xx,x);
464   tmp = a->solve_work;
465 
466   /* invert the permutations */
467   ierr = ISInvertPermutation(isrow,&invisrow); CHKERRQ(ierr);
468   ierr = ISInvertPermutation(iscol,&inviscol); CHKERRQ(ierr);
469   ierr = ISGetIndices(invisrow,&rout); CHKERRQ(ierr); r = rout;
470   ierr = ISGetIndices(inviscol,&cout); CHKERRQ(ierr); c = cout;
471 
472   /* copy the b into temp work space according to permutation */
473   for ( i=0; i<n; i++ ) tmp[c[i]] = b[i];
474 
475   /* forward solve the U^T */
476   for ( i=0; i<n; i++ ) {
477     v   = aa + a->diag[i] + shift;
478     vi  = aj + a->diag[i] + (!shift);
479     nz  = ai[i+1] - a->diag[i] - 1;
480     tmp[i] *= *v++;
481     while (nz--) {
482       tmp[*vi++ + shift] -= (*v++)*tmp[i];
483     }
484   }
485 
486   /* backward solve the L^T */
487   for ( i=n-1; i>=0; i-- ){
488     v   = aa + a->diag[i] - 1 + shift;
489     vi  = aj + a->diag[i] - 1 + shift;
490     nz  = a->diag[i] - ai[i];
491     while (nz--) {
492       tmp[*vi-- + shift] -= (*v--)*tmp[i];
493     }
494   }
495 
496   /* copy tmp into x according to permutation */
497   for ( i=0; i<n; i++ ) x[r[i]] += tmp[i];
498 
499   ierr = ISRestoreIndices(invisrow,&rout); CHKERRQ(ierr);
500   ierr = ISRestoreIndices(inviscol,&cout); CHKERRQ(ierr);
501   ierr = ISDestroy(invisrow); CHKERRQ(ierr);
502   ierr = ISDestroy(inviscol); CHKERRQ(ierr);
503 
504   PLogFlops(2*a->nz);
505   return 0;
506 }
507 /* ----------------------------------------------------------------*/
508 
509 #undef __FUNCTION__
510 #define __FUNCTION__ "MatILUFactorSymbolic_SeqAIJ"
511 int MatILUFactorSymbolic_SeqAIJ(Mat A,IS isrow,IS iscol,double f,int levels,Mat *fact)
512 {
513   Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data, *b;
514   IS         isicol;
515   int        *r,*ic, ierr, prow, n = a->m, *ai = a->i, *aj = a->j;
516   int        *ainew,*ajnew, jmax,*fill, *xi, nz, *im,*ajfill,*flev;
517   int        *dloc, idx, row,m,fm, nzf, nzi,len,  realloc = 0;
518   int        incrlev,nnz,i,shift = a->indexshift;
519   PetscTruth col_identity, row_identity;
520 
521   /* special case that simply copies fill pattern */
522   ISIdentity(isrow,&row_identity); ISIdentity(iscol,&col_identity);
523   if (levels == 0 && row_identity && col_identity) {
524     ierr = MatConvertSameType_SeqAIJ(A,fact,DO_NOT_COPY_VALUES); CHKERRQ(ierr);
525     (*fact)->factor = FACTOR_LU;
526     b               = (Mat_SeqAIJ *) (*fact)->data;
527     if (!b->diag) {
528       ierr = MatMarkDiag_SeqAIJ(*fact); CHKERRQ(ierr);
529     }
530     b->row          = isrow;
531     b->col          = iscol;
532     b->solve_work = (Scalar *) PetscMalloc((b->m+1)*sizeof(Scalar));CHKPTRQ(b->solve_work);
533     return 0;
534   }
535 
536   ierr = ISInvertPermutation(iscol,&isicol); CHKERRQ(ierr);
537   ierr = ISGetIndices(isrow,&r); CHKERRQ(ierr);
538   ierr = ISGetIndices(isicol,&ic); CHKERRQ(ierr);
539 
540   /* get new row pointers */
541   ainew = (int *) PetscMalloc( (n+1)*sizeof(int) ); CHKPTRQ(ainew);
542   ainew[0] = -shift;
543   /* don't know how many column pointers are needed so estimate */
544   jmax = (int) (f*(ai[n]+!shift));
545   ajnew = (int *) PetscMalloc( (jmax)*sizeof(int) ); CHKPTRQ(ajnew);
546   /* ajfill is level of fill for each fill entry */
547   ajfill = (int *) PetscMalloc( (jmax)*sizeof(int) ); CHKPTRQ(ajfill);
548   /* fill is a linked list of nonzeros in active row */
549   fill = (int *) PetscMalloc( (n+1)*sizeof(int)); CHKPTRQ(fill);
550   /* im is level for each filled value */
551   im = (int *) PetscMalloc( (n+1)*sizeof(int)); CHKPTRQ(im);
552   /* dloc is location of diagonal in factor */
553   dloc = (int *) PetscMalloc( (n+1)*sizeof(int)); CHKPTRQ(dloc);
554   dloc[0]  = 0;
555   for ( prow=0; prow<n; prow++ ) {
556     /* first copy previous fill into linked list */
557     nzf     = nz  = ai[r[prow]+1] - ai[r[prow]];
558     xi      = aj + ai[r[prow]] + shift;
559     fill[n] = n;
560     while (nz--) {
561       fm  = n;
562       idx = ic[*xi++ + shift];
563       do {
564         m  = fm;
565         fm = fill[m];
566       } while (fm < idx);
567       fill[m]   = idx;
568       fill[idx] = fm;
569       im[idx]   = 0;
570     }
571     nzi = 0;
572     row = fill[n];
573     while ( row < prow ) {
574       incrlev = im[row] + 1;
575       nz      = dloc[row];
576       xi      = ajnew  + ainew[row] + shift + nz;
577       flev    = ajfill + ainew[row] + shift + nz + 1;
578       nnz     = ainew[row+1] - ainew[row] - nz - 1;
579       if (*xi++ + shift != row) {
580         SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"MatILUFactorSymbolic_SeqAIJ:zero pivot");
581       }
582       fm      = row;
583       while (nnz-- > 0) {
584         idx = *xi++ + shift;
585         if (*flev + incrlev > levels) {
586           flev++;
587           continue;
588         }
589         do {
590           m  = fm;
591           fm = fill[m];
592         } while (fm < idx);
593         if (fm != idx) {
594           im[idx]   = *flev + incrlev;
595           fill[m]   = idx;
596           fill[idx] = fm;
597           fm        = idx;
598           nzf++;
599         }
600         else {
601           if (im[idx] > *flev + incrlev) im[idx] = *flev+incrlev;
602         }
603         flev++;
604       }
605       row = fill[row];
606       nzi++;
607     }
608     /* copy new filled row into permanent storage */
609     ainew[prow+1] = ainew[prow] + nzf;
610     if (ainew[prow+1] > jmax-shift) {
611       /* allocate a longer ajnew */
612       int maxadd;
613       maxadd = (int) ((f*(ai[n]+!shift)*(n-prow+5))/n);
614       if (maxadd < nzf) maxadd = (n-prow)*(nzf+1);
615       jmax += maxadd;
616       xi = (int *) PetscMalloc( jmax*sizeof(int) );CHKPTRQ(xi);
617       PetscMemcpy(xi,ajnew,(ainew[prow]+shift)*sizeof(int));
618       PetscFree(ajnew);
619       ajnew = xi;
620       /* allocate a longer ajfill */
621       xi = (int *) PetscMalloc( jmax*sizeof(int) );CHKPTRQ(xi);
622       PetscMemcpy(xi,ajfill,(ainew[prow]+shift)*sizeof(int));
623       PetscFree(ajfill);
624       ajfill = xi;
625       realloc++;
626     }
627     xi          = ajnew + ainew[prow] + shift;
628     flev        = ajfill + ainew[prow] + shift;
629     dloc[prow]  = nzi;
630     fm          = fill[n];
631     while (nzf--) {
632       *xi++   = fm - shift;
633       *flev++ = im[fm];
634       fm      = fill[fm];
635     }
636   }
637   PetscFree(ajfill);
638   ierr = ISRestoreIndices(isrow,&r); CHKERRQ(ierr);
639   ierr = ISRestoreIndices(isicol,&ic); CHKERRQ(ierr);
640   ierr = ISDestroy(isicol); CHKERRQ(ierr);
641   PetscFree(fill); PetscFree(im);
642 
643   PLogInfo(A,
644     "Info:MatILUFactorSymbolic_SeqAIJ:Realloc %d Fill ratio:given %g needed %g\n",
645                              realloc,f,((double)ainew[n])/((double)ai[prow]));
646 
647   /* put together the new matrix */
648   ierr = MatCreateSeqAIJ(A->comm,n,n,0,PETSC_NULL,fact); CHKERRQ(ierr);
649   b = (Mat_SeqAIJ *) (*fact)->data;
650   PetscFree(b->imax);
651   b->singlemalloc = 0;
652   len = (ainew[n] + shift)*sizeof(Scalar);
653   /* the next line frees the default space generated by the Create() */
654   PetscFree(b->a); PetscFree(b->ilen);
655   b->a          = (Scalar *) PetscMalloc( len ); CHKPTRQ(b->a);
656   b->j          = ajnew;
657   b->i          = ainew;
658   for ( i=0; i<n; i++ ) dloc[i] += ainew[i];
659   b->diag       = dloc;
660   b->ilen       = 0;
661   b->imax       = 0;
662   b->row        = isrow;
663   b->col        = iscol;
664   b->solve_work = (Scalar *) PetscMalloc( (n+1)*sizeof(Scalar));
665   CHKPTRQ(b->solve_work);
666   /* In b structure:  Free imax, ilen, old a, old j.
667      Allocate dloc, solve_work, new a, new j */
668   PLogObjectMemory(*fact,(ainew[n]+shift-n) * (sizeof(int)+sizeof(Scalar)));
669   b->maxnz          = b->nz = ainew[n] + shift;
670   (*fact)->factor   = FACTOR_LU;
671 
672   (*fact)->info.factor_mallocs    = realloc;
673   (*fact)->info.fill_ratio_given  = f;
674   (*fact)->info.fill_ratio_needed = ((double)ainew[n])/((double)ai[prow]);
675 
676   return 0;
677 }
678 
679 
680 
681 
682