xref: /petsc/src/mat/impls/baij/seq/baijfact.c (revision de6a44a3c49fb11edb0226bf70e60025dba29d78)
15d517e7eSBarry Smith #ifndef lint
2*de6a44a3SBarry Smith static char vcid[] = "$Id: baijfact.c,v 1.2 1996/02/15 04:38:54 bsmith Exp bsmith $";
35d517e7eSBarry Smith #endif
45d517e7eSBarry Smith /*
5ec3a8b7bSBarry Smith     Factorization code for BAIJ format.
65d517e7eSBarry Smith */
75d517e7eSBarry Smith 
8ec3a8b7bSBarry Smith #include "baij.h"
9ec3a8b7bSBarry Smith 
10ec3a8b7bSBarry Smith /*
11ec3a8b7bSBarry Smith     The symbolic factorization code is identical to that for AIJ format,
12ec3a8b7bSBarry Smith   except for very small changes since this is now a SeqBAIJ datastructure.
13ec3a8b7bSBarry Smith   NOT good code reuse.
14ec3a8b7bSBarry Smith */
15ec3a8b7bSBarry Smith int MatLUFactorSymbolic_SeqBAIJ(Mat A,IS isrow,IS iscol,double f,Mat *B)
165d517e7eSBarry Smith {
17ec3a8b7bSBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data, *b;
185d517e7eSBarry Smith   IS          isicol;
19ec3a8b7bSBarry Smith   int         *r,*ic, ierr, i, n = a->mbs, *ai = a->i, *aj = a->j;
20*de6a44a3SBarry Smith   int         *ainew,*ajnew, jmax,*fill, *ajtmp, nz, bs = a->bs;
215d517e7eSBarry Smith   int         *idnew, idx, row,m,fm, nnz, nzi,len, realloc = 0,nzbd,*im;
225d517e7eSBarry Smith 
23ec3a8b7bSBarry Smith   if (a->m != a->n) SETERRQ(1,"MatLUFactorSymbolic_SeqBAIJ:Must be square");
24ec3a8b7bSBarry Smith   if (!isrow) SETERRQ(1,"MatLUFactorSymbolic_SeqBAIJ:Must have row permutation");
25ec3a8b7bSBarry Smith   if (!iscol) SETERRQ(1,"MatLUFactorSymbolic_SeqBAIJ:Must have col. permutation");
265d517e7eSBarry Smith 
275d517e7eSBarry Smith   ierr = ISInvertPermutation(iscol,&isicol); CHKERRQ(ierr);
285d517e7eSBarry Smith   ISGetIndices(isrow,&r); ISGetIndices(isicol,&ic);
295d517e7eSBarry Smith 
305d517e7eSBarry Smith   /* get new row pointers */
315d517e7eSBarry Smith   ainew = (int *) PetscMalloc( (n+1)*sizeof(int) ); CHKPTRQ(ainew);
32*de6a44a3SBarry Smith   ainew[0] = 0;
335d517e7eSBarry Smith   /* don't know how many column pointers are needed so estimate */
34*de6a44a3SBarry Smith   jmax = (int) (f*ai[n] + 1);
355d517e7eSBarry Smith   ajnew = (int *) PetscMalloc( (jmax)*sizeof(int) ); CHKPTRQ(ajnew);
365d517e7eSBarry Smith   /* fill is a linked list of nonzeros in active row */
375d517e7eSBarry Smith   fill = (int *) PetscMalloc( (2*n+1)*sizeof(int)); CHKPTRQ(fill);
385d517e7eSBarry Smith   im = fill + n + 1;
395d517e7eSBarry Smith   /* idnew is location of diagonal in factor */
405d517e7eSBarry Smith   idnew = (int *) PetscMalloc( (n+1)*sizeof(int)); CHKPTRQ(idnew);
41*de6a44a3SBarry Smith   idnew[0] = 0;
425d517e7eSBarry Smith 
435d517e7eSBarry Smith   for ( i=0; i<n; i++ ) {
445d517e7eSBarry Smith     /* first copy previous fill into linked list */
455d517e7eSBarry Smith     nnz     = nz    = ai[r[i]+1] - ai[r[i]];
46*de6a44a3SBarry Smith     ajtmp   = aj + ai[r[i]];
475d517e7eSBarry Smith     fill[n] = n;
485d517e7eSBarry Smith     while (nz--) {
495d517e7eSBarry Smith       fm  = n;
50*de6a44a3SBarry Smith       idx = ic[*ajtmp++];
515d517e7eSBarry Smith       do {
525d517e7eSBarry Smith         m  = fm;
535d517e7eSBarry Smith         fm = fill[m];
545d517e7eSBarry Smith       } while (fm < idx);
555d517e7eSBarry Smith       fill[m]   = idx;
565d517e7eSBarry Smith       fill[idx] = fm;
575d517e7eSBarry Smith     }
585d517e7eSBarry Smith     row = fill[n];
595d517e7eSBarry Smith     while ( row < i ) {
60*de6a44a3SBarry Smith       ajtmp = ajnew + idnew[row] + 1;
615d517e7eSBarry Smith       nzbd  = 1 + idnew[row] - ainew[row];
625d517e7eSBarry Smith       nz    = im[row] - nzbd;
635d517e7eSBarry Smith       fm    = row;
645d517e7eSBarry Smith       while (nz-- > 0) {
65*de6a44a3SBarry Smith         idx = *ajtmp++;
665d517e7eSBarry Smith         nzbd++;
675d517e7eSBarry Smith         if (idx == i) im[row] = nzbd;
685d517e7eSBarry Smith         do {
695d517e7eSBarry Smith           m  = fm;
705d517e7eSBarry Smith           fm = fill[m];
715d517e7eSBarry Smith         } while (fm < idx);
725d517e7eSBarry Smith         if (fm != idx) {
735d517e7eSBarry Smith           fill[m]   = idx;
745d517e7eSBarry Smith           fill[idx] = fm;
755d517e7eSBarry Smith           fm        = idx;
765d517e7eSBarry Smith           nnz++;
775d517e7eSBarry Smith         }
785d517e7eSBarry Smith       }
795d517e7eSBarry Smith       row = fill[row];
805d517e7eSBarry Smith     }
815d517e7eSBarry Smith     /* copy new filled row into permanent storage */
825d517e7eSBarry Smith     ainew[i+1] = ainew[i] + nnz;
835d517e7eSBarry Smith     if (ainew[i+1] > jmax+1) {
845d517e7eSBarry Smith       /* allocate a longer ajnew */
855d517e7eSBarry Smith       int maxadd;
86*de6a44a3SBarry Smith       maxadd = (int) ((f*(ai[n]+1)*(n-i+5))/n);
875d517e7eSBarry Smith       if (maxadd < nnz) maxadd = (n-i)*(nnz+1);
885d517e7eSBarry Smith       jmax += maxadd;
895d517e7eSBarry Smith       ajtmp = (int *) PetscMalloc( jmax*sizeof(int) );CHKPTRQ(ajtmp);
90*de6a44a3SBarry Smith       PetscMemcpy(ajtmp,ajnew,ainew[i]*sizeof(int));
915d517e7eSBarry Smith       PetscFree(ajnew);
925d517e7eSBarry Smith       ajnew = ajtmp;
935d517e7eSBarry Smith       realloc++; /* count how many times we realloc */
945d517e7eSBarry Smith     }
95*de6a44a3SBarry Smith     ajtmp = ajnew + ainew[i];
965d517e7eSBarry Smith     fm    = fill[n];
975d517e7eSBarry Smith     nzi   = 0;
985d517e7eSBarry Smith     im[i] = nnz;
995d517e7eSBarry Smith     while (nnz--) {
1005d517e7eSBarry Smith       if (fm < i) nzi++;
101*de6a44a3SBarry Smith       *ajtmp++ = fm;
1025d517e7eSBarry Smith       fm       = fill[fm];
1035d517e7eSBarry Smith     }
1045d517e7eSBarry Smith     idnew[i] = ainew[i] + nzi;
1055d517e7eSBarry Smith   }
1065d517e7eSBarry Smith 
1075d517e7eSBarry Smith   PLogInfo((PetscObject)A,
108ec3a8b7bSBarry Smith     "Info:MatLUFactorSymbolic_SeqBAIJ:Reallocs %d Fill ratio:given %g needed %g\n",
1095d517e7eSBarry Smith                              realloc,f,((double)ainew[n])/((double)ai[i]));
1105d517e7eSBarry Smith 
1115d517e7eSBarry Smith   ierr = ISRestoreIndices(isrow,&r); CHKERRQ(ierr);
1125d517e7eSBarry Smith   ierr = ISRestoreIndices(isicol,&ic); CHKERRQ(ierr);
1135d517e7eSBarry Smith 
1145d517e7eSBarry Smith   PetscFree(fill);
1155d517e7eSBarry Smith 
1165d517e7eSBarry Smith   /* put together the new matrix */
117ec3a8b7bSBarry Smith   ierr = MatCreateSeqBAIJ(A->comm,bs,bs*n,bs*n,0,PETSC_NULL,B); CHKERRQ(ierr);
1185d517e7eSBarry Smith   PLogObjectParent(*B,isicol);
1195d517e7eSBarry Smith   ierr = ISDestroy(isicol); CHKERRQ(ierr);
120ec3a8b7bSBarry Smith   b = (Mat_SeqBAIJ *) (*B)->data;
1215d517e7eSBarry Smith   PetscFree(b->imax);
1225d517e7eSBarry Smith   b->singlemalloc = 0;
123*de6a44a3SBarry Smith   len             = ainew[n]*sizeof(Scalar);
1245d517e7eSBarry Smith   /* the next line frees the default space generated by the Create() */
1255d517e7eSBarry Smith   PetscFree(b->a); PetscFree(b->ilen);
126ec3a8b7bSBarry Smith   b->a          = (Scalar *) PetscMalloc( len*bs*bs ); CHKPTRQ(b->a);
1275d517e7eSBarry Smith   b->j          = ajnew;
1285d517e7eSBarry Smith   b->i          = ainew;
1295d517e7eSBarry Smith   b->diag       = idnew;
1305d517e7eSBarry Smith   b->ilen       = 0;
1315d517e7eSBarry Smith   b->imax       = 0;
1325d517e7eSBarry Smith   b->row        = isrow;
1335d517e7eSBarry Smith   b->col        = iscol;
134*de6a44a3SBarry Smith   b->solve_work = (Scalar *) PetscMalloc( (bs*n+bs)*sizeof(Scalar));
1355d517e7eSBarry Smith   CHKPTRQ(b->solve_work);
1365d517e7eSBarry Smith   /* In b structure:  Free imax, ilen, old a, old j.
1375d517e7eSBarry Smith      Allocate idnew, solve_work, new a, new j */
138*de6a44a3SBarry Smith   PLogObjectMemory(*B,(ainew[n]-n)*(sizeof(int)+sizeof(Scalar)));
139*de6a44a3SBarry Smith   b->maxnz = b->nz = ainew[n];
1405d517e7eSBarry Smith 
1415d517e7eSBarry Smith   return 0;
1425d517e7eSBarry Smith }
1435d517e7eSBarry Smith 
144ec3a8b7bSBarry Smith #include "pinclude/plapack.h"
145ec3a8b7bSBarry Smith 
146ec3a8b7bSBarry Smith #define BlockZero(v,idx) {PetscMemzero(v+bs2*(idx),bs2*sizeof(Scalar));}
147ec3a8b7bSBarry Smith 
148ec3a8b7bSBarry Smith #define BlockCopy(v_in,v_out,idx_in,idx_out) \
149ec3a8b7bSBarry Smith   {PetscMemcpy(v_out + bs2*(idx_out),v_in + bs2*(idx_in),bs2*sizeof(Scalar));}
150ec3a8b7bSBarry Smith 
151ec3a8b7bSBarry Smith #define BlockInvert(vv,idx) \
152ec3a8b7bSBarry Smith   { int _i,info; Scalar *w = vv + bs2*idx; \
153ec3a8b7bSBarry Smith     LAgetrf_(&bs,&bs,w,&bs,v_pivots,&info); CHKERRQ(info); \
154ec3a8b7bSBarry Smith     PetscMemzero(v_work,bs2*sizeof(Scalar));  \
155ec3a8b7bSBarry Smith     for ( _i=0; _i<bs; _i++ ) { v_work[_i + bs*_i] = 1.0; } \
156ec3a8b7bSBarry Smith     LAgetrs_("N",&bs,&bs,w,&bs,v_pivots,v_work,&bs, &info);CHKERRQ(info);\
157ec3a8b7bSBarry Smith     PetscMemcpy(w,v_work,bs2*sizeof(Scalar)); \
158ec3a8b7bSBarry Smith   }
159ec3a8b7bSBarry Smith 
160*de6a44a3SBarry Smith #define BlockMult(v1,v2,v3) \
161*de6a44a3SBarry Smith   {Scalar _DOne=1.0, _DZero=0.0;\
162*de6a44a3SBarry Smith   BLgemm_("N","N",&bs,&bs,&bs,&_DOne,v1,&bs,v2,&bs,&_DZero,v3,&bs);}
163*de6a44a3SBarry Smith 
164*de6a44a3SBarry Smith #define BlockMultSub(v1,v2,v3,idx2,idx3) \
165*de6a44a3SBarry Smith   {Scalar _DOne=1.0, _DMOne=-1.0;\
166*de6a44a3SBarry Smith   BLgemm_("N","N",&bs,&bs,&bs,&_DMOne,v1,&bs,v2+bs2*idx2,&bs,&_DOne,v3+bs2*idx3,&bs);}
167*de6a44a3SBarry Smith 
168ec3a8b7bSBarry Smith /* ----------------------------------------------------------- */
169ec3a8b7bSBarry Smith int MatLUFactorNumeric_SeqBAIJ(Mat A,Mat *B)
1705d517e7eSBarry Smith {
1715d517e7eSBarry Smith   Mat         C = *B;
172ec3a8b7bSBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data, *b = (Mat_SeqBAIJ *)C->data;
1735d517e7eSBarry Smith   IS          iscol = b->col, isrow = b->row, isicol;
174ec3a8b7bSBarry Smith   int         *r,*ic, ierr, i, j, n = a->mbs, *ai = b->i, *aj = b->j;
175*de6a44a3SBarry Smith   int         *ajtmpold, *ajtmp, nz, row, bslog;
176ec3a8b7bSBarry Smith   int         *diag_offset = b->diag,diag,bs = a->bs,bs2 = bs*bs, *v_pivots;
177*de6a44a3SBarry Smith   register Scalar *pv,*v,*rtmp,*multiplier,*v_work,*pc;
1785d517e7eSBarry Smith   register int    *pj;
1795d517e7eSBarry Smith 
1805d517e7eSBarry Smith   ierr  = ISInvertPermutation(iscol,&isicol); CHKERRQ(ierr);
1815d517e7eSBarry Smith   PLogObjectParent(*B,isicol);
1825d517e7eSBarry Smith   ierr  = ISGetIndices(isrow,&r); CHKERRQ(ierr);
1835d517e7eSBarry Smith   ierr  = ISGetIndices(isicol,&ic); CHKERRQ(ierr);
184ec3a8b7bSBarry Smith   rtmp  = (Scalar *) PetscMalloc(bs2*(n+1)*sizeof(Scalar));CHKPTRQ(rtmp);
1855d517e7eSBarry Smith 
186ec3a8b7bSBarry Smith   /* generate work space needed by dense LU factorization */
187*de6a44a3SBarry Smith   v_work     = (Scalar *) PetscMalloc((bs+2*bs2)*sizeof(Scalar));CHKPTRQ(v_work);
188*de6a44a3SBarry Smith   multiplier = v_work + bs2;
189*de6a44a3SBarry Smith   v_pivots   = (int *) (multiplier + bs2);
190*de6a44a3SBarry Smith 
191*de6a44a3SBarry Smith   /* flops in while loop */
192*de6a44a3SBarry Smith   bslog = 4*bs*bs2 - bs;
1935d517e7eSBarry Smith 
1945d517e7eSBarry Smith   for ( i=0; i<n; i++ ) {
1955d517e7eSBarry Smith     nz    = ai[i+1] - ai[i];
196*de6a44a3SBarry Smith     ajtmp = aj + ai[i];
197*de6a44a3SBarry Smith     for  ( j=0; j<nz; j++ ) BlockZero(rtmp,ajtmp[j]);
1985d517e7eSBarry Smith     /* load in initial (unfactored row) */
1995d517e7eSBarry Smith     nz       = a->i[r[i]+1] - a->i[r[i]];
200*de6a44a3SBarry Smith     ajtmpold = a->j + a->i[r[i]];
201*de6a44a3SBarry Smith     v        = a->a + bs2*a->i[r[i]];
202*de6a44a3SBarry Smith     for ( j=0; j<nz; j++ ) BlockCopy(v,rtmp,j,ic[ajtmpold[j]]);
203*de6a44a3SBarry Smith     row = *ajtmp++;
2045d517e7eSBarry Smith     while (row < i) {
205*de6a44a3SBarry Smith       pc = rtmp + bs2*row;
206*de6a44a3SBarry Smith /*      if (*pc) { */
207*de6a44a3SBarry Smith         pv = b->a + bs2*diag_offset[row];
208*de6a44a3SBarry Smith         pj = b->j + diag_offset[row] + 1;
209*de6a44a3SBarry Smith 	BlockMult(pc,pv,multiplier);
210*de6a44a3SBarry Smith         BlockCopy(multiplier,pc,0,0);
2115d517e7eSBarry Smith         nz = ai[row+1] - diag_offset[row] - 1;
212ec3a8b7bSBarry Smith         pv += bs2;
213*de6a44a3SBarry Smith         for (j=0; j<nz; j++) BlockMultSub(multiplier,pv,rtmp,j,pj[j]);
214*de6a44a3SBarry Smith         PLogFlops(bslog*nz);
215*de6a44a3SBarry Smith /*      } */
216*de6a44a3SBarry Smith         row = *ajtmp++;
2175d517e7eSBarry Smith     }
2185d517e7eSBarry Smith     /* finished row so stick it into b->a */
219*de6a44a3SBarry Smith     pv = b->a + bs2*ai[i];
220*de6a44a3SBarry Smith     pj = b->j + ai[i];
2215d517e7eSBarry Smith     nz = ai[i+1] - ai[i];
222*de6a44a3SBarry Smith     for ( j=0; j<nz; j++ ) BlockCopy(rtmp,pv,pj[j],j);
2235d517e7eSBarry Smith     diag = diag_offset[i] - ai[i];
224ec3a8b7bSBarry Smith     /* invert diagonal block */
225ec3a8b7bSBarry Smith     BlockInvert(pv,diag);
2265d517e7eSBarry Smith   }
2275d517e7eSBarry Smith 
228*de6a44a3SBarry Smith   PetscFree(rtmp); PetscFree(v_work);
2295d517e7eSBarry Smith   ierr = ISRestoreIndices(isicol,&ic); CHKERRQ(ierr);
2305d517e7eSBarry Smith   ierr = ISRestoreIndices(isrow,&r); CHKERRQ(ierr);
2315d517e7eSBarry Smith   ierr = ISDestroy(isicol); CHKERRQ(ierr);
2325d517e7eSBarry Smith   C->factor = FACTOR_LU;
2335d517e7eSBarry Smith   C->assembled = PETSC_TRUE;
234*de6a44a3SBarry Smith   PLogFlops(1.3333*bs*bs2*b->mbs); /* from inverting diagonal blocks */
2355d517e7eSBarry Smith   return 0;
2365d517e7eSBarry Smith }
2375d517e7eSBarry Smith /* ----------------------------------------------------------- */
238ec3a8b7bSBarry Smith int MatLUFactor_SeqBAIJ(Mat A,IS row,IS col,double f)
2395d517e7eSBarry Smith {
240ec3a8b7bSBarry Smith   Mat_SeqBAIJ *mat = (Mat_SeqBAIJ *) A->data;
2415d517e7eSBarry Smith   int         ierr;
2425d517e7eSBarry Smith   Mat         C;
2435d517e7eSBarry Smith 
244ec3a8b7bSBarry Smith   ierr = MatLUFactorSymbolic_SeqBAIJ(A,row,col,f,&C); CHKERRQ(ierr);
245ec3a8b7bSBarry Smith   ierr = MatLUFactorNumeric_SeqBAIJ(A,&C); CHKERRQ(ierr);
2465d517e7eSBarry Smith 
2475d517e7eSBarry Smith   /* free all the data structures from mat */
2485d517e7eSBarry Smith   PetscFree(mat->a);
2495d517e7eSBarry Smith   if (!mat->singlemalloc) {PetscFree(mat->i); PetscFree(mat->j);}
2505d517e7eSBarry Smith   if (mat->diag) PetscFree(mat->diag);
2515d517e7eSBarry Smith   if (mat->ilen) PetscFree(mat->ilen);
2525d517e7eSBarry Smith   if (mat->imax) PetscFree(mat->imax);
2535d517e7eSBarry Smith   if (mat->solve_work) PetscFree(mat->solve_work);
2545d517e7eSBarry Smith   PetscFree(mat);
2555d517e7eSBarry Smith 
2565d517e7eSBarry Smith   PetscMemcpy(A,C,sizeof(struct _Mat));
2575d517e7eSBarry Smith   PetscHeaderDestroy(C);
2585d517e7eSBarry Smith   return 0;
2595d517e7eSBarry Smith }
2605d517e7eSBarry Smith /* ----------------------------------------------------------- */
261ec3a8b7bSBarry Smith int MatSolve_SeqBAIJ(Mat A,Vec bb, Vec xx)
2625d517e7eSBarry Smith {
263ec3a8b7bSBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
2645d517e7eSBarry Smith   IS          iscol = a->col, isrow = a->row;
265*de6a44a3SBarry Smith   int         *r,*c, ierr, i,  n = a->mbs, *vi, *ai = a->i, *aj = a->j;
266*de6a44a3SBarry Smith   int         nz,bs = a->bs,bs2 = bs*bs,idx,idt,idc, _One = 1;
267*de6a44a3SBarry Smith   Scalar      *xa,*ba,*aa = a->a, *sum, _DOne = 1.0, _DMOne = -1.0;
268*de6a44a3SBarry Smith   Scalar      _DZero = 0.0,sum1,sum2,sum3,sum4,sum5,x1,x2,x3,x4,x5;
269*de6a44a3SBarry Smith   register Scalar *x, *b, *lsum, *tmp, *v;
2705d517e7eSBarry Smith 
271ec3a8b7bSBarry Smith   if (A->factor != FACTOR_LU) SETERRQ(1,"MatSolve_SeqBAIJ:Not for unfactored matrix");
2725d517e7eSBarry Smith 
273*de6a44a3SBarry Smith   ierr = VecGetArray(bb,&ba); CHKERRQ(ierr); b = ba;
274*de6a44a3SBarry Smith   ierr = VecGetArray(xx,&xa); CHKERRQ(ierr); x = xa;
2755d517e7eSBarry Smith   tmp  = a->solve_work;
2765d517e7eSBarry Smith 
2775d517e7eSBarry Smith   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
2785d517e7eSBarry Smith   ierr = ISGetIndices(iscol,&c);CHKERRQ(ierr); c = c + (n-1);
2795d517e7eSBarry Smith 
280*de6a44a3SBarry Smith   switch (bs) {
281*de6a44a3SBarry Smith     case 1:
2825d517e7eSBarry Smith       /* forward solve the lower triangular */
2835d517e7eSBarry Smith       tmp[0] = b[*r++];
2845d517e7eSBarry Smith       for ( i=1; i<n; i++ ) {
285*de6a44a3SBarry Smith         v     = aa + ai[i];
286*de6a44a3SBarry Smith         vi    = aj + ai[i];
2875d517e7eSBarry Smith         nz    = a->diag[i] - ai[i];
288*de6a44a3SBarry Smith         sum1  = b[*r++];
289*de6a44a3SBarry Smith         while (nz--) {
290*de6a44a3SBarry Smith           sum1 -= (*v++)*tmp[*vi++];
2915d517e7eSBarry Smith         }
292*de6a44a3SBarry Smith         tmp[i] = sum1;
293*de6a44a3SBarry Smith       }
2945d517e7eSBarry Smith       /* backward solve the upper triangular */
2955d517e7eSBarry Smith       for ( i=n-1; i>=0; i-- ){
296*de6a44a3SBarry Smith         v    = aa + a->diag[i] + 1;
297*de6a44a3SBarry Smith         vi   = aj + a->diag[i] + 1;
2985d517e7eSBarry Smith         nz   = ai[i+1] - a->diag[i] - 1;
299*de6a44a3SBarry Smith         sum1 = tmp[i];
300*de6a44a3SBarry Smith         while (nz--) {
301*de6a44a3SBarry Smith           sum1 -= (*v++)*tmp[*vi++];
302*de6a44a3SBarry Smith         }
303*de6a44a3SBarry Smith         x[*c--] = tmp[i] = aa[a->diag[i]]*sum1;
304*de6a44a3SBarry Smith       }
305*de6a44a3SBarry Smith       break;
306*de6a44a3SBarry Smith     case 2:
307*de6a44a3SBarry Smith       /* forward solve the lower triangular */
308*de6a44a3SBarry Smith       idx    = 2*(*r++);
309*de6a44a3SBarry Smith       tmp[0] = b[idx]; tmp[1] = b[1+idx];
310*de6a44a3SBarry Smith       for ( i=1; i<n; i++ ) {
311*de6a44a3SBarry Smith         v     = aa + 4*ai[i];
312*de6a44a3SBarry Smith         vi    = aj + ai[i];
313*de6a44a3SBarry Smith         nz    = a->diag[i] - ai[i];
314*de6a44a3SBarry Smith         idx   = 2*(*r++);
315*de6a44a3SBarry Smith         sum1  = b[idx]; sum2 = b[1+idx];
316*de6a44a3SBarry Smith         while (nz--) {
317*de6a44a3SBarry Smith           idx   = 2*(*vi++);
318*de6a44a3SBarry Smith           x1    = tmp[idx]; x2 = tmp[1+idx];
319*de6a44a3SBarry Smith           sum1 -= v[0]*x1 + v[2]*x2;
320*de6a44a3SBarry Smith           sum2 -= v[1]*x1 + v[3]*x2;
321*de6a44a3SBarry Smith           v += 4;
322*de6a44a3SBarry Smith         }
323*de6a44a3SBarry Smith         idx = 2*i;
324*de6a44a3SBarry Smith         tmp[idx] = sum1; tmp[1+idx] = sum2;
325*de6a44a3SBarry Smith       }
326*de6a44a3SBarry Smith       /* backward solve the upper triangular */
327*de6a44a3SBarry Smith       for ( i=n-1; i>=0; i-- ){
328*de6a44a3SBarry Smith         v    = aa + 4*a->diag[i] + 4;
329*de6a44a3SBarry Smith         vi   = aj + a->diag[i] + 1;
330*de6a44a3SBarry Smith         nz   = ai[i+1] - a->diag[i] - 1;
331*de6a44a3SBarry Smith         idt  = 2*i;
332*de6a44a3SBarry Smith         sum1 = tmp[idt]; sum2 = tmp[1+idt];
333*de6a44a3SBarry Smith         while (nz--) {
334*de6a44a3SBarry Smith           idx   = 2*(*vi++);
335*de6a44a3SBarry Smith           x1    = tmp[idx]; x2 = tmp[1+idx];
336*de6a44a3SBarry Smith           sum1 -= v[0]*x1 + v[2]*x2;
337*de6a44a3SBarry Smith           sum2 -= v[1]*x1 + v[3]*x2;
338*de6a44a3SBarry Smith           v += 4;
339*de6a44a3SBarry Smith         }
340*de6a44a3SBarry Smith         idc = 2*(*c--);
341*de6a44a3SBarry Smith         v   = aa + 4*a->diag[i];
342*de6a44a3SBarry Smith         x[idc]   = tmp[idt]   = v[0]*sum1 + v[2]*sum2;
343*de6a44a3SBarry Smith         x[1+idc] = tmp[1+idt] = v[1]*sum1 + v[3]*sum2;
344*de6a44a3SBarry Smith       }
345*de6a44a3SBarry Smith       break;
346*de6a44a3SBarry Smith     case 3:
347*de6a44a3SBarry Smith       /* forward solve the lower triangular */
348*de6a44a3SBarry Smith       idx    = 3*(*r++);
349*de6a44a3SBarry Smith       tmp[0] = b[idx]; tmp[1] = b[1+idx]; tmp[2] = b[2+idx];
350*de6a44a3SBarry Smith       for ( i=1; i<n; i++ ) {
351*de6a44a3SBarry Smith         v     = aa + 9*ai[i];
352*de6a44a3SBarry Smith         vi    = aj + ai[i];
353*de6a44a3SBarry Smith         nz    = a->diag[i] - ai[i];
354*de6a44a3SBarry Smith         idx   = 3*(*r++);
355*de6a44a3SBarry Smith         sum1  = b[idx]; sum2 = b[1+idx]; sum3 = b[2+idx];
356*de6a44a3SBarry Smith         while (nz--) {
357*de6a44a3SBarry Smith           idx   = 3*(*vi++);
358*de6a44a3SBarry Smith           x1    = tmp[idx]; x2 = tmp[1+idx]; x3 = tmp[2+idx];
359*de6a44a3SBarry Smith           sum1 -= v[0]*x1 + v[3]*x2 + v[6]*x3;
360*de6a44a3SBarry Smith           sum2 -= v[1]*x1 + v[4]*x2 + v[7]*x3;
361*de6a44a3SBarry Smith           sum3 -= v[2]*x1 + v[5]*x2 + v[8]*x3;
362*de6a44a3SBarry Smith           v += 9;
363*de6a44a3SBarry Smith         }
364*de6a44a3SBarry Smith         idx = 3*i;
365*de6a44a3SBarry Smith         tmp[idx] = sum1; tmp[1+idx] = sum2; tmp[2+idx] = sum3;
366*de6a44a3SBarry Smith       }
367*de6a44a3SBarry Smith       /* backward solve the upper triangular */
368*de6a44a3SBarry Smith       for ( i=n-1; i>=0; i-- ){
369*de6a44a3SBarry Smith         v    = aa + 9*a->diag[i] + 9;
370*de6a44a3SBarry Smith         vi   = aj + a->diag[i] + 1;
371*de6a44a3SBarry Smith         nz   = ai[i+1] - a->diag[i] - 1;
372*de6a44a3SBarry Smith         idt  = 3*i;
373*de6a44a3SBarry Smith         sum1 = tmp[idt]; sum2 = tmp[1+idt]; sum3 = tmp[2+idt];
374*de6a44a3SBarry Smith         while (nz--) {
375*de6a44a3SBarry Smith           idx   = 3*(*vi++);
376*de6a44a3SBarry Smith           x1    = tmp[idx]; x2 = tmp[1+idx]; x3 = tmp[2+idx];
377*de6a44a3SBarry Smith           sum1 -= v[0]*x1 + v[3]*x2 + v[6]*x3;
378*de6a44a3SBarry Smith           sum2 -= v[1]*x1 + v[4]*x2 + v[7]*x3;
379*de6a44a3SBarry Smith           sum3 -= v[2]*x1 + v[5]*x2 + v[8]*x3;
380*de6a44a3SBarry Smith           v += 9;
381*de6a44a3SBarry Smith         }
382*de6a44a3SBarry Smith         idc = 3*(*c--);
383*de6a44a3SBarry Smith         v   = aa + 9*a->diag[i];
384*de6a44a3SBarry Smith         x[idc]   = tmp[idt]   = v[0]*sum1 + v[3]*sum2 + v[6]*sum3;
385*de6a44a3SBarry Smith         x[1+idc] = tmp[1+idt] = v[1]*sum1 + v[4]*sum2 + v[7]*sum3;
386*de6a44a3SBarry Smith         x[2+idc] = tmp[2+idt] = v[2]*sum1 + v[5]*sum2 + v[8]*sum3;
387*de6a44a3SBarry Smith       }
388*de6a44a3SBarry Smith       break;
389*de6a44a3SBarry Smith     case 4:
390*de6a44a3SBarry Smith       /* forward solve the lower triangular */
391*de6a44a3SBarry Smith       idx    = 4*(*r++);
392*de6a44a3SBarry Smith       tmp[0] = b[idx];   tmp[1] = b[1+idx];
393*de6a44a3SBarry Smith       tmp[2] = b[2+idx]; tmp[3] = b[3+idx];
394*de6a44a3SBarry Smith       for ( i=1; i<n; i++ ) {
395*de6a44a3SBarry Smith         v     = aa + 16*ai[i];
396*de6a44a3SBarry Smith         vi    = aj + ai[i];
397*de6a44a3SBarry Smith         nz    = a->diag[i] - ai[i];
398*de6a44a3SBarry Smith         idx   = 4*(*r++);
399*de6a44a3SBarry Smith         sum1  = b[idx];sum2 = b[1+idx];sum3 = b[2+idx];sum4 = b[3+idx];
400*de6a44a3SBarry Smith         while (nz--) {
401*de6a44a3SBarry Smith           idx   = 4*(*vi++);
402*de6a44a3SBarry Smith           x1    = tmp[idx];x2 = tmp[1+idx];x3 = tmp[2+idx];x4 = tmp[3+idx];
403*de6a44a3SBarry Smith           sum1 -= v[0]*x1 + v[4]*x2 + v[8]*x3  + v[12]*x4;
404*de6a44a3SBarry Smith           sum2 -= v[1]*x1 + v[5]*x2 + v[9]*x3  + v[13]*x4;
405*de6a44a3SBarry Smith           sum3 -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4;
406*de6a44a3SBarry Smith           sum4 -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4;
407*de6a44a3SBarry Smith           v += 16;
408*de6a44a3SBarry Smith         }
409*de6a44a3SBarry Smith         idx = 4*i;
410*de6a44a3SBarry Smith         tmp[idx]   = sum1;tmp[1+idx] = sum2;
411*de6a44a3SBarry Smith         tmp[2+idx] = sum3;tmp[3+idx] = sum4;
412*de6a44a3SBarry Smith       }
413*de6a44a3SBarry Smith       /* backward solve the upper triangular */
414*de6a44a3SBarry Smith       for ( i=n-1; i>=0; i-- ){
415*de6a44a3SBarry Smith         v    = aa + 16*a->diag[i] + 16;
416*de6a44a3SBarry Smith         vi   = aj + a->diag[i] + 1;
417*de6a44a3SBarry Smith         nz   = ai[i+1] - a->diag[i] - 1;
418*de6a44a3SBarry Smith         idt  = 4*i;
419*de6a44a3SBarry Smith         sum1 = tmp[idt];  sum2 = tmp[1+idt];
420*de6a44a3SBarry Smith         sum3 = tmp[2+idt];sum4 = tmp[3+idt];
421*de6a44a3SBarry Smith         while (nz--) {
422*de6a44a3SBarry Smith           idx   = 4*(*vi++);
423*de6a44a3SBarry Smith           x1    = tmp[idx];   x2 = tmp[1+idx];
424*de6a44a3SBarry Smith           x3    = tmp[2+idx]; x4 = tmp[3+idx];
425*de6a44a3SBarry Smith           sum1 -= v[0]*x1 + v[4]*x2 + v[8]*x3   + v[12]*x4;
426*de6a44a3SBarry Smith           sum2 -= v[1]*x1 + v[5]*x2 + v[9]*x3   + v[13]*x4;
427*de6a44a3SBarry Smith           sum3 -= v[2]*x1 + v[6]*x2 + v[10]*x3  + v[14]*x4;
428*de6a44a3SBarry Smith           sum4 -= v[3]*x1 + v[7]*x2 + v[11]*x3  + v[15]*x4;
429*de6a44a3SBarry Smith           v += 16;
430*de6a44a3SBarry Smith         }
431*de6a44a3SBarry Smith         idc = 4*(*c--);
432*de6a44a3SBarry Smith         v   = aa + 16*a->diag[i];
433*de6a44a3SBarry Smith         x[idc]   = tmp[idt]   = v[0]*sum1+v[4]*sum2+v[8]*sum3+v[12]*sum4;
434*de6a44a3SBarry Smith         x[1+idc] = tmp[1+idt] = v[1]*sum1+v[5]*sum2+v[9]*sum3+v[13]*sum4;
435*de6a44a3SBarry Smith         x[2+idc] = tmp[2+idt] = v[2]*sum1+v[6]*sum2+v[10]*sum3+v[14]*sum4;
436*de6a44a3SBarry Smith         x[3+idc] = tmp[3+idt] = v[3]*sum1+v[7]*sum2+v[11]*sum3+v[15]*sum4;
437*de6a44a3SBarry Smith       }
438*de6a44a3SBarry Smith       break;
439*de6a44a3SBarry Smith     case 5:
440*de6a44a3SBarry Smith       /* forward solve the lower triangular */
441*de6a44a3SBarry Smith       idx    = 5*(*r++);
442*de6a44a3SBarry Smith       tmp[0] = b[idx];   tmp[1] = b[1+idx];
443*de6a44a3SBarry Smith       tmp[2] = b[2+idx]; tmp[3] = b[3+idx]; tmp[4] = b[4+idx];
444*de6a44a3SBarry Smith       for ( i=1; i<n; i++ ) {
445*de6a44a3SBarry Smith         v     = aa + 25*ai[i];
446*de6a44a3SBarry Smith         vi    = aj + ai[i];
447*de6a44a3SBarry Smith         nz    = a->diag[i] - ai[i];
448*de6a44a3SBarry Smith         idx   = 5*(*r++);
449*de6a44a3SBarry Smith         sum1  = b[idx];sum2 = b[1+idx];sum3 = b[2+idx];sum4 = b[3+idx];
450*de6a44a3SBarry Smith         sum5  = b[4+idx];
451*de6a44a3SBarry Smith         while (nz--) {
452*de6a44a3SBarry Smith           idx   = 5*(*vi++);
453*de6a44a3SBarry Smith           x1    = tmp[idx];  x2 = tmp[1+idx];x3 = tmp[2+idx];
454*de6a44a3SBarry Smith           x4    = tmp[3+idx];x5 = tmp[4+idx];
455*de6a44a3SBarry Smith           sum1 -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5;
456*de6a44a3SBarry Smith           sum2 -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5;
457*de6a44a3SBarry Smith           sum3 -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5;
458*de6a44a3SBarry Smith           sum4 -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5;
459*de6a44a3SBarry Smith           sum5 -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5;
460*de6a44a3SBarry Smith           v += 25;
461*de6a44a3SBarry Smith         }
462*de6a44a3SBarry Smith         idx = 5*i;
463*de6a44a3SBarry Smith         tmp[idx]   = sum1;tmp[1+idx] = sum2;
464*de6a44a3SBarry Smith         tmp[2+idx] = sum3;tmp[3+idx] = sum4; tmp[4+idx] = sum5;
465*de6a44a3SBarry Smith       }
466*de6a44a3SBarry Smith       /* backward solve the upper triangular */
467*de6a44a3SBarry Smith       for ( i=n-1; i>=0; i-- ){
468*de6a44a3SBarry Smith         v    = aa + 25*a->diag[i] + 25;
469*de6a44a3SBarry Smith         vi   = aj + a->diag[i] + 1;
470*de6a44a3SBarry Smith         nz   = ai[i+1] - a->diag[i] - 1;
471*de6a44a3SBarry Smith         idt  = 5*i;
472*de6a44a3SBarry Smith         sum1 = tmp[idt];  sum2 = tmp[1+idt];
473*de6a44a3SBarry Smith         sum3 = tmp[2+idt];sum4 = tmp[3+idt]; sum5 = tmp[4+idt];
474*de6a44a3SBarry Smith         while (nz--) {
475*de6a44a3SBarry Smith           idx   = 5*(*vi++);
476*de6a44a3SBarry Smith           x1    = tmp[idx];   x2 = tmp[1+idx];
477*de6a44a3SBarry Smith           x3    = tmp[2+idx]; x4 = tmp[3+idx]; x5 = tmp[4+idx];
478*de6a44a3SBarry Smith           sum1 -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5;
479*de6a44a3SBarry Smith           sum2 -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5;
480*de6a44a3SBarry Smith           sum3 -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5;
481*de6a44a3SBarry Smith           sum4 -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5;
482*de6a44a3SBarry Smith           sum5 -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5;
483*de6a44a3SBarry Smith           v += 25;
484*de6a44a3SBarry Smith         }
485*de6a44a3SBarry Smith         idc = 5*(*c--);
486*de6a44a3SBarry Smith         v   = aa + 25*a->diag[i];
487*de6a44a3SBarry Smith         x[idc]   = tmp[idt]   = v[0]*sum1+v[5]*sum2+v[10]*sum3+
488*de6a44a3SBarry Smith                                 v[15]*sum4+v[20]*sum5;
489*de6a44a3SBarry Smith         x[1+idc] = tmp[1+idt] = v[1]*sum1+v[6]*sum2+v[11]*sum3+
490*de6a44a3SBarry Smith                                 v[16]*sum4+v[21]*sum5;
491*de6a44a3SBarry Smith         x[2+idc] = tmp[2+idt] = v[2]*sum1+v[7]*sum2+v[12]*sum3+
492*de6a44a3SBarry Smith                                 v[17]*sum4+v[22]*sum5;
493*de6a44a3SBarry Smith         x[3+idc] = tmp[3+idt] = v[3]*sum1+v[8]*sum2+v[13]*sum3+
494*de6a44a3SBarry Smith                                 v[18]*sum4+v[23]*sum5;
495*de6a44a3SBarry Smith         x[4+idc] = tmp[4+idt] = v[4]*sum1+v[9]*sum2+v[14]*sum3+
496*de6a44a3SBarry Smith                                 v[19]*sum4+v[24]*sum5;
497*de6a44a3SBarry Smith       }
498*de6a44a3SBarry Smith       break;
499*de6a44a3SBarry Smith     default: {
500*de6a44a3SBarry Smith       /* forward solve the lower triangular */
501*de6a44a3SBarry Smith       PetscMemcpy(tmp,b + bs*(*r++), bs*sizeof(Scalar));
502*de6a44a3SBarry Smith       for ( i=1; i<n; i++ ) {
503*de6a44a3SBarry Smith         v   = aa + bs2*ai[i];
504*de6a44a3SBarry Smith         vi  = aj + ai[i];
505*de6a44a3SBarry Smith         nz  = a->diag[i] - ai[i];
506*de6a44a3SBarry Smith         sum = tmp + bs*i;
507*de6a44a3SBarry Smith         PetscMemcpy(sum,b+bs*(*r++),bs*sizeof(Scalar));
508*de6a44a3SBarry Smith         while (nz--) {
509*de6a44a3SBarry Smith           LAgemv_("N",&bs,&bs,&_DMOne,v,&bs,tmp+bs*(*vi++),&_One,&_DOne,sum,&_One);
510*de6a44a3SBarry Smith           v += bs2;
511*de6a44a3SBarry Smith         }
512*de6a44a3SBarry Smith       }
513*de6a44a3SBarry Smith       /* backward solve the upper triangular */
514*de6a44a3SBarry Smith       lsum = a->solve_work + a->n;
515*de6a44a3SBarry Smith       for ( i=n-1; i>=0; i-- ){
516*de6a44a3SBarry Smith         v   = aa + bs2*(a->diag[i] + 1);
517*de6a44a3SBarry Smith         vi  = aj + a->diag[i] + 1;
518*de6a44a3SBarry Smith         nz  = ai[i+1] - a->diag[i] - 1;
519*de6a44a3SBarry Smith         PetscMemcpy(lsum,tmp+i*bs,bs*sizeof(Scalar));
520*de6a44a3SBarry Smith         while (nz--) {
521*de6a44a3SBarry Smith           LAgemv_("N",&bs,&bs,&_DMOne,v,&bs,tmp+bs*(*vi++),&_One,&_DOne,lsum,&_One);
522*de6a44a3SBarry Smith           v += bs2;
523*de6a44a3SBarry Smith         }
524*de6a44a3SBarry Smith         LAgemv_("N",&bs,&bs,&_DOne,aa+bs2*a->diag[i],&bs,lsum,&_One,&_DZero,
525*de6a44a3SBarry Smith                 tmp+i*bs,&_One);
526*de6a44a3SBarry Smith         PetscMemcpy(x + bs*(*c--),tmp+i*bs,bs*sizeof(Scalar));
527*de6a44a3SBarry Smith       }
528*de6a44a3SBarry Smith     }
5295d517e7eSBarry Smith   }
5305d517e7eSBarry Smith 
5315d517e7eSBarry Smith   ierr = ISRestoreIndices(isrow,&r); CHKERRQ(ierr);
5325d517e7eSBarry Smith   ierr = ISRestoreIndices(iscol,&c); CHKERRQ(ierr);
533*de6a44a3SBarry Smith   PLogFlops(2*bs2*a->nz - a->n);
5345d517e7eSBarry Smith   return 0;
5355d517e7eSBarry Smith }
5365d517e7eSBarry Smith 
5375d517e7eSBarry Smith /* ----------------------------------------------------------------*/
538ec3a8b7bSBarry Smith /*
539ec3a8b7bSBarry Smith      This code is virtually identical to MatILUFactorSymbolic_SeqAIJ
540ec3a8b7bSBarry Smith    except that the data structure of Mat_SeqAIJ is slightly different.
541ec3a8b7bSBarry Smith    Not a good example of code reuse.
542ec3a8b7bSBarry Smith    */
543ec3a8b7bSBarry Smith int MatILUFactorSymbolic_SeqBAIJ(Mat A,IS isrow,IS iscol,double f,int levels,
544ec3a8b7bSBarry Smith                                  Mat *fact)
5455d517e7eSBarry Smith {
546ec3a8b7bSBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data, *b;
5475d517e7eSBarry Smith   IS          isicol;
548ec3a8b7bSBarry Smith   int         *r,*ic, ierr, prow, n = a->mbs, *ai = a->i, *aj = a->j;
5495d517e7eSBarry Smith   int         *ainew,*ajnew, jmax,*fill, *xi, nz, *im,*ajfill,*flev;
5505d517e7eSBarry Smith   int         *dloc, idx, row,m,fm, nzf, nzi,len,  realloc = 0;
551*de6a44a3SBarry Smith   int         incrlev,nnz,i,bs = a->bs;
5525d517e7eSBarry Smith 
553ec3a8b7bSBarry Smith   if (a->m != a->n) SETERRQ(1,"MatILUFactorSymbolic_SeqBAIJ:Matrix must be square");
554ec3a8b7bSBarry Smith   if (!isrow) SETERRQ(1,"MatILUFactorSymbolic_SeqBAIJ:Must have row permutation");
555ec3a8b7bSBarry Smith   if (!iscol) SETERRQ(1,"MatILUFactorSymbolic_SeqBAIJ:Must have column permutation");
5565d517e7eSBarry Smith 
5575d517e7eSBarry Smith   /* special case that simply copies fill pattern */
5585d517e7eSBarry Smith   if (levels == 0 && ISIsIdentity(isrow) && ISIsIdentity(iscol)) {
559ec3a8b7bSBarry Smith     ierr = MatConvertSameType_SeqBAIJ(A,fact,DO_NOT_COPY_VALUES); CHKERRQ(ierr);
5605d517e7eSBarry Smith     (*fact)->factor = FACTOR_LU;
561ec3a8b7bSBarry Smith     b               = (Mat_SeqBAIJ *) (*fact)->data;
5625d517e7eSBarry Smith     if (!b->diag) {
563ec3a8b7bSBarry Smith       ierr = MatMarkDiag_SeqBAIJ(*fact); CHKERRQ(ierr);
5645d517e7eSBarry Smith     }
5655d517e7eSBarry Smith     b->row        = isrow;
5665d517e7eSBarry Smith     b->col        = iscol;
567*de6a44a3SBarry Smith     b->solve_work = (Scalar *) PetscMalloc((b->m+1+b->bs)*sizeof(Scalar));CHKPTRQ(b->solve_work);
5685d517e7eSBarry Smith     return 0;
5695d517e7eSBarry Smith   }
5705d517e7eSBarry Smith 
5715d517e7eSBarry Smith   ierr = ISInvertPermutation(iscol,&isicol); CHKERRQ(ierr);
5725d517e7eSBarry Smith   ierr = ISGetIndices(isrow,&r); CHKERRQ(ierr);
5735d517e7eSBarry Smith   ierr = ISGetIndices(isicol,&ic); CHKERRQ(ierr);
5745d517e7eSBarry Smith 
5755d517e7eSBarry Smith   /* get new row pointers */
5765d517e7eSBarry Smith   ainew = (int *) PetscMalloc( (n+1)*sizeof(int) ); CHKPTRQ(ainew);
577*de6a44a3SBarry Smith   ainew[0] = 0;
5785d517e7eSBarry Smith   /* don't know how many column pointers are needed so estimate */
579*de6a44a3SBarry Smith   jmax = (int) (f*ai[n] + 1);
5805d517e7eSBarry Smith   ajnew = (int *) PetscMalloc( (jmax)*sizeof(int) ); CHKPTRQ(ajnew);
5815d517e7eSBarry Smith   /* ajfill is level of fill for each fill entry */
5825d517e7eSBarry Smith   ajfill = (int *) PetscMalloc( (jmax)*sizeof(int) ); CHKPTRQ(ajfill);
5835d517e7eSBarry Smith   /* fill is a linked list of nonzeros in active row */
5845d517e7eSBarry Smith   fill = (int *) PetscMalloc( (n+1)*sizeof(int)); CHKPTRQ(fill);
5855d517e7eSBarry Smith   /* im is level for each filled value */
5865d517e7eSBarry Smith   im = (int *) PetscMalloc( (n+1)*sizeof(int)); CHKPTRQ(im);
5875d517e7eSBarry Smith   /* dloc is location of diagonal in factor */
5885d517e7eSBarry Smith   dloc = (int *) PetscMalloc( (n+1)*sizeof(int)); CHKPTRQ(dloc);
5895d517e7eSBarry Smith   dloc[0]  = 0;
5905d517e7eSBarry Smith   for ( prow=0; prow<n; prow++ ) {
5915d517e7eSBarry Smith     /* first copy previous fill into linked list */
5925d517e7eSBarry Smith     nzf     = nz  = ai[r[prow]+1] - ai[r[prow]];
593*de6a44a3SBarry Smith     xi      = aj + ai[r[prow]];
5945d517e7eSBarry Smith     fill[n] = n;
5955d517e7eSBarry Smith     while (nz--) {
5965d517e7eSBarry Smith       fm  = n;
597*de6a44a3SBarry Smith       idx = ic[*xi++];
5985d517e7eSBarry Smith       do {
5995d517e7eSBarry Smith         m  = fm;
6005d517e7eSBarry Smith         fm = fill[m];
6015d517e7eSBarry Smith       } while (fm < idx);
6025d517e7eSBarry Smith       fill[m]   = idx;
6035d517e7eSBarry Smith       fill[idx] = fm;
6045d517e7eSBarry Smith       im[idx]   = 0;
6055d517e7eSBarry Smith     }
6065d517e7eSBarry Smith     nzi = 0;
6075d517e7eSBarry Smith     row = fill[n];
6085d517e7eSBarry Smith     while ( row < prow ) {
6095d517e7eSBarry Smith       incrlev = im[row] + 1;
6105d517e7eSBarry Smith       nz      = dloc[row];
611*de6a44a3SBarry Smith       xi      = ajnew  + ainew[row] + nz;
612*de6a44a3SBarry Smith       flev    = ajfill + ainew[row] + nz + 1;
6135d517e7eSBarry Smith       nnz     = ainew[row+1] - ainew[row] - nz - 1;
614*de6a44a3SBarry Smith       if (*xi++ != row) {
615ec3a8b7bSBarry Smith         SETERRQ(1,"MatILUFactorSymbolic_SeqBAIJ:zero pivot");
6165d517e7eSBarry Smith       }
6175d517e7eSBarry Smith       fm      = row;
6185d517e7eSBarry Smith       while (nnz-- > 0) {
619*de6a44a3SBarry Smith         idx = *xi++;
6205d517e7eSBarry Smith         if (*flev + incrlev > levels) {
6215d517e7eSBarry Smith           flev++;
6225d517e7eSBarry Smith           continue;
6235d517e7eSBarry Smith         }
6245d517e7eSBarry Smith         do {
6255d517e7eSBarry Smith           m  = fm;
6265d517e7eSBarry Smith           fm = fill[m];
6275d517e7eSBarry Smith         } while (fm < idx);
6285d517e7eSBarry Smith         if (fm != idx) {
6295d517e7eSBarry Smith           im[idx]   = *flev + incrlev;
6305d517e7eSBarry Smith           fill[m]   = idx;
6315d517e7eSBarry Smith           fill[idx] = fm;
6325d517e7eSBarry Smith           fm        = idx;
6335d517e7eSBarry Smith           nzf++;
6345d517e7eSBarry Smith         }
6355d517e7eSBarry Smith         else {
6365d517e7eSBarry Smith           if (im[idx] > *flev + incrlev) im[idx] = *flev+incrlev;
6375d517e7eSBarry Smith         }
6385d517e7eSBarry Smith         flev++;
6395d517e7eSBarry Smith       }
6405d517e7eSBarry Smith       row = fill[row];
6415d517e7eSBarry Smith       nzi++;
6425d517e7eSBarry Smith     }
6435d517e7eSBarry Smith     /* copy new filled row into permanent storage */
6445d517e7eSBarry Smith     ainew[prow+1] = ainew[prow] + nzf;
645*de6a44a3SBarry Smith     if (ainew[prow+1] > jmax) {
6465d517e7eSBarry Smith       /* allocate a longer ajnew */
6475d517e7eSBarry Smith       int maxadd;
648*de6a44a3SBarry Smith       maxadd = (int) (((f*ai[n]+1)*(n-prow+5))/n);
6495d517e7eSBarry Smith       if (maxadd < nzf) maxadd = (n-prow)*(nzf+1);
6505d517e7eSBarry Smith       jmax += maxadd;
6515d517e7eSBarry Smith       xi = (int *) PetscMalloc( jmax*sizeof(int) );CHKPTRQ(xi);
652*de6a44a3SBarry Smith       PetscMemcpy(xi,ajnew,ainew[prow]*sizeof(int));
6535d517e7eSBarry Smith       PetscFree(ajnew);
6545d517e7eSBarry Smith       ajnew = xi;
6555d517e7eSBarry Smith       /* allocate a longer ajfill */
6565d517e7eSBarry Smith       xi = (int *) PetscMalloc( jmax*sizeof(int) );CHKPTRQ(xi);
657*de6a44a3SBarry Smith       PetscMemcpy(xi,ajfill,ainew[prow]*sizeof(int));
6585d517e7eSBarry Smith       PetscFree(ajfill);
6595d517e7eSBarry Smith       ajfill = xi;
6605d517e7eSBarry Smith       realloc++;
6615d517e7eSBarry Smith     }
662*de6a44a3SBarry Smith     xi          = ajnew + ainew[prow];
663*de6a44a3SBarry Smith     flev        = ajfill + ainew[prow];
6645d517e7eSBarry Smith     dloc[prow]  = nzi;
6655d517e7eSBarry Smith     fm          = fill[n];
6665d517e7eSBarry Smith     while (nzf--) {
667*de6a44a3SBarry Smith       *xi++   = fm;
6685d517e7eSBarry Smith       *flev++ = im[fm];
6695d517e7eSBarry Smith       fm      = fill[fm];
6705d517e7eSBarry Smith     }
6715d517e7eSBarry Smith   }
6725d517e7eSBarry Smith   PetscFree(ajfill);
6735d517e7eSBarry Smith   ierr = ISRestoreIndices(isrow,&r); CHKERRQ(ierr);
6745d517e7eSBarry Smith   ierr = ISRestoreIndices(isicol,&ic); CHKERRQ(ierr);
6755d517e7eSBarry Smith   ierr = ISDestroy(isicol); CHKERRQ(ierr);
6765d517e7eSBarry Smith   PetscFree(fill); PetscFree(im);
6775d517e7eSBarry Smith 
6785d517e7eSBarry Smith   PLogInfo((PetscObject)A,
679ec3a8b7bSBarry Smith     "Info:MatILUFactorSymbolic_SeqBAIJ:Realloc %d Fill ratio:given %g needed %g\n",
6805d517e7eSBarry Smith                              realloc,f,((double)ainew[n])/((double)ai[prow]));
6815d517e7eSBarry Smith 
6825d517e7eSBarry Smith   /* put together the new matrix */
683ec3a8b7bSBarry Smith   ierr = MatCreateSeqBAIJ(A->comm,bs,bs*n,bs*n,0,PETSC_NULL,fact);CHKERRQ(ierr);
684ec3a8b7bSBarry Smith   b = (Mat_SeqBAIJ *) (*fact)->data;
6855d517e7eSBarry Smith   PetscFree(b->imax);
6865d517e7eSBarry Smith   b->singlemalloc = 0;
687*de6a44a3SBarry Smith   len = bs*bs*ainew[n]*sizeof(Scalar);
6885d517e7eSBarry Smith   /* the next line frees the default space generated by the Create() */
6895d517e7eSBarry Smith   PetscFree(b->a); PetscFree(b->ilen);
6905d517e7eSBarry Smith   b->a          = (Scalar *) PetscMalloc( len ); CHKPTRQ(b->a);
6915d517e7eSBarry Smith   b->j          = ajnew;
6925d517e7eSBarry Smith   b->i          = ainew;
6935d517e7eSBarry Smith   for ( i=0; i<n; i++ ) dloc[i] += ainew[i];
6945d517e7eSBarry Smith   b->diag       = dloc;
6955d517e7eSBarry Smith   b->ilen       = 0;
6965d517e7eSBarry Smith   b->imax       = 0;
6975d517e7eSBarry Smith   b->row        = isrow;
6985d517e7eSBarry Smith   b->col        = iscol;
699*de6a44a3SBarry Smith   b->solve_work = (Scalar *) PetscMalloc( (bs*n+bs)*sizeof(Scalar));
7005d517e7eSBarry Smith   CHKPTRQ(b->solve_work);
7015d517e7eSBarry Smith   /* In b structure:  Free imax, ilen, old a, old j.
7025d517e7eSBarry Smith      Allocate dloc, solve_work, new a, new j */
703*de6a44a3SBarry Smith   PLogObjectMemory(*fact,(ainew[n]-n)*(sizeof(int))+bs*bs*ainew[n]*sizeof(Scalar));
704*de6a44a3SBarry Smith   b->maxnz          = b->nz = ainew[n];
7055d517e7eSBarry Smith   (*fact)->factor   = FACTOR_LU;
7065d517e7eSBarry Smith   return 0;
7075d517e7eSBarry Smith }
7085d517e7eSBarry Smith 
7095d517e7eSBarry Smith 
7105d517e7eSBarry Smith 
7115d517e7eSBarry Smith 
712