xref: /petsc/src/mat/impls/baij/seq/baijfact.c (revision ec3a8b7b2b6ff99871c5685f662fcbd7a6178392)
15d517e7eSBarry Smith #ifndef lint
2*ec3a8b7bSBarry Smith static char vcid[] = "$Id: baijfact.c,v 1.1 1996/02/13 22:33:11 bsmith Exp bsmith $";
35d517e7eSBarry Smith #endif
45d517e7eSBarry Smith /*
5*ec3a8b7bSBarry Smith     Factorization code for BAIJ format.
65d517e7eSBarry Smith */
75d517e7eSBarry Smith 
8*ec3a8b7bSBarry Smith #include "baij.h"
9*ec3a8b7bSBarry Smith 
10*ec3a8b7bSBarry Smith /*
11*ec3a8b7bSBarry Smith     The symbolic factorization code is identical to that for AIJ format,
12*ec3a8b7bSBarry Smith   except for very small changes since this is now a SeqBAIJ datastructure.
13*ec3a8b7bSBarry Smith   NOT good code reuse.
14*ec3a8b7bSBarry Smith */
15*ec3a8b7bSBarry Smith int MatLUFactorSymbolic_SeqBAIJ(Mat A,IS isrow,IS iscol,double f,Mat *B)
165d517e7eSBarry Smith {
17*ec3a8b7bSBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data, *b;
185d517e7eSBarry Smith   IS          isicol;
19*ec3a8b7bSBarry Smith   int         *r,*ic, ierr, i, n = a->mbs, *ai = a->i, *aj = a->j;
205d517e7eSBarry Smith   int         *ainew,*ajnew, jmax,*fill, *ajtmp, nz,shift = a->indexshift;
215d517e7eSBarry Smith   int         *idnew, idx, row,m,fm, nnz, nzi,len, realloc = 0,nzbd,*im;
22*ec3a8b7bSBarry Smith   int         bs = a->bs;
235d517e7eSBarry Smith 
24*ec3a8b7bSBarry Smith   if (a->m != a->n) SETERRQ(1,"MatLUFactorSymbolic_SeqBAIJ:Must be square");
25*ec3a8b7bSBarry Smith   if (!isrow) SETERRQ(1,"MatLUFactorSymbolic_SeqBAIJ:Must have row permutation");
26*ec3a8b7bSBarry Smith   if (!iscol) SETERRQ(1,"MatLUFactorSymbolic_SeqBAIJ:Must have col. permutation");
275d517e7eSBarry Smith 
285d517e7eSBarry Smith   ierr = ISInvertPermutation(iscol,&isicol); CHKERRQ(ierr);
295d517e7eSBarry Smith   ISGetIndices(isrow,&r); ISGetIndices(isicol,&ic);
305d517e7eSBarry Smith 
315d517e7eSBarry Smith   /* get new row pointers */
325d517e7eSBarry Smith   ainew = (int *) PetscMalloc( (n+1)*sizeof(int) ); CHKPTRQ(ainew);
335d517e7eSBarry Smith   ainew[0] = -shift;
345d517e7eSBarry Smith   /* don't know how many column pointers are needed so estimate */
355d517e7eSBarry Smith   jmax = (int) (f*ai[n]+(!shift));
365d517e7eSBarry Smith   ajnew = (int *) PetscMalloc( (jmax)*sizeof(int) ); CHKPTRQ(ajnew);
375d517e7eSBarry Smith   /* fill is a linked list of nonzeros in active row */
385d517e7eSBarry Smith   fill = (int *) PetscMalloc( (2*n+1)*sizeof(int)); CHKPTRQ(fill);
395d517e7eSBarry Smith   im = fill + n + 1;
405d517e7eSBarry Smith   /* idnew is location of diagonal in factor */
415d517e7eSBarry Smith   idnew = (int *) PetscMalloc( (n+1)*sizeof(int)); CHKPTRQ(idnew);
425d517e7eSBarry Smith   idnew[0] = -shift;
435d517e7eSBarry Smith 
445d517e7eSBarry Smith   for ( i=0; i<n; i++ ) {
455d517e7eSBarry Smith     /* first copy previous fill into linked list */
465d517e7eSBarry Smith     nnz     = nz    = ai[r[i]+1] - ai[r[i]];
475d517e7eSBarry Smith     ajtmp   = aj + ai[r[i]] + shift;
485d517e7eSBarry Smith     fill[n] = n;
495d517e7eSBarry Smith     while (nz--) {
505d517e7eSBarry Smith       fm  = n;
515d517e7eSBarry Smith       idx = ic[*ajtmp++ + shift];
525d517e7eSBarry Smith       do {
535d517e7eSBarry Smith         m  = fm;
545d517e7eSBarry Smith         fm = fill[m];
555d517e7eSBarry Smith       } while (fm < idx);
565d517e7eSBarry Smith       fill[m]   = idx;
575d517e7eSBarry Smith       fill[idx] = fm;
585d517e7eSBarry Smith     }
595d517e7eSBarry Smith     row = fill[n];
605d517e7eSBarry Smith     while ( row < i ) {
615d517e7eSBarry Smith       ajtmp = ajnew + idnew[row] + (!shift);
625d517e7eSBarry Smith       nzbd  = 1 + idnew[row] - ainew[row];
635d517e7eSBarry Smith       nz    = im[row] - nzbd;
645d517e7eSBarry Smith       fm    = row;
655d517e7eSBarry Smith       while (nz-- > 0) {
665d517e7eSBarry Smith         idx = *ajtmp++ + shift;
675d517e7eSBarry Smith         nzbd++;
685d517e7eSBarry Smith         if (idx == i) im[row] = nzbd;
695d517e7eSBarry Smith         do {
705d517e7eSBarry Smith           m  = fm;
715d517e7eSBarry Smith           fm = fill[m];
725d517e7eSBarry Smith         } while (fm < idx);
735d517e7eSBarry Smith         if (fm != idx) {
745d517e7eSBarry Smith           fill[m]   = idx;
755d517e7eSBarry Smith           fill[idx] = fm;
765d517e7eSBarry Smith           fm        = idx;
775d517e7eSBarry Smith           nnz++;
785d517e7eSBarry Smith         }
795d517e7eSBarry Smith       }
805d517e7eSBarry Smith       row = fill[row];
815d517e7eSBarry Smith     }
825d517e7eSBarry Smith     /* copy new filled row into permanent storage */
835d517e7eSBarry Smith     ainew[i+1] = ainew[i] + nnz;
845d517e7eSBarry Smith     if (ainew[i+1] > jmax+1) {
855d517e7eSBarry Smith       /* allocate a longer ajnew */
865d517e7eSBarry Smith       int maxadd;
875d517e7eSBarry Smith       maxadd = (int) ((f*(ai[n]+(!shift))*(n-i+5))/n);
885d517e7eSBarry Smith       if (maxadd < nnz) maxadd = (n-i)*(nnz+1);
895d517e7eSBarry Smith       jmax += maxadd;
905d517e7eSBarry Smith       ajtmp = (int *) PetscMalloc( jmax*sizeof(int) );CHKPTRQ(ajtmp);
915d517e7eSBarry Smith       PetscMemcpy(ajtmp,ajnew,(ainew[i]+shift)*sizeof(int));
925d517e7eSBarry Smith       PetscFree(ajnew);
935d517e7eSBarry Smith       ajnew = ajtmp;
945d517e7eSBarry Smith       realloc++; /* count how many times we realloc */
955d517e7eSBarry Smith     }
965d517e7eSBarry Smith     ajtmp = ajnew + ainew[i] + shift;
975d517e7eSBarry Smith     fm    = fill[n];
985d517e7eSBarry Smith     nzi   = 0;
995d517e7eSBarry Smith     im[i] = nnz;
1005d517e7eSBarry Smith     while (nnz--) {
1015d517e7eSBarry Smith       if (fm < i) nzi++;
1025d517e7eSBarry Smith       *ajtmp++ = fm - shift;
1035d517e7eSBarry Smith       fm       = fill[fm];
1045d517e7eSBarry Smith     }
1055d517e7eSBarry Smith     idnew[i] = ainew[i] + nzi;
1065d517e7eSBarry Smith   }
1075d517e7eSBarry Smith 
1085d517e7eSBarry Smith   PLogInfo((PetscObject)A,
109*ec3a8b7bSBarry Smith     "Info:MatLUFactorSymbolic_SeqBAIJ:Reallocs %d Fill ratio:given %g needed %g\n",
1105d517e7eSBarry Smith                              realloc,f,((double)ainew[n])/((double)ai[i]));
1115d517e7eSBarry Smith 
1125d517e7eSBarry Smith   ierr = ISRestoreIndices(isrow,&r); CHKERRQ(ierr);
1135d517e7eSBarry Smith   ierr = ISRestoreIndices(isicol,&ic); CHKERRQ(ierr);
1145d517e7eSBarry Smith 
1155d517e7eSBarry Smith   PetscFree(fill);
1165d517e7eSBarry Smith 
1175d517e7eSBarry Smith   /* put together the new matrix */
118*ec3a8b7bSBarry Smith   ierr = MatCreateSeqBAIJ(A->comm,bs,bs*n,bs*n,0,PETSC_NULL,B); CHKERRQ(ierr);
1195d517e7eSBarry Smith   PLogObjectParent(*B,isicol);
1205d517e7eSBarry Smith   ierr = ISDestroy(isicol); CHKERRQ(ierr);
121*ec3a8b7bSBarry Smith   b = (Mat_SeqBAIJ *) (*B)->data;
1225d517e7eSBarry Smith   PetscFree(b->imax);
1235d517e7eSBarry Smith   b->singlemalloc = 0;
1245d517e7eSBarry Smith   len             = (ainew[n] + shift)*sizeof(Scalar);
1255d517e7eSBarry Smith   /* the next line frees the default space generated by the Create() */
1265d517e7eSBarry Smith   PetscFree(b->a); PetscFree(b->ilen);
127*ec3a8b7bSBarry Smith   b->a          = (Scalar *) PetscMalloc( len*bs*bs ); CHKPTRQ(b->a);
1285d517e7eSBarry Smith   b->j          = ajnew;
1295d517e7eSBarry Smith   b->i          = ainew;
1305d517e7eSBarry Smith   b->diag       = idnew;
1315d517e7eSBarry Smith   b->ilen       = 0;
1325d517e7eSBarry Smith   b->imax       = 0;
1335d517e7eSBarry Smith   b->row        = isrow;
1345d517e7eSBarry Smith   b->col        = iscol;
1355d517e7eSBarry Smith   b->solve_work = (Scalar *) PetscMalloc( n*sizeof(Scalar));
1365d517e7eSBarry Smith   CHKPTRQ(b->solve_work);
1375d517e7eSBarry Smith   /* In b structure:  Free imax, ilen, old a, old j.
1385d517e7eSBarry Smith      Allocate idnew, solve_work, new a, new j */
1395d517e7eSBarry Smith   PLogObjectMemory(*B,(ainew[n]+shift-n)*(sizeof(int)+sizeof(Scalar)));
1405d517e7eSBarry Smith   b->maxnz = b->nz = ainew[n] + shift;
1415d517e7eSBarry Smith 
1425d517e7eSBarry Smith   return 0;
1435d517e7eSBarry Smith }
1445d517e7eSBarry Smith 
145*ec3a8b7bSBarry Smith #include "pinclude/plapack.h"
146*ec3a8b7bSBarry Smith 
147*ec3a8b7bSBarry Smith #define BlockZero(v,idx) {PetscMemzero(v+bs2*(idx),bs2*sizeof(Scalar));}
148*ec3a8b7bSBarry Smith 
149*ec3a8b7bSBarry Smith #define BlockCopy(v_in,v_out,idx_in,idx_out) \
150*ec3a8b7bSBarry Smith   {PetscMemcpy(v_out + bs2*(idx_out),v_in + bs2*(idx_in),bs2*sizeof(Scalar));}
151*ec3a8b7bSBarry Smith 
152*ec3a8b7bSBarry Smith #define BlockInvert(vv,idx) \
153*ec3a8b7bSBarry Smith   { int _i,info; Scalar *w = vv + bs2*idx; \
154*ec3a8b7bSBarry Smith printf("vv idx %g %d \n",*vv,idx); \
155*ec3a8b7bSBarry Smith     LAgetrf_(&bs,&bs,w,&bs,v_pivots,&info); CHKERRQ(info); \
156*ec3a8b7bSBarry Smith printf("bs %d w %g\n",bs,*w);\
157*ec3a8b7bSBarry Smith     PetscMemzero(v_work,bs2*sizeof(Scalar));  \
158*ec3a8b7bSBarry Smith printf("vwork %g\n",*v_work);\
159*ec3a8b7bSBarry Smith     for ( _i=0; _i<bs; _i++ ) { v_work[_i + bs*_i] = 1.0; } \
160*ec3a8b7bSBarry Smith printf("vwork %g\n",*v_work);\
161*ec3a8b7bSBarry Smith     LAgetrs_("N",&bs,&bs,w,&bs,v_pivots,v_work,&bs, &info);CHKERRQ(info);\
162*ec3a8b7bSBarry Smith printf("w %g vwork %g\n",*w,*v_work);\
163*ec3a8b7bSBarry Smith     PetscMemcpy(w,v_work,bs2*sizeof(Scalar)); \
164*ec3a8b7bSBarry Smith printf("w %g vwork %g\n",*w,*v_work);\
165*ec3a8b7bSBarry Smith   }
166*ec3a8b7bSBarry Smith 
167*ec3a8b7bSBarry Smith #define BlockMult(v1,v2,v3
168*ec3a8b7bSBarry Smith /* ----------------------------------------------------------- */
169*ec3a8b7bSBarry Smith int MatLUFactorNumeric_SeqBAIJ(Mat A,Mat *B)
1705d517e7eSBarry Smith {
1715d517e7eSBarry Smith   Mat         C = *B;
172*ec3a8b7bSBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data, *b = (Mat_SeqBAIJ *)C->data;
1735d517e7eSBarry Smith   IS          iscol = b->col, isrow = b->row, isicol;
174*ec3a8b7bSBarry Smith   int         *r,*ic, ierr, i, j, n = a->mbs, *ai = b->i, *aj = b->j;
1755d517e7eSBarry Smith   int         *ajtmpold, *ajtmp, nz, row, *ics, shift = a->indexshift;
176*ec3a8b7bSBarry Smith   int         *diag_offset = b->diag,diag,bs = a->bs,bs2 = bs*bs, *v_pivots;
177*ec3a8b7bSBarry Smith   Scalar      *rtmp,*v, *pc, multiplier,*v_work;
1785d517e7eSBarry Smith   /* These declarations are for optimizations.  They reduce the number of
1795d517e7eSBarry Smith      memory references that are made by locally storing information; the
1805d517e7eSBarry Smith      word "register" used here with pointers can be viewed as "private" or
1815d517e7eSBarry Smith      "known only to me"
1825d517e7eSBarry Smith    */
183*ec3a8b7bSBarry Smith   register Scalar *pv, *rtmps;
1845d517e7eSBarry Smith   register int    *pj;
1855d517e7eSBarry Smith 
1865d517e7eSBarry Smith   ierr  = ISInvertPermutation(iscol,&isicol); CHKERRQ(ierr);
1875d517e7eSBarry Smith   PLogObjectParent(*B,isicol);
1885d517e7eSBarry Smith   ierr  = ISGetIndices(isrow,&r); CHKERRQ(ierr);
1895d517e7eSBarry Smith   ierr  = ISGetIndices(isicol,&ic); CHKERRQ(ierr);
190*ec3a8b7bSBarry Smith   rtmp  = (Scalar *) PetscMalloc(bs2*(n+1)*sizeof(Scalar));CHKPTRQ(rtmp);
1915d517e7eSBarry Smith   rtmps = rtmp + shift; ics = ic + shift;
1925d517e7eSBarry Smith 
193*ec3a8b7bSBarry Smith   /* generate work space needed by dense LU factorization */
194*ec3a8b7bSBarry Smith   v_work   = (Scalar *) PetscMalloc(bs2*sizeof(Scalar)); CHKPTRQ(v_work);
195*ec3a8b7bSBarry Smith   v_pivots = (int *) PetscMalloc(bs*sizeof(int)); CHKPTRQ(v_pivots);
1965d517e7eSBarry Smith 
1975d517e7eSBarry Smith   for ( i=0; i<n; i++ ) {
1985d517e7eSBarry Smith     nz    = ai[i+1] - ai[i];
1995d517e7eSBarry Smith     ajtmp = aj + ai[i] + shift;
200*ec3a8b7bSBarry Smith     for  ( j=0; j<nz; j++ ) BlockZero(rtmps,ajtmp[j]);
201*ec3a8b7bSBarry Smith malloc_verify();
2025d517e7eSBarry Smith     /* load in initial (unfactored row) */
2035d517e7eSBarry Smith     nz       = a->i[r[i]+1] - a->i[r[i]];
2045d517e7eSBarry Smith     ajtmpold = a->j + a->i[r[i]] + shift;
205*ec3a8b7bSBarry Smith     v        = a->a + bs2*a->i[r[i]] + shift;
206*ec3a8b7bSBarry Smith     for ( j=0; j<nz; j++ ) BlockCopy(v,rtmp,j,ics[ajtmpold[j]]);
207*ec3a8b7bSBarry Smith malloc_verify();
2085d517e7eSBarry Smith 
2095d517e7eSBarry Smith     row = *ajtmp++ + shift;
2105d517e7eSBarry Smith     while (row < i) {
2115d517e7eSBarry Smith       pc = rtmp + row;
2125d517e7eSBarry Smith       if (*pc != 0.0) {
213*ec3a8b7bSBarry Smith         pv         = b->a + bs2*diag_offset[row] + shift;
2145d517e7eSBarry Smith         pj         = b->j + diag_offset[row] + (!shift);
215*ec3a8b7bSBarry Smith         multiplier = *pc * (*pv);
2165d517e7eSBarry Smith         *pc        = multiplier;
2175d517e7eSBarry Smith         nz         = ai[row+1] - diag_offset[row] - 1;
2185d517e7eSBarry Smith         for (j=0; j<nz; j++) rtmps[pj[j]] -= multiplier * pv[j];
219*ec3a8b7bSBarry Smith         pv        += bs2;
2205d517e7eSBarry Smith         PLogFlops(2*nz);
2215d517e7eSBarry Smith       }
2225d517e7eSBarry Smith       row = *ajtmp++ + shift;
2235d517e7eSBarry Smith     }
2245d517e7eSBarry Smith     /* finished row so stick it into b->a */
225*ec3a8b7bSBarry Smith     pv = b->a + bs2*ai[i] + shift;
2265d517e7eSBarry Smith     pj = b->j + ai[i] + shift;
2275d517e7eSBarry Smith     nz = ai[i+1] - ai[i];
228*ec3a8b7bSBarry Smith     for ( j=0; j<nz; j++ ) BlockCopy(rtmps,pv,pj[j],j);
229*ec3a8b7bSBarry Smith malloc_verify();
2305d517e7eSBarry Smith     diag = diag_offset[i] - ai[i];
231*ec3a8b7bSBarry Smith     /* invert diagonal block */
232*ec3a8b7bSBarry Smith     BlockInvert(pv,diag);
233*ec3a8b7bSBarry Smith malloc_verify();
2345d517e7eSBarry Smith   }
2355d517e7eSBarry Smith 
2365d517e7eSBarry Smith   PetscFree(rtmp);
2375d517e7eSBarry Smith   ierr = ISRestoreIndices(isicol,&ic); CHKERRQ(ierr);
2385d517e7eSBarry Smith   ierr = ISRestoreIndices(isrow,&r); CHKERRQ(ierr);
2395d517e7eSBarry Smith   ierr = ISDestroy(isicol); CHKERRQ(ierr);
2405d517e7eSBarry Smith   C->factor = FACTOR_LU;
2415d517e7eSBarry Smith   C->assembled = PETSC_TRUE;
2425d517e7eSBarry Smith   PLogFlops(b->n);
2435d517e7eSBarry Smith   return 0;
2445d517e7eSBarry Smith }
2455d517e7eSBarry Smith /* ----------------------------------------------------------- */
246*ec3a8b7bSBarry Smith int MatLUFactor_SeqBAIJ(Mat A,IS row,IS col,double f)
2475d517e7eSBarry Smith {
248*ec3a8b7bSBarry Smith   Mat_SeqBAIJ *mat = (Mat_SeqBAIJ *) A->data;
2495d517e7eSBarry Smith   int         ierr;
2505d517e7eSBarry Smith   Mat         C;
2515d517e7eSBarry Smith 
252*ec3a8b7bSBarry Smith   ierr = MatLUFactorSymbolic_SeqBAIJ(A,row,col,f,&C); CHKERRQ(ierr);
253*ec3a8b7bSBarry Smith   ierr = MatLUFactorNumeric_SeqBAIJ(A,&C); CHKERRQ(ierr);
2545d517e7eSBarry Smith 
2555d517e7eSBarry Smith   /* free all the data structures from mat */
2565d517e7eSBarry Smith   PetscFree(mat->a);
2575d517e7eSBarry Smith   if (!mat->singlemalloc) {PetscFree(mat->i); PetscFree(mat->j);}
2585d517e7eSBarry Smith   if (mat->diag) PetscFree(mat->diag);
2595d517e7eSBarry Smith   if (mat->ilen) PetscFree(mat->ilen);
2605d517e7eSBarry Smith   if (mat->imax) PetscFree(mat->imax);
2615d517e7eSBarry Smith   if (mat->solve_work) PetscFree(mat->solve_work);
2625d517e7eSBarry Smith   PetscFree(mat);
2635d517e7eSBarry Smith 
2645d517e7eSBarry Smith   PetscMemcpy(A,C,sizeof(struct _Mat));
2655d517e7eSBarry Smith   PetscHeaderDestroy(C);
2665d517e7eSBarry Smith   return 0;
2675d517e7eSBarry Smith }
2685d517e7eSBarry Smith /* ----------------------------------------------------------- */
269*ec3a8b7bSBarry Smith int MatSolve_SeqBAIJ(Mat A,Vec bb, Vec xx)
2705d517e7eSBarry Smith {
271*ec3a8b7bSBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
2725d517e7eSBarry Smith   IS          iscol = a->col, isrow = a->row;
2735d517e7eSBarry Smith   int         *r,*c, ierr, i,  n = a->m, *vi, *ai = a->i, *aj = a->j;
2745d517e7eSBarry Smith   int         nz,shift = a->indexshift;
2755d517e7eSBarry Smith   Scalar      *x,*b,*tmp, *tmps, *aa = a->a, sum, *v;
2765d517e7eSBarry Smith 
277*ec3a8b7bSBarry Smith   if (A->factor != FACTOR_LU) SETERRQ(1,"MatSolve_SeqBAIJ:Not for unfactored matrix");
2785d517e7eSBarry Smith 
2795d517e7eSBarry Smith   ierr = VecGetArray(bb,&b); CHKERRQ(ierr);
2805d517e7eSBarry Smith   ierr = VecGetArray(xx,&x); CHKERRQ(ierr);
2815d517e7eSBarry Smith   tmp  = a->solve_work;
2825d517e7eSBarry Smith 
2835d517e7eSBarry Smith   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
2845d517e7eSBarry Smith   ierr = ISGetIndices(iscol,&c);CHKERRQ(ierr); c = c + (n-1);
2855d517e7eSBarry Smith 
2865d517e7eSBarry Smith   /* forward solve the lower triangular */
2875d517e7eSBarry Smith   tmp[0] = b[*r++];
2885d517e7eSBarry Smith   tmps   = tmp + shift;
2895d517e7eSBarry Smith   for ( i=1; i<n; i++ ) {
2905d517e7eSBarry Smith     v   = aa + ai[i] + shift;
2915d517e7eSBarry Smith     vi  = aj + ai[i] + shift;
2925d517e7eSBarry Smith     nz  = a->diag[i] - ai[i];
2935d517e7eSBarry Smith     sum = b[*r++];
2945d517e7eSBarry Smith     while (nz--) sum -= *v++ * tmps[*vi++];
2955d517e7eSBarry Smith     tmp[i] = sum;
2965d517e7eSBarry Smith   }
2975d517e7eSBarry Smith 
2985d517e7eSBarry Smith   /* backward solve the upper triangular */
2995d517e7eSBarry Smith   for ( i=n-1; i>=0; i-- ){
3005d517e7eSBarry Smith     v   = aa + a->diag[i] + (!shift);
3015d517e7eSBarry Smith     vi  = aj + a->diag[i] + (!shift);
3025d517e7eSBarry Smith     nz  = ai[i+1] - a->diag[i] - 1;
3035d517e7eSBarry Smith     sum = tmp[i];
3045d517e7eSBarry Smith     while (nz--) sum -= *v++ * tmps[*vi++];
3055d517e7eSBarry Smith     x[*c--] = tmp[i] = sum*aa[a->diag[i]+shift];
3065d517e7eSBarry Smith   }
3075d517e7eSBarry Smith 
3085d517e7eSBarry Smith   ierr = ISRestoreIndices(isrow,&r); CHKERRQ(ierr);
3095d517e7eSBarry Smith   ierr = ISRestoreIndices(iscol,&c); CHKERRQ(ierr);
3105d517e7eSBarry Smith   PLogFlops(2*a->nz - a->n);
3115d517e7eSBarry Smith   return 0;
3125d517e7eSBarry Smith }
3135d517e7eSBarry Smith 
3145d517e7eSBarry Smith /* ----------------------------------------------------------------*/
315*ec3a8b7bSBarry Smith /*
316*ec3a8b7bSBarry Smith      This code is virtually identical to MatILUFactorSymbolic_SeqAIJ
317*ec3a8b7bSBarry Smith    except that the data structure of Mat_SeqAIJ is slightly different.
318*ec3a8b7bSBarry Smith    Not a good example of code reuse.
319*ec3a8b7bSBarry Smith    */
320*ec3a8b7bSBarry Smith int MatILUFactorSymbolic_SeqBAIJ(Mat A,IS isrow,IS iscol,double f,int levels,
321*ec3a8b7bSBarry Smith                                  Mat *fact)
3225d517e7eSBarry Smith {
323*ec3a8b7bSBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data, *b;
3245d517e7eSBarry Smith   IS          isicol;
325*ec3a8b7bSBarry Smith   int         *r,*ic, ierr, prow, n = a->mbs, *ai = a->i, *aj = a->j;
3265d517e7eSBarry Smith   int         *ainew,*ajnew, jmax,*fill, *xi, nz, *im,*ajfill,*flev;
3275d517e7eSBarry Smith   int         *dloc, idx, row,m,fm, nzf, nzi,len,  realloc = 0;
328*ec3a8b7bSBarry Smith   int         incrlev,nnz,i,shift = a->indexshift,bs = a->bs;
3295d517e7eSBarry Smith 
330*ec3a8b7bSBarry Smith   if (a->m != a->n) SETERRQ(1,"MatILUFactorSymbolic_SeqBAIJ:Matrix must be square");
331*ec3a8b7bSBarry Smith   if (!isrow) SETERRQ(1,"MatILUFactorSymbolic_SeqBAIJ:Must have row permutation");
332*ec3a8b7bSBarry Smith   if (!iscol) SETERRQ(1,"MatILUFactorSymbolic_SeqBAIJ:Must have column permutation");
3335d517e7eSBarry Smith 
3345d517e7eSBarry Smith   /* special case that simply copies fill pattern */
3355d517e7eSBarry Smith   if (levels == 0 && ISIsIdentity(isrow) && ISIsIdentity(iscol)) {
336*ec3a8b7bSBarry Smith     ierr = MatConvertSameType_SeqBAIJ(A,fact,DO_NOT_COPY_VALUES); CHKERRQ(ierr);
3375d517e7eSBarry Smith     (*fact)->factor = FACTOR_LU;
338*ec3a8b7bSBarry Smith     b               = (Mat_SeqBAIJ *) (*fact)->data;
3395d517e7eSBarry Smith     if (!b->diag) {
340*ec3a8b7bSBarry Smith       ierr = MatMarkDiag_SeqBAIJ(*fact); CHKERRQ(ierr);
3415d517e7eSBarry Smith     }
3425d517e7eSBarry Smith     b->row        = isrow;
3435d517e7eSBarry Smith     b->col        = iscol;
3445d517e7eSBarry Smith     b->solve_work = (Scalar *) PetscMalloc((b->m+1)*sizeof(Scalar));CHKPTRQ(b->solve_work);
3455d517e7eSBarry Smith     return 0;
3465d517e7eSBarry Smith   }
3475d517e7eSBarry Smith 
3485d517e7eSBarry Smith   ierr = ISInvertPermutation(iscol,&isicol); CHKERRQ(ierr);
3495d517e7eSBarry Smith   ierr = ISGetIndices(isrow,&r); CHKERRQ(ierr);
3505d517e7eSBarry Smith   ierr = ISGetIndices(isicol,&ic); CHKERRQ(ierr);
3515d517e7eSBarry Smith 
3525d517e7eSBarry Smith   /* get new row pointers */
3535d517e7eSBarry Smith   ainew = (int *) PetscMalloc( (n+1)*sizeof(int) ); CHKPTRQ(ainew);
3545d517e7eSBarry Smith   ainew[0] = -shift;
3555d517e7eSBarry Smith   /* don't know how many column pointers are needed so estimate */
3565d517e7eSBarry Smith   jmax = (int) (f*(ai[n]+!shift));
3575d517e7eSBarry Smith   ajnew = (int *) PetscMalloc( (jmax)*sizeof(int) ); CHKPTRQ(ajnew);
3585d517e7eSBarry Smith   /* ajfill is level of fill for each fill entry */
3595d517e7eSBarry Smith   ajfill = (int *) PetscMalloc( (jmax)*sizeof(int) ); CHKPTRQ(ajfill);
3605d517e7eSBarry Smith   /* fill is a linked list of nonzeros in active row */
3615d517e7eSBarry Smith   fill = (int *) PetscMalloc( (n+1)*sizeof(int)); CHKPTRQ(fill);
3625d517e7eSBarry Smith   /* im is level for each filled value */
3635d517e7eSBarry Smith   im = (int *) PetscMalloc( (n+1)*sizeof(int)); CHKPTRQ(im);
3645d517e7eSBarry Smith   /* dloc is location of diagonal in factor */
3655d517e7eSBarry Smith   dloc = (int *) PetscMalloc( (n+1)*sizeof(int)); CHKPTRQ(dloc);
3665d517e7eSBarry Smith   dloc[0]  = 0;
3675d517e7eSBarry Smith   for ( prow=0; prow<n; prow++ ) {
3685d517e7eSBarry Smith     /* first copy previous fill into linked list */
3695d517e7eSBarry Smith     nzf     = nz  = ai[r[prow]+1] - ai[r[prow]];
3705d517e7eSBarry Smith     xi      = aj + ai[r[prow]] + shift;
3715d517e7eSBarry Smith     fill[n] = n;
3725d517e7eSBarry Smith     while (nz--) {
3735d517e7eSBarry Smith       fm  = n;
3745d517e7eSBarry Smith       idx = ic[*xi++ + shift];
3755d517e7eSBarry Smith       do {
3765d517e7eSBarry Smith         m  = fm;
3775d517e7eSBarry Smith         fm = fill[m];
3785d517e7eSBarry Smith       } while (fm < idx);
3795d517e7eSBarry Smith       fill[m]   = idx;
3805d517e7eSBarry Smith       fill[idx] = fm;
3815d517e7eSBarry Smith       im[idx]   = 0;
3825d517e7eSBarry Smith     }
3835d517e7eSBarry Smith     nzi = 0;
3845d517e7eSBarry Smith     row = fill[n];
3855d517e7eSBarry Smith     while ( row < prow ) {
3865d517e7eSBarry Smith       incrlev = im[row] + 1;
3875d517e7eSBarry Smith       nz      = dloc[row];
3885d517e7eSBarry Smith       xi      = ajnew  + ainew[row] + shift + nz;
3895d517e7eSBarry Smith       flev    = ajfill + ainew[row] + shift + nz + 1;
3905d517e7eSBarry Smith       nnz     = ainew[row+1] - ainew[row] - nz - 1;
3915d517e7eSBarry Smith       if (*xi++ + shift != row) {
392*ec3a8b7bSBarry Smith         SETERRQ(1,"MatILUFactorSymbolic_SeqBAIJ:zero pivot");
3935d517e7eSBarry Smith       }
3945d517e7eSBarry Smith       fm      = row;
3955d517e7eSBarry Smith       while (nnz-- > 0) {
3965d517e7eSBarry Smith         idx = *xi++ + shift;
3975d517e7eSBarry Smith         if (*flev + incrlev > levels) {
3985d517e7eSBarry Smith           flev++;
3995d517e7eSBarry Smith           continue;
4005d517e7eSBarry Smith         }
4015d517e7eSBarry Smith         do {
4025d517e7eSBarry Smith           m  = fm;
4035d517e7eSBarry Smith           fm = fill[m];
4045d517e7eSBarry Smith         } while (fm < idx);
4055d517e7eSBarry Smith         if (fm != idx) {
4065d517e7eSBarry Smith           im[idx]   = *flev + incrlev;
4075d517e7eSBarry Smith           fill[m]   = idx;
4085d517e7eSBarry Smith           fill[idx] = fm;
4095d517e7eSBarry Smith           fm        = idx;
4105d517e7eSBarry Smith           nzf++;
4115d517e7eSBarry Smith         }
4125d517e7eSBarry Smith         else {
4135d517e7eSBarry Smith           if (im[idx] > *flev + incrlev) im[idx] = *flev+incrlev;
4145d517e7eSBarry Smith         }
4155d517e7eSBarry Smith         flev++;
4165d517e7eSBarry Smith       }
4175d517e7eSBarry Smith       row = fill[row];
4185d517e7eSBarry Smith       nzi++;
4195d517e7eSBarry Smith     }
4205d517e7eSBarry Smith     /* copy new filled row into permanent storage */
4215d517e7eSBarry Smith     ainew[prow+1] = ainew[prow] + nzf;
4225d517e7eSBarry Smith     if (ainew[prow+1] > jmax-shift) {
4235d517e7eSBarry Smith       /* allocate a longer ajnew */
4245d517e7eSBarry Smith       int maxadd;
4255d517e7eSBarry Smith       maxadd = (int) ((f*(ai[n]+!shift)*(n-prow+5))/n);
4265d517e7eSBarry Smith       if (maxadd < nzf) maxadd = (n-prow)*(nzf+1);
4275d517e7eSBarry Smith       jmax += maxadd;
4285d517e7eSBarry Smith       xi = (int *) PetscMalloc( jmax*sizeof(int) );CHKPTRQ(xi);
4295d517e7eSBarry Smith       PetscMemcpy(xi,ajnew,(ainew[prow]+shift)*sizeof(int));
4305d517e7eSBarry Smith       PetscFree(ajnew);
4315d517e7eSBarry Smith       ajnew = xi;
4325d517e7eSBarry Smith       /* allocate a longer ajfill */
4335d517e7eSBarry Smith       xi = (int *) PetscMalloc( jmax*sizeof(int) );CHKPTRQ(xi);
4345d517e7eSBarry Smith       PetscMemcpy(xi,ajfill,(ainew[prow]+shift)*sizeof(int));
4355d517e7eSBarry Smith       PetscFree(ajfill);
4365d517e7eSBarry Smith       ajfill = xi;
4375d517e7eSBarry Smith       realloc++;
4385d517e7eSBarry Smith     }
4395d517e7eSBarry Smith     xi          = ajnew + ainew[prow] + shift;
4405d517e7eSBarry Smith     flev        = ajfill + ainew[prow] + shift;
4415d517e7eSBarry Smith     dloc[prow]  = nzi;
4425d517e7eSBarry Smith     fm          = fill[n];
4435d517e7eSBarry Smith     while (nzf--) {
4445d517e7eSBarry Smith       *xi++   = fm - shift;
4455d517e7eSBarry Smith       *flev++ = im[fm];
4465d517e7eSBarry Smith       fm      = fill[fm];
4475d517e7eSBarry Smith     }
4485d517e7eSBarry Smith   }
4495d517e7eSBarry Smith   PetscFree(ajfill);
4505d517e7eSBarry Smith   ierr = ISRestoreIndices(isrow,&r); CHKERRQ(ierr);
4515d517e7eSBarry Smith   ierr = ISRestoreIndices(isicol,&ic); CHKERRQ(ierr);
4525d517e7eSBarry Smith   ierr = ISDestroy(isicol); CHKERRQ(ierr);
4535d517e7eSBarry Smith   PetscFree(fill); PetscFree(im);
4545d517e7eSBarry Smith 
4555d517e7eSBarry Smith   PLogInfo((PetscObject)A,
456*ec3a8b7bSBarry Smith     "Info:MatILUFactorSymbolic_SeqBAIJ:Realloc %d Fill ratio:given %g needed %g\n",
4575d517e7eSBarry Smith                              realloc,f,((double)ainew[n])/((double)ai[prow]));
4585d517e7eSBarry Smith 
4595d517e7eSBarry Smith   /* put together the new matrix */
460*ec3a8b7bSBarry Smith   ierr = MatCreateSeqBAIJ(A->comm,bs,bs*n,bs*n,0,PETSC_NULL,fact);CHKERRQ(ierr);
461*ec3a8b7bSBarry Smith   b = (Mat_SeqBAIJ *) (*fact)->data;
4625d517e7eSBarry Smith   PetscFree(b->imax);
4635d517e7eSBarry Smith   b->singlemalloc = 0;
4645d517e7eSBarry Smith   len = (ainew[n] + shift)*sizeof(Scalar);
4655d517e7eSBarry Smith   /* the next line frees the default space generated by the Create() */
4665d517e7eSBarry Smith   PetscFree(b->a); PetscFree(b->ilen);
4675d517e7eSBarry Smith   b->a          = (Scalar *) PetscMalloc( len ); CHKPTRQ(b->a);
4685d517e7eSBarry Smith   b->j          = ajnew;
4695d517e7eSBarry Smith   b->i          = ainew;
4705d517e7eSBarry Smith   for ( i=0; i<n; i++ ) dloc[i] += ainew[i];
4715d517e7eSBarry Smith   b->diag       = dloc;
4725d517e7eSBarry Smith   b->ilen       = 0;
4735d517e7eSBarry Smith   b->imax       = 0;
4745d517e7eSBarry Smith   b->row        = isrow;
4755d517e7eSBarry Smith   b->col        = iscol;
4765d517e7eSBarry Smith   b->solve_work = (Scalar *) PetscMalloc( (n+1)*sizeof(Scalar));
4775d517e7eSBarry Smith   CHKPTRQ(b->solve_work);
4785d517e7eSBarry Smith   /* In b structure:  Free imax, ilen, old a, old j.
4795d517e7eSBarry Smith      Allocate dloc, solve_work, new a, new j */
4805d517e7eSBarry Smith   PLogObjectMemory(*fact,(ainew[n]+shift-n) * (sizeof(int)+sizeof(Scalar)));
4815d517e7eSBarry Smith   b->maxnz          = b->nz = ainew[n] + shift;
4825d517e7eSBarry Smith   (*fact)->factor   = FACTOR_LU;
4835d517e7eSBarry Smith   return 0;
4845d517e7eSBarry Smith }
4855d517e7eSBarry Smith 
4865d517e7eSBarry Smith 
4875d517e7eSBarry Smith 
4885d517e7eSBarry Smith 
489