xref: /petsc/src/mat/impls/baij/seq/baijfact3.c (revision 83287d428b9a58025db0326f03de411e4695309b)
1*83287d42SBarry Smith /*$Id: baijfact.c,v 1.86 2000/11/28 17:29:14 bsmith Exp $*/
2*83287d42SBarry Smith /*
3*83287d42SBarry Smith     Factorization code for BAIJ format.
4*83287d42SBarry Smith */
5*83287d42SBarry Smith #include "src/mat/impls/baij/seq/baij.h"
6*83287d42SBarry Smith #include "src/vec/vecimpl.h"
7*83287d42SBarry Smith #include "src/inline/ilu.h"
8*83287d42SBarry Smith 
9*83287d42SBarry Smith /*
10*83287d42SBarry Smith     The symbolic factorization code is identical to that for AIJ format,
11*83287d42SBarry Smith   except for very small changes since this is now a SeqBAIJ datastructure.
12*83287d42SBarry Smith   NOT good code reuse.
13*83287d42SBarry Smith */
14*83287d42SBarry Smith #undef __FUNC__
15*83287d42SBarry Smith #define __FUNC__ /*<a name="MatLUFactorSymbolic_SeqBAIJ"></a>*/"MatLUFactorSymbolic_SeqBAIJ"
16*83287d42SBarry Smith int MatLUFactorSymbolic_SeqBAIJ(Mat A,IS isrow,IS iscol,MatLUInfo *info,Mat *B)
17*83287d42SBarry Smith {
18*83287d42SBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data,*b;
19*83287d42SBarry Smith   IS          isicol;
20*83287d42SBarry Smith   int         *r,*ic,ierr,i,n = a->mbs,*ai = a->i,*aj = a->j;
21*83287d42SBarry Smith   int         *ainew,*ajnew,jmax,*fill,*ajtmp,nz,bs = a->bs,bs2=a->bs2;
22*83287d42SBarry Smith   int         *idnew,idx,row,m,fm,nnz,nzi,realloc = 0,nzbd,*im;
23*83287d42SBarry Smith   PetscReal   f = 1.0;
24*83287d42SBarry Smith 
25*83287d42SBarry Smith   PetscFunctionBegin;
26*83287d42SBarry Smith   PetscValidHeaderSpecific(isrow,IS_COOKIE);
27*83287d42SBarry Smith   PetscValidHeaderSpecific(iscol,IS_COOKIE);
28*83287d42SBarry Smith   if (A->M != A->N) SETERRQ(PETSC_ERR_ARG_WRONG,"matrix must be square");
29*83287d42SBarry Smith 
30*83287d42SBarry Smith   if (!isrow) {
31*83287d42SBarry Smith     ierr = ISCreateStride(PETSC_COMM_SELF,A->M,0,1,&isrow);CHKERRQ(ierr);
32*83287d42SBarry Smith   }
33*83287d42SBarry Smith   if (!iscol) {
34*83287d42SBarry Smith     ierr = ISCreateStride(PETSC_COMM_SELF,A->M,0,1,&iscol);CHKERRQ(ierr);
35*83287d42SBarry Smith   }
36*83287d42SBarry Smith   ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr);
37*83287d42SBarry Smith   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
38*83287d42SBarry Smith   ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
39*83287d42SBarry Smith 
40*83287d42SBarry Smith   if (info) f = info->fill;
41*83287d42SBarry Smith   /* get new row pointers */
42*83287d42SBarry Smith   ainew = (int*)PetscMalloc((n+1)*sizeof(int));CHKPTRQ(ainew);
43*83287d42SBarry Smith   ainew[0] = 0;
44*83287d42SBarry Smith   /* don't know how many column pointers are needed so estimate */
45*83287d42SBarry Smith   jmax = (int)(f*ai[n] + 1);
46*83287d42SBarry Smith   ajnew = (int*)PetscMalloc((jmax)*sizeof(int));CHKPTRQ(ajnew);
47*83287d42SBarry Smith   /* fill is a linked list of nonzeros in active row */
48*83287d42SBarry Smith   fill = (int*)PetscMalloc((2*n+1)*sizeof(int));CHKPTRQ(fill);
49*83287d42SBarry Smith   im = fill + n + 1;
50*83287d42SBarry Smith   /* idnew is location of diagonal in factor */
51*83287d42SBarry Smith   idnew = (int*)PetscMalloc((n+1)*sizeof(int));CHKPTRQ(idnew);
52*83287d42SBarry Smith   idnew[0] = 0;
53*83287d42SBarry Smith 
54*83287d42SBarry Smith   for (i=0; i<n; i++) {
55*83287d42SBarry Smith     /* first copy previous fill into linked list */
56*83287d42SBarry Smith     nnz     = nz    = ai[r[i]+1] - ai[r[i]];
57*83287d42SBarry Smith     if (!nz) SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix");
58*83287d42SBarry Smith     ajtmp   = aj + ai[r[i]];
59*83287d42SBarry Smith     fill[n] = n;
60*83287d42SBarry Smith     while (nz--) {
61*83287d42SBarry Smith       fm  = n;
62*83287d42SBarry Smith       idx = ic[*ajtmp++];
63*83287d42SBarry Smith       do {
64*83287d42SBarry Smith         m  = fm;
65*83287d42SBarry Smith         fm = fill[m];
66*83287d42SBarry Smith       } while (fm < idx);
67*83287d42SBarry Smith       fill[m]   = idx;
68*83287d42SBarry Smith       fill[idx] = fm;
69*83287d42SBarry Smith     }
70*83287d42SBarry Smith     row = fill[n];
71*83287d42SBarry Smith     while (row < i) {
72*83287d42SBarry Smith       ajtmp = ajnew + idnew[row] + 1;
73*83287d42SBarry Smith       nzbd  = 1 + idnew[row] - ainew[row];
74*83287d42SBarry Smith       nz    = im[row] - nzbd;
75*83287d42SBarry Smith       fm    = row;
76*83287d42SBarry Smith       while (nz-- > 0) {
77*83287d42SBarry Smith         idx = *ajtmp++;
78*83287d42SBarry Smith         nzbd++;
79*83287d42SBarry Smith         if (idx == i) im[row] = nzbd;
80*83287d42SBarry Smith         do {
81*83287d42SBarry Smith           m  = fm;
82*83287d42SBarry Smith           fm = fill[m];
83*83287d42SBarry Smith         } while (fm < idx);
84*83287d42SBarry Smith         if (fm != idx) {
85*83287d42SBarry Smith           fill[m]   = idx;
86*83287d42SBarry Smith           fill[idx] = fm;
87*83287d42SBarry Smith           fm        = idx;
88*83287d42SBarry Smith           nnz++;
89*83287d42SBarry Smith         }
90*83287d42SBarry Smith       }
91*83287d42SBarry Smith       row = fill[row];
92*83287d42SBarry Smith     }
93*83287d42SBarry Smith     /* copy new filled row into permanent storage */
94*83287d42SBarry Smith     ainew[i+1] = ainew[i] + nnz;
95*83287d42SBarry Smith     if (ainew[i+1] > jmax) {
96*83287d42SBarry Smith 
97*83287d42SBarry Smith       /* estimate how much additional space we will need */
98*83287d42SBarry Smith       /* use the strategy suggested by David Hysom <hysom@perch-t.icase.edu> */
99*83287d42SBarry Smith       /* just double the memory each time */
100*83287d42SBarry Smith       int maxadd = jmax;
101*83287d42SBarry Smith       /* maxadd = (int)((f*(ai[n]+1)*(n-i+5))/n); */
102*83287d42SBarry Smith       if (maxadd < nnz) maxadd = (n-i)*(nnz+1);
103*83287d42SBarry Smith       jmax += maxadd;
104*83287d42SBarry Smith 
105*83287d42SBarry Smith       /* allocate a longer ajnew */
106*83287d42SBarry Smith       ajtmp = (int*)PetscMalloc(jmax*sizeof(int));CHKPTRQ(ajtmp);
107*83287d42SBarry Smith       ierr  = PetscMemcpy(ajtmp,ajnew,ainew[i]*sizeof(int));CHKERRQ(ierr);
108*83287d42SBarry Smith       ierr = PetscFree(ajnew);CHKERRQ(ierr);
109*83287d42SBarry Smith       ajnew = ajtmp;
110*83287d42SBarry Smith       realloc++; /* count how many times we realloc */
111*83287d42SBarry Smith     }
112*83287d42SBarry Smith     ajtmp = ajnew + ainew[i];
113*83287d42SBarry Smith     fm    = fill[n];
114*83287d42SBarry Smith     nzi   = 0;
115*83287d42SBarry Smith     im[i] = nnz;
116*83287d42SBarry Smith     while (nnz--) {
117*83287d42SBarry Smith       if (fm < i) nzi++;
118*83287d42SBarry Smith       *ajtmp++ = fm;
119*83287d42SBarry Smith       fm       = fill[fm];
120*83287d42SBarry Smith     }
121*83287d42SBarry Smith     idnew[i] = ainew[i] + nzi;
122*83287d42SBarry Smith   }
123*83287d42SBarry Smith 
124*83287d42SBarry Smith   if (ai[n] != 0) {
125*83287d42SBarry Smith     PetscReal af = ((PetscReal)ainew[n])/((PetscReal)ai[n]);
126*83287d42SBarry Smith     PLogInfo(A,"MatLUFactorSymbolic_SeqBAIJ:Reallocs %d Fill ratio:given %g needed %g\n",realloc,f,af);
127*83287d42SBarry Smith     PLogInfo(A,"MatLUFactorSymbolic_SeqBAIJ:Run with -pc_lu_fill %g or use \n",af);
128*83287d42SBarry Smith     PLogInfo(A,"MatLUFactorSymbolic_SeqBAIJ:PCLUSetFill(pc,%g);\n",af);
129*83287d42SBarry Smith     PLogInfo(A,"MatLUFactorSymbolic_SeqBAIJ:for best performance.\n");
130*83287d42SBarry Smith   } else {
131*83287d42SBarry Smith      PLogInfo(A,"MatLUFactorSymbolic_SeqBAIJ:Empty matrix.\n");
132*83287d42SBarry Smith   }
133*83287d42SBarry Smith 
134*83287d42SBarry Smith   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
135*83287d42SBarry Smith   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
136*83287d42SBarry Smith 
137*83287d42SBarry Smith   ierr = PetscFree(fill);CHKERRQ(ierr);
138*83287d42SBarry Smith 
139*83287d42SBarry Smith   /* put together the new matrix */
140*83287d42SBarry Smith   ierr = MatCreateSeqBAIJ(A->comm,bs,bs*n,bs*n,0,PETSC_NULL,B);CHKERRQ(ierr);
141*83287d42SBarry Smith   PLogObjectParent(*B,isicol);
142*83287d42SBarry Smith   b = (Mat_SeqBAIJ*)(*B)->data;
143*83287d42SBarry Smith   ierr = PetscFree(b->imax);CHKERRQ(ierr);
144*83287d42SBarry Smith   b->singlemalloc = PETSC_FALSE;
145*83287d42SBarry Smith   /* the next line frees the default space generated by the Create() */
146*83287d42SBarry Smith   ierr = PetscFree(b->a);CHKERRQ(ierr);
147*83287d42SBarry Smith   ierr = PetscFree(b->ilen);CHKERRQ(ierr);
148*83287d42SBarry Smith   b->a          = (MatScalar*)PetscMalloc((ainew[n]+1)*sizeof(MatScalar)*bs2);CHKPTRQ(b->a);
149*83287d42SBarry Smith   b->j          = ajnew;
150*83287d42SBarry Smith   b->i          = ainew;
151*83287d42SBarry Smith   b->diag       = idnew;
152*83287d42SBarry Smith   b->ilen       = 0;
153*83287d42SBarry Smith   b->imax       = 0;
154*83287d42SBarry Smith   b->row        = isrow;
155*83287d42SBarry Smith   b->col        = iscol;
156*83287d42SBarry Smith   ierr          = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
157*83287d42SBarry Smith   ierr          = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
158*83287d42SBarry Smith   b->icol       = isicol;
159*83287d42SBarry Smith   b->solve_work = (Scalar*)PetscMalloc((bs*n+bs)*sizeof(Scalar));CHKPTRQ(b->solve_work);
160*83287d42SBarry Smith   /* In b structure:  Free imax, ilen, old a, old j.
161*83287d42SBarry Smith      Allocate idnew, solve_work, new a, new j */
162*83287d42SBarry Smith   PLogObjectMemory(*B,(ainew[n]-n)*(sizeof(int)+sizeof(MatScalar)));
163*83287d42SBarry Smith   b->maxnz = b->nz = ainew[n];
164*83287d42SBarry Smith 
165*83287d42SBarry Smith   (*B)->factor                 = FACTOR_LU;
166*83287d42SBarry Smith   (*B)->info.factor_mallocs    = realloc;
167*83287d42SBarry Smith   (*B)->info.fill_ratio_given  = f;
168*83287d42SBarry Smith   if (ai[n] != 0) {
169*83287d42SBarry Smith     (*B)->info.fill_ratio_needed = ((PetscReal)ainew[n])/((PetscReal)ai[n]);
170*83287d42SBarry Smith   } else {
171*83287d42SBarry Smith     (*B)->info.fill_ratio_needed = 0.0;
172*83287d42SBarry Smith   }
173*83287d42SBarry Smith   PetscFunctionReturn(0);
174*83287d42SBarry Smith }
175