xref: /petsc/src/mat/impls/baij/seq/baijfact3.c (revision be1d678a52e6eff2808b2fa31ae986cdbf03c9fe)
1*be1d678aSKris Buschelman #define PETSCMAT_DLL
2*be1d678aSKris Buschelman 
383287d42SBarry Smith /*
483287d42SBarry Smith     Factorization code for BAIJ format.
583287d42SBarry Smith */
683287d42SBarry Smith #include "src/mat/impls/baij/seq/baij.h"
783287d42SBarry Smith #include "src/inline/ilu.h"
883287d42SBarry Smith 
983287d42SBarry Smith /*
1083287d42SBarry Smith     The symbolic factorization code is identical to that for AIJ format,
1183287d42SBarry Smith   except for very small changes since this is now a SeqBAIJ datastructure.
1283287d42SBarry Smith   NOT good code reuse.
1383287d42SBarry Smith */
144a2ae208SSatish Balay #undef __FUNCT__
154a2ae208SSatish Balay #define __FUNCT__ "MatLUFactorSymbolic_SeqBAIJ"
16dfbe8321SBarry Smith PetscErrorCode MatLUFactorSymbolic_SeqBAIJ(Mat A,IS isrow,IS iscol,MatFactorInfo *info,Mat *B)
1783287d42SBarry Smith {
1883287d42SBarry Smith   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data,*b;
1983287d42SBarry Smith   IS             isicol;
206849ba73SBarry Smith   PetscErrorCode ierr;
21690b6cddSBarry Smith   PetscInt       *r,*ic,i,n = a->mbs,*ai = a->i,*aj = a->j;
22521d7252SBarry Smith   PetscInt       *ainew,*ajnew,jmax,*fill,*ajtmp,nz,bs = A->bs,bs2=a->bs2;
23418422e8SSatish Balay   PetscInt       *idnew,idx,row,m,fm,nnz,nzi,reallocs = 0,nzbd,*im;
2483287d42SBarry Smith   PetscReal      f = 1.0;
2583287d42SBarry Smith 
2683287d42SBarry Smith   PetscFunctionBegin;
2783287d42SBarry Smith   if (A->M != A->N) SETERRQ(PETSC_ERR_ARG_WRONG,"matrix must be square");
2883287d42SBarry Smith   ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr);
2983287d42SBarry Smith   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
3083287d42SBarry Smith   ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
3183287d42SBarry Smith 
32d3d32019SBarry Smith   f = info->fill;
3383287d42SBarry Smith   /* get new row pointers */
34690b6cddSBarry Smith   ierr     = PetscMalloc((n+1)*sizeof(PetscInt),&ainew);CHKERRQ(ierr);
3583287d42SBarry Smith   ainew[0] = 0;
3683287d42SBarry Smith   /* don't know how many column pointers are needed so estimate */
37690b6cddSBarry Smith   jmax     = (PetscInt)(f*ai[n] + 1);
38690b6cddSBarry Smith   ierr     = PetscMalloc((jmax)*sizeof(PetscInt),&ajnew);CHKERRQ(ierr);
3983287d42SBarry Smith   /* fill is a linked list of nonzeros in active row */
40690b6cddSBarry Smith   ierr     = PetscMalloc((2*n+1)*sizeof(PetscInt),&fill);CHKERRQ(ierr);
4183287d42SBarry Smith   im       = fill + n + 1;
4283287d42SBarry Smith   /* idnew is location of diagonal in factor */
43690b6cddSBarry Smith   ierr     = PetscMalloc((n+1)*sizeof(PetscInt),&idnew);CHKERRQ(ierr);
4483287d42SBarry Smith   idnew[0] = 0;
4583287d42SBarry Smith 
4683287d42SBarry Smith   for (i=0; i<n; i++) {
4783287d42SBarry Smith     /* first copy previous fill into linked list */
4883287d42SBarry Smith     nnz     = nz    = ai[r[i]+1] - ai[r[i]];
4983287d42SBarry Smith     if (!nz) SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix");
5083287d42SBarry Smith     ajtmp   = aj + ai[r[i]];
5183287d42SBarry Smith     fill[n] = n;
5283287d42SBarry Smith     while (nz--) {
5383287d42SBarry Smith       fm  = n;
5483287d42SBarry Smith       idx = ic[*ajtmp++];
5583287d42SBarry Smith       do {
5683287d42SBarry Smith         m  = fm;
5783287d42SBarry Smith         fm = fill[m];
5883287d42SBarry Smith       } while (fm < idx);
5983287d42SBarry Smith       fill[m]   = idx;
6083287d42SBarry Smith       fill[idx] = fm;
6183287d42SBarry Smith     }
6283287d42SBarry Smith     row = fill[n];
6383287d42SBarry Smith     while (row < i) {
6483287d42SBarry Smith       ajtmp = ajnew + idnew[row] + 1;
6583287d42SBarry Smith       nzbd  = 1 + idnew[row] - ainew[row];
6683287d42SBarry Smith       nz    = im[row] - nzbd;
6783287d42SBarry Smith       fm    = row;
6883287d42SBarry Smith       while (nz-- > 0) {
6983287d42SBarry Smith         idx = *ajtmp++;
7083287d42SBarry Smith         nzbd++;
7183287d42SBarry Smith         if (idx == i) im[row] = nzbd;
7283287d42SBarry Smith         do {
7383287d42SBarry Smith           m  = fm;
7483287d42SBarry Smith           fm = fill[m];
7583287d42SBarry Smith         } while (fm < idx);
7683287d42SBarry Smith         if (fm != idx) {
7783287d42SBarry Smith           fill[m]   = idx;
7883287d42SBarry Smith           fill[idx] = fm;
7983287d42SBarry Smith           fm        = idx;
8083287d42SBarry Smith           nnz++;
8183287d42SBarry Smith         }
8283287d42SBarry Smith       }
8383287d42SBarry Smith       row = fill[row];
8483287d42SBarry Smith     }
8583287d42SBarry Smith     /* copy new filled row into permanent storage */
8683287d42SBarry Smith     ainew[i+1] = ainew[i] + nnz;
8783287d42SBarry Smith     if (ainew[i+1] > jmax) {
8883287d42SBarry Smith 
8983287d42SBarry Smith       /* estimate how much additional space we will need */
9083287d42SBarry Smith       /* use the strategy suggested by David Hysom <hysom@perch-t.icase.edu> */
9183287d42SBarry Smith       /* just double the memory each time */
92690b6cddSBarry Smith       PetscInt maxadd = jmax;
9383287d42SBarry Smith       /* maxadd = (int)((f*(ai[n]+1)*(n-i+5))/n); */
9483287d42SBarry Smith       if (maxadd < nnz) maxadd = (n-i)*(nnz+1);
9583287d42SBarry Smith       jmax += maxadd;
9683287d42SBarry Smith 
9783287d42SBarry Smith       /* allocate a longer ajnew */
98690b6cddSBarry Smith       ierr  = PetscMalloc(jmax*sizeof(PetscInt),&ajtmp);CHKERRQ(ierr);
99690b6cddSBarry Smith       ierr  = PetscMemcpy(ajtmp,ajnew,ainew[i]*sizeof(PetscInt));CHKERRQ(ierr);
10083287d42SBarry Smith       ierr  = PetscFree(ajnew);CHKERRQ(ierr);
10183287d42SBarry Smith       ajnew = ajtmp;
102418422e8SSatish Balay       reallocs++; /* count how many times we realloc */
10383287d42SBarry Smith     }
10483287d42SBarry Smith     ajtmp = ajnew + ainew[i];
10583287d42SBarry Smith     fm    = fill[n];
10683287d42SBarry Smith     nzi   = 0;
10783287d42SBarry Smith     im[i] = nnz;
10883287d42SBarry Smith     while (nnz--) {
10983287d42SBarry Smith       if (fm < i) nzi++;
11083287d42SBarry Smith       *ajtmp++ = fm;
11183287d42SBarry Smith       fm       = fill[fm];
11283287d42SBarry Smith     }
11383287d42SBarry Smith     idnew[i] = ainew[i] + nzi;
11483287d42SBarry Smith   }
11583287d42SBarry Smith 
11663ba0a88SBarry Smith #if defined(PETSC_USE_DEBUG)
11783287d42SBarry Smith   if (ai[n] != 0) {
11883287d42SBarry Smith     PetscReal af = ((PetscReal)ainew[n])/((PetscReal)ai[n]);
11963ba0a88SBarry Smith     ierr = PetscLogInfo((A,"MatLUFactorSymbolic_SeqBAIJ:Reallocs %D Fill ratio:given %g needed %g\n",reallocs,f,af));CHKERRQ(ierr);
12063ba0a88SBarry Smith     ierr = PetscLogInfo((A,"MatLUFactorSymbolic_SeqBAIJ:Run with -pc_lu_fill %g or use \n",af));CHKERRQ(ierr);
12163ba0a88SBarry Smith     ierr = PetscLogInfo((A,"MatLUFactorSymbolic_SeqBAIJ:PCLUSetFill(pc,%g);\n",af));CHKERRQ(ierr);
12263ba0a88SBarry Smith     ierr = PetscLogInfo((A,"MatLUFactorSymbolic_SeqBAIJ:for best performance.\n"));CHKERRQ(ierr);
12383287d42SBarry Smith   } else {
12463ba0a88SBarry Smith     ierr = PetscLogInfo((A,"MatLUFactorSymbolic_SeqBAIJ:Empty matrix.\n"));CHKERRQ(ierr);
12583287d42SBarry Smith   }
12663ba0a88SBarry Smith #endif
12783287d42SBarry Smith 
12883287d42SBarry Smith   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
12983287d42SBarry Smith   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
13083287d42SBarry Smith 
13183287d42SBarry Smith   ierr = PetscFree(fill);CHKERRQ(ierr);
13283287d42SBarry Smith 
13383287d42SBarry Smith   /* put together the new matrix */
134f204ca49SKris Buschelman   ierr = MatCreate(A->comm,bs*n,bs*n,bs*n,bs*n,B);CHKERRQ(ierr);
135f204ca49SKris Buschelman   ierr = MatSetType(*B,A->type_name);CHKERRQ(ierr);
136f204ca49SKris Buschelman   ierr = MatSeqBAIJSetPreallocation(*B,bs,0,PETSC_NULL);CHKERRQ(ierr);
13752e6d16bSBarry Smith   ierr = PetscLogObjectParent(*B,isicol);CHKERRQ(ierr);
13883287d42SBarry Smith   b = (Mat_SeqBAIJ*)(*B)->data;
13983287d42SBarry Smith   ierr = PetscFree(b->imax);CHKERRQ(ierr);
14083287d42SBarry Smith   b->singlemalloc = PETSC_FALSE;
14183287d42SBarry Smith   /* the next line frees the default space generated by the Create() */
14283287d42SBarry Smith   ierr = PetscFree(b->a);CHKERRQ(ierr);
14383287d42SBarry Smith   ierr = PetscFree(b->ilen);CHKERRQ(ierr);
14482502324SSatish Balay   ierr          = PetscMalloc((ainew[n]+1)*sizeof(MatScalar)*bs2,&b->a);CHKERRQ(ierr);
14583287d42SBarry Smith   b->j          = ajnew;
14683287d42SBarry Smith   b->i          = ainew;
14783287d42SBarry Smith   b->diag       = idnew;
14883287d42SBarry Smith   b->ilen       = 0;
14983287d42SBarry Smith   b->imax       = 0;
15083287d42SBarry Smith   b->row        = isrow;
15183287d42SBarry Smith   b->col        = iscol;
152d3d32019SBarry Smith   b->pivotinblocks = (info->pivotinblocks) ? PETSC_TRUE : PETSC_FALSE;
15383287d42SBarry Smith   ierr          = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
15483287d42SBarry Smith   ierr          = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
15583287d42SBarry Smith   b->icol       = isicol;
15687828ca2SBarry Smith   ierr = PetscMalloc((bs*n+bs)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
15783287d42SBarry Smith   /* In b structure:  Free imax, ilen, old a, old j.
15883287d42SBarry Smith      Allocate idnew, solve_work, new a, new j */
15952e6d16bSBarry Smith   ierr = PetscLogObjectMemory(*B,(ainew[n]-n)*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr);
16083287d42SBarry Smith   b->maxnz = b->nz = ainew[n];
16183287d42SBarry Smith 
16283287d42SBarry Smith   (*B)->factor                 = FACTOR_LU;
163418422e8SSatish Balay   (*B)->info.factor_mallocs    = reallocs;
16483287d42SBarry Smith   (*B)->info.fill_ratio_given  = f;
16583287d42SBarry Smith   if (ai[n] != 0) {
16683287d42SBarry Smith     (*B)->info.fill_ratio_needed = ((PetscReal)ainew[n])/((PetscReal)ai[n]);
16783287d42SBarry Smith   } else {
16883287d42SBarry Smith     (*B)->info.fill_ratio_needed = 0.0;
16983287d42SBarry Smith   }
17083287d42SBarry Smith   PetscFunctionReturn(0);
17183287d42SBarry Smith }
172