xref: /petsc/src/mat/impls/baij/seq/baijfact3.c (revision db4efbfdc2bb749ba21a5c202e01785163a71462)
1be1d678aSKris Buschelman #define PETSCMAT_DLL
2be1d678aSKris Buschelman 
383287d42SBarry Smith /*
483287d42SBarry Smith     Factorization code for BAIJ format.
583287d42SBarry Smith */
683287d42SBarry Smith #include "src/mat/impls/baij/seq/baij.h"
783287d42SBarry Smith #include "src/inline/ilu.h"
883287d42SBarry Smith 
9*db4efbfdSBarry Smith #undef __FUNCT__
10*db4efbfdSBarry Smith #define __FUNCT__ "MatSeqBAIJSetNumericFactorization"
11*db4efbfdSBarry Smith /*
12*db4efbfdSBarry Smith    This is used to set the numeric factorization for both LU and ILU symbolic factorization
13*db4efbfdSBarry Smith */
14*db4efbfdSBarry Smith PetscErrorCode MatSeqBAIJSetNumericFactorization(Mat inA,PetscTruth natural)
15*db4efbfdSBarry Smith {
16*db4efbfdSBarry Smith   PetscFunctionBegin;
17*db4efbfdSBarry Smith   if (natural) {
18*db4efbfdSBarry Smith     switch (inA->rmap->bs) {
19*db4efbfdSBarry Smith     case 1:
20*db4efbfdSBarry Smith       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1;
21*db4efbfdSBarry Smith       break;
22*db4efbfdSBarry Smith     case 2:
23*db4efbfdSBarry Smith       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2_NaturalOrdering;
24*db4efbfdSBarry Smith       break;
25*db4efbfdSBarry Smith     case 3:
26*db4efbfdSBarry Smith       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3_NaturalOrdering;
27*db4efbfdSBarry Smith       break;
28*db4efbfdSBarry Smith     case 4:
29*db4efbfdSBarry Smith #if defined(PETSC_USE_MAT_SINGLE)
30*db4efbfdSBarry Smith       {
31*db4efbfdSBarry Smith         PetscTruth  sse_enabled_local;
32*db4efbfdSBarry Smith         PetscErrorCode ierr;
33*db4efbfdSBarry Smith         ierr = PetscSSEIsEnabled(inA->comm,&sse_enabled_local,PETSC_NULL);CHKERRQ(ierr);
34*db4efbfdSBarry Smith         if (sse_enabled_local) {
35*db4efbfdSBarry Smith #  if defined(PETSC_HAVE_SSE)
36*db4efbfdSBarry Smith           int i,*AJ=a->j,nz=a->nz,n=a->mbs;
37*db4efbfdSBarry Smith           if (n==(unsigned short)n) {
38*db4efbfdSBarry Smith             unsigned short *aj=(unsigned short *)AJ;
39*db4efbfdSBarry Smith             for (i=0;i<nz;i++) {
40*db4efbfdSBarry Smith               aj[i] = (unsigned short)AJ[i];
41*db4efbfdSBarry Smith             }
42*db4efbfdSBarry Smith             inA->ops->setunfactored   = MatSetUnfactored_SeqBAIJ_4_NaturalOrdering_SSE_usj;
43*db4efbfdSBarry Smith             inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering_SSE_usj;
44*db4efbfdSBarry Smith             ierr = PetscInfo(inA,"Using special SSE, in-place natural ordering, ushort j index factor BS=4\n");CHKERRQ(ierr);
45*db4efbfdSBarry Smith           } else {
46*db4efbfdSBarry Smith         /* Scale the column indices for easier indexing in MatSolve. */
47*db4efbfdSBarry Smith /*            for (i=0;i<nz;i++) { */
48*db4efbfdSBarry Smith /*              AJ[i] = AJ[i]*4; */
49*db4efbfdSBarry Smith /*            } */
50*db4efbfdSBarry Smith             inA->ops->setunfactored   = MatSetUnfactored_SeqBAIJ_4_NaturalOrdering_SSE;
51*db4efbfdSBarry Smith             inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering_SSE;
52*db4efbfdSBarry Smith             ierr = PetscInfo(inA,"Using special SSE, in-place natural ordering, int j index factor BS=4\n");CHKERRQ(ierr);
53*db4efbfdSBarry Smith           }
54*db4efbfdSBarry Smith #  else
55*db4efbfdSBarry Smith         /* This should never be reached.  If so, problem in PetscSSEIsEnabled. */
56*db4efbfdSBarry Smith           SETERRQ(PETSC_ERR_SUP,"SSE Hardware unavailable");
57*db4efbfdSBarry Smith #  endif
58*db4efbfdSBarry Smith         } else {
59*db4efbfdSBarry Smith           inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering;
60*db4efbfdSBarry Smith         }
61*db4efbfdSBarry Smith       }
62*db4efbfdSBarry Smith #else
63*db4efbfdSBarry Smith       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering;
64*db4efbfdSBarry Smith #endif
65*db4efbfdSBarry Smith       break;
66*db4efbfdSBarry Smith     case 5:
67*db4efbfdSBarry Smith       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5_NaturalOrdering;
68*db4efbfdSBarry Smith       break;
69*db4efbfdSBarry Smith     case 6:
70*db4efbfdSBarry Smith       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6_NaturalOrdering;
71*db4efbfdSBarry Smith       break;
72*db4efbfdSBarry Smith     case 7:
73*db4efbfdSBarry Smith       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7_NaturalOrdering;
74*db4efbfdSBarry Smith       break;
75*db4efbfdSBarry Smith     default:
76*db4efbfdSBarry Smith       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N;
77*db4efbfdSBarry Smith       break;
78*db4efbfdSBarry Smith     }
79*db4efbfdSBarry Smith   } else {
80*db4efbfdSBarry Smith     switch (inA->rmap->bs) {
81*db4efbfdSBarry Smith     case 1:
82*db4efbfdSBarry Smith       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1;
83*db4efbfdSBarry Smith       break;
84*db4efbfdSBarry Smith     case 2:
85*db4efbfdSBarry Smith       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2;
86*db4efbfdSBarry Smith       break;
87*db4efbfdSBarry Smith     case 3:
88*db4efbfdSBarry Smith       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3;
89*db4efbfdSBarry Smith       break;
90*db4efbfdSBarry Smith     case 4:
91*db4efbfdSBarry Smith       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4;
92*db4efbfdSBarry Smith       break;
93*db4efbfdSBarry Smith     case 5:
94*db4efbfdSBarry Smith       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5;
95*db4efbfdSBarry Smith       break;
96*db4efbfdSBarry Smith     case 6:
97*db4efbfdSBarry Smith       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6;
98*db4efbfdSBarry Smith       break;
99*db4efbfdSBarry Smith     case 7:
100*db4efbfdSBarry Smith       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7;
101*db4efbfdSBarry Smith       break;
102*db4efbfdSBarry Smith     default:
103*db4efbfdSBarry Smith       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N;
104*db4efbfdSBarry Smith       break;
105*db4efbfdSBarry Smith     }
106*db4efbfdSBarry Smith   }
107*db4efbfdSBarry Smith   PetscFunctionReturn(0);
108*db4efbfdSBarry Smith }
109*db4efbfdSBarry Smith 
11083287d42SBarry Smith /*
11183287d42SBarry Smith     The symbolic factorization code is identical to that for AIJ format,
11283287d42SBarry Smith   except for very small changes since this is now a SeqBAIJ datastructure.
11383287d42SBarry Smith   NOT good code reuse.
11483287d42SBarry Smith */
1154a2ae208SSatish Balay #undef __FUNCT__
1164a2ae208SSatish Balay #define __FUNCT__ "MatLUFactorSymbolic_SeqBAIJ"
117dfbe8321SBarry Smith PetscErrorCode MatLUFactorSymbolic_SeqBAIJ(Mat A,IS isrow,IS iscol,MatFactorInfo *info,Mat *B)
11883287d42SBarry Smith {
11983287d42SBarry Smith   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data,*b;
12083287d42SBarry Smith   IS             isicol;
1216849ba73SBarry Smith   PetscErrorCode ierr;
122690b6cddSBarry Smith   PetscInt       *r,*ic,i,n = a->mbs,*ai = a->i,*aj = a->j;
123d0f46423SBarry Smith   PetscInt       *ainew,*ajnew,jmax,*fill,*ajtmp,nz,bs = A->rmap->bs,bs2=a->bs2;
124418422e8SSatish Balay   PetscInt       *idnew,idx,row,m,fm,nnz,nzi,reallocs = 0,nzbd,*im;
12583287d42SBarry Smith   PetscReal      f = 1.0;
126*db4efbfdSBarry Smith   PetscTruth     row_identity,col_identity;
12783287d42SBarry Smith 
12883287d42SBarry Smith   PetscFunctionBegin;
129d0f46423SBarry Smith   if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_ERR_ARG_WRONG,"matrix must be square");
13083287d42SBarry Smith   ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr);
13183287d42SBarry Smith   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
13283287d42SBarry Smith   ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
13383287d42SBarry Smith 
134d3d32019SBarry Smith   f = info->fill;
13583287d42SBarry Smith   /* get new row pointers */
136690b6cddSBarry Smith   ierr     = PetscMalloc((n+1)*sizeof(PetscInt),&ainew);CHKERRQ(ierr);
13783287d42SBarry Smith   ainew[0] = 0;
13883287d42SBarry Smith   /* don't know how many column pointers are needed so estimate */
139690b6cddSBarry Smith   jmax     = (PetscInt)(f*ai[n] + 1);
140690b6cddSBarry Smith   ierr     = PetscMalloc((jmax)*sizeof(PetscInt),&ajnew);CHKERRQ(ierr);
14183287d42SBarry Smith   /* fill is a linked list of nonzeros in active row */
142690b6cddSBarry Smith   ierr     = PetscMalloc((2*n+1)*sizeof(PetscInt),&fill);CHKERRQ(ierr);
14383287d42SBarry Smith   im       = fill + n + 1;
14483287d42SBarry Smith   /* idnew is location of diagonal in factor */
145690b6cddSBarry Smith   ierr     = PetscMalloc((n+1)*sizeof(PetscInt),&idnew);CHKERRQ(ierr);
14683287d42SBarry Smith   idnew[0] = 0;
14783287d42SBarry Smith 
14883287d42SBarry Smith   for (i=0; i<n; i++) {
14983287d42SBarry Smith     /* first copy previous fill into linked list */
15083287d42SBarry Smith     nnz     = nz    = ai[r[i]+1] - ai[r[i]];
15183287d42SBarry Smith     if (!nz) SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix");
15283287d42SBarry Smith     ajtmp   = aj + ai[r[i]];
15383287d42SBarry Smith     fill[n] = n;
15483287d42SBarry Smith     while (nz--) {
15583287d42SBarry Smith       fm  = n;
15683287d42SBarry Smith       idx = ic[*ajtmp++];
15783287d42SBarry Smith       do {
15883287d42SBarry Smith         m  = fm;
15983287d42SBarry Smith         fm = fill[m];
16083287d42SBarry Smith       } while (fm < idx);
16183287d42SBarry Smith       fill[m]   = idx;
16283287d42SBarry Smith       fill[idx] = fm;
16383287d42SBarry Smith     }
16483287d42SBarry Smith     row = fill[n];
16583287d42SBarry Smith     while (row < i) {
16683287d42SBarry Smith       ajtmp = ajnew + idnew[row] + 1;
16783287d42SBarry Smith       nzbd  = 1 + idnew[row] - ainew[row];
16883287d42SBarry Smith       nz    = im[row] - nzbd;
16983287d42SBarry Smith       fm    = row;
17083287d42SBarry Smith       while (nz-- > 0) {
17183287d42SBarry Smith         idx = *ajtmp++;
17283287d42SBarry Smith         nzbd++;
17383287d42SBarry Smith         if (idx == i) im[row] = nzbd;
17483287d42SBarry Smith         do {
17583287d42SBarry Smith           m  = fm;
17683287d42SBarry Smith           fm = fill[m];
17783287d42SBarry Smith         } while (fm < idx);
17883287d42SBarry Smith         if (fm != idx) {
17983287d42SBarry Smith           fill[m]   = idx;
18083287d42SBarry Smith           fill[idx] = fm;
18183287d42SBarry Smith           fm        = idx;
18283287d42SBarry Smith           nnz++;
18383287d42SBarry Smith         }
18483287d42SBarry Smith       }
18583287d42SBarry Smith       row = fill[row];
18683287d42SBarry Smith     }
18783287d42SBarry Smith     /* copy new filled row into permanent storage */
18883287d42SBarry Smith     ainew[i+1] = ainew[i] + nnz;
18983287d42SBarry Smith     if (ainew[i+1] > jmax) {
19083287d42SBarry Smith 
19183287d42SBarry Smith       /* estimate how much additional space we will need */
19283287d42SBarry Smith       /* use the strategy suggested by David Hysom <hysom@perch-t.icase.edu> */
19383287d42SBarry Smith       /* just double the memory each time */
194690b6cddSBarry Smith       PetscInt maxadd = jmax;
19583287d42SBarry Smith       /* maxadd = (int)((f*(ai[n]+1)*(n-i+5))/n); */
19683287d42SBarry Smith       if (maxadd < nnz) maxadd = (n-i)*(nnz+1);
19783287d42SBarry Smith       jmax += maxadd;
19883287d42SBarry Smith 
19983287d42SBarry Smith       /* allocate a longer ajnew */
200690b6cddSBarry Smith       ierr  = PetscMalloc(jmax*sizeof(PetscInt),&ajtmp);CHKERRQ(ierr);
201690b6cddSBarry Smith       ierr  = PetscMemcpy(ajtmp,ajnew,ainew[i]*sizeof(PetscInt));CHKERRQ(ierr);
20283287d42SBarry Smith       ierr  = PetscFree(ajnew);CHKERRQ(ierr);
20383287d42SBarry Smith       ajnew = ajtmp;
204418422e8SSatish Balay       reallocs++; /* count how many times we realloc */
20583287d42SBarry Smith     }
20683287d42SBarry Smith     ajtmp = ajnew + ainew[i];
20783287d42SBarry Smith     fm    = fill[n];
20883287d42SBarry Smith     nzi   = 0;
20983287d42SBarry Smith     im[i] = nnz;
21083287d42SBarry Smith     while (nnz--) {
21183287d42SBarry Smith       if (fm < i) nzi++;
21283287d42SBarry Smith       *ajtmp++ = fm;
21383287d42SBarry Smith       fm       = fill[fm];
21483287d42SBarry Smith     }
21583287d42SBarry Smith     idnew[i] = ainew[i] + nzi;
21683287d42SBarry Smith   }
21783287d42SBarry Smith 
2186cf91177SBarry Smith #if defined(PETSC_USE_INFO)
21983287d42SBarry Smith   if (ai[n] != 0) {
22083287d42SBarry Smith     PetscReal af = ((PetscReal)ainew[n])/((PetscReal)ai[n]);
221ae15b995SBarry Smith     ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,f,af);CHKERRQ(ierr);
222ae15b995SBarry Smith     ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr);
223ae15b995SBarry Smith     ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G);\n",af);CHKERRQ(ierr);
224ae15b995SBarry Smith     ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr);
22583287d42SBarry Smith   } else {
226ae15b995SBarry Smith     ierr = PetscInfo(A,"Empty matrix.\n");CHKERRQ(ierr);
22783287d42SBarry Smith   }
22863ba0a88SBarry Smith #endif
22983287d42SBarry Smith 
23083287d42SBarry Smith   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
23183287d42SBarry Smith   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
23283287d42SBarry Smith 
23383287d42SBarry Smith   ierr = PetscFree(fill);CHKERRQ(ierr);
23483287d42SBarry Smith 
23583287d42SBarry Smith   /* put together the new matrix */
236ab93d7beSBarry Smith   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*B,bs,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr);
23752e6d16bSBarry Smith   ierr = PetscLogObjectParent(*B,isicol);CHKERRQ(ierr);
23883287d42SBarry Smith   b = (Mat_SeqBAIJ*)(*B)->data;
23983287d42SBarry Smith   b->singlemalloc = PETSC_FALSE;
240e6b907acSBarry Smith   b->free_a     = PETSC_TRUE;
241e6b907acSBarry Smith   b->free_ij    = PETSC_TRUE;
24282502324SSatish Balay   ierr          = PetscMalloc((ainew[n]+1)*sizeof(MatScalar)*bs2,&b->a);CHKERRQ(ierr);
24383287d42SBarry Smith   b->j          = ajnew;
24483287d42SBarry Smith   b->i          = ainew;
24583287d42SBarry Smith   b->diag       = idnew;
24683287d42SBarry Smith   b->ilen       = 0;
24783287d42SBarry Smith   b->imax       = 0;
24883287d42SBarry Smith   b->row        = isrow;
24983287d42SBarry Smith   b->col        = iscol;
250d3d32019SBarry Smith   b->pivotinblocks = (info->pivotinblocks) ? PETSC_TRUE : PETSC_FALSE;
25183287d42SBarry Smith   ierr          = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
25283287d42SBarry Smith   ierr          = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
25383287d42SBarry Smith   b->icol       = isicol;
25487828ca2SBarry Smith   ierr = PetscMalloc((bs*n+bs)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
25583287d42SBarry Smith   /* In b structure:  Free imax, ilen, old a, old j.
25683287d42SBarry Smith      Allocate idnew, solve_work, new a, new j */
25752e6d16bSBarry Smith   ierr = PetscLogObjectMemory(*B,(ainew[n]-n)*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr);
25883287d42SBarry Smith   b->maxnz = b->nz = ainew[n];
25983287d42SBarry Smith 
2605c9eb25fSBarry Smith   (*B)->factor                 = MAT_FACTOR_LU;
261418422e8SSatish Balay   (*B)->info.factor_mallocs    = reallocs;
26283287d42SBarry Smith   (*B)->info.fill_ratio_given  = f;
26383287d42SBarry Smith   if (ai[n] != 0) {
26483287d42SBarry Smith     (*B)->info.fill_ratio_needed = ((PetscReal)ainew[n])/((PetscReal)ai[n]);
26583287d42SBarry Smith   } else {
26683287d42SBarry Smith     (*B)->info.fill_ratio_needed = 0.0;
26783287d42SBarry Smith   }
268*db4efbfdSBarry Smith   ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr);
269*db4efbfdSBarry Smith   ierr = ISIdentity(iscol,&col_identity);CHKERRQ(ierr);
270*db4efbfdSBarry Smith   ierr = MatSeqBAIJSetNumericFactorization(*B,row_identity && col_identity);CHKERRQ(ierr);
27183287d42SBarry Smith   PetscFunctionReturn(0);
27283287d42SBarry Smith  }
273*db4efbfdSBarry Smith 
274